* This do-file generates the three year averages.
* Step 1: load the main data, reverse the restriction measures and build the
* unweighted and alternatively weighted composite indices.
use "do files - REPLICATION/Maindata", clear

* Recode restrictions: every measure is replaced by (constant - value), which
* reverses its ordering; the PCA factor simply changes sign.
replace restrictions_all_racial_max    = 6.5 - restrictions_all_racial_max
replace restrictions_dinkin_racial_max = 6 - restrictions_dinkin_racial_max
replace restrictions_binary            = 1 - restrictions_binary
replace factor_pca_max                 = - factor_pca_max

* The fifteen components entering both composite indices, in summation order.
local components freeman land_formalpatenting particular_status oath property_income householder freeholding min_freeholdingproperty tax felon family_restriction residency good_person religion race

* Assemble the two sum expressions term by term from the component list.
local count_expr
local sqrt_expr
foreach part of local components {
    if "`count_expr'" == "" {
        local count_expr "(`part' > 0)"
        local sqrt_expr "sqrt(`part')"
    }
    else {
        local count_expr "`count_expr' + (`part' > 0)"
        local sqrt_expr "`sqrt_expr' + sqrt(`part')"
    }
}

* Unweighted index: number of components that are > 0, then reversed.
generate restrictions_unweighted = `count_expr'
replace restrictions_unweighted = 8 - restrictions_unweighted

* Just for one special case: 0.25 becomes .5 again once the square root is
* taken in the alternative weighting scheme.
replace residency = .25 if residency == .5

* Alternatively weighted index: sum of square roots of the components, then reversed.
generate restrictions_altweighted = `sqrt_expr'
replace restrictions_altweighted = 7 - restrictions_altweighted

* Regional aggregates.
generate south       = lowersouth + uppersouth
generate northmiddle = north + middle
* Remove the observations flagged as irrelevant (see article text).
drop if drop_observations == 1

* Number consecutive blocks of three observations within each colony, in
* yearcleanup order; idthird is constant inside a block and is the grouping
* key for the three year averages.
bysort colony (yearcleanup): egen idthird = seq(), block(3)

* Georgia (only 40 observations) and Maine (only 13 observations!) are excluded.
drop if inlist(colony, "GA", "ME")
*******************************
*GENERATE AVERAGES OF VARIABLES
********************************
* The yearly restriction measures are first moved to *_noavg names; the
* original names are then reused for the means within each colony/idthird block.
rename restrictions_all_racial_max restrictions_noavg
rename restrictions_unweighted restrictions_unweighted_noavg
rename restrictions_altweighted restrictions_altweighted_noavg
sort colony idthird
by colony idthird: egen restrictions_all_racial_max = mean(restrictions_noavg)
* The sources are named in full (with the _noavg suffix). After the renames
* above the short names no longer exist, so referring to them would only work
* through variable-name abbreviation and would fail with varabbrev switched off.
by colony idthird: egen restrictions_unweighted = mean(restrictions_unweighted_noavg)
by colony idthird: egen restrictions_altweighted = mean(restrictions_altweighted_noavg)
* Block means of the explanatory variables.
by colony idthird: egen perc_black_avg = mean(perc_black)
by colony idthird: egen wage_avg = mean(bldg_wages_new)
by colony idthird: egen perc_landless_avg = mean(perc_landless_new)
by colony idthird: egen urb_avg = mean(urbanization)
by colony idthird: egen dens_avg = mean(density)
* Move the yearly series out of the way: each gets a _noavg name so that the
* original name can hold the block mean.
foreach series in proprietary charter slaveprice_carib factor_pca_max restrictions_binary {
    rename `series' `series'_noavg
}
* The Dinkin measure uses a shortened _noavg name.
rename restrictions_dinkin_racial_max restrictions_dinkin_noavg

* Means within each colony/idthird block; the i-th target is the mean of the
* i-th source. Targets are created in this order.
local mean_targets proprietary charter rice_wheat_40_low_avg restrictions_binary restrictions_dinkin_racial_max slaveprice_carib factor_pca_max
local mean_sources proprietary_noavg charter_noavg rice_wheat_40_low_noavg restrictions_binary_noavg restrictions_dinkin_noavg slaveprice_carib_noavg factor_pca_max_noavg
local n_means : word count `mean_targets'
forvalues i = 1/`n_means' {
    local target : word `i' of `mean_targets'
    local source : word `i' of `mean_sources'
    by colony idthird: egen `target' = mean(`source')
}
* Reduce each colony/idthird block to a single row: after ordering by year,
* only the first year of every block is kept.
sort colony yearcleanup
duplicates drop idthird colony, force

* The old index is removed so that its name can be reused for the lagged average.
drop rice_wheat2_40_low

* Lags of the averaged variables within colony. No re-sort is performed here;
* the lags use the observation order left by the steps above.
* Entry i of lag_names is entry i of lag_from shifted back by entry i of lag_back rows.
local lag_names urb2 density2 proprietary2 charter2 rice_wheat2_40_low wage2 perc_landless2_new perc_landless3 restrictions2_racial_max restrictions2_unweighted restrictions2_altweighted perc_black2 restrictions2dinkin_racial_max factor2_pca_max restrictions2_binary
local lag_from urb_avg dens_avg proprietary charter rice_wheat_40_low_avg wage_avg perc_landless_avg perc_landless_avg restrictions_all_racial_max restrictions_unweighted restrictions_altweighted perc_black_avg restrictions_dinkin_racial_max factor_pca_max restrictions_binary
local lag_back 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
local n_lags : word count `lag_names'
forvalues i = 1/`n_lags' {
    local newvar : word `i' of `lag_names'
    local srcvar : word `i' of `lag_from'
    local shift : word `i' of `lag_back'
    by colony: generate `newvar' = `srcvar'[_n-`shift']
}

* Post-1700 dummy, its interaction with south, and the remaining lags.
* These run under capture, so a failure of any of them is silently ignored.
capture generate year1700 = (yearcleanup >= 1700)
capture generate year1700_south = year1700 * south
capture by colony: generate year17002_south = year1700_south[_n-1]
capture by colony: generate slaveprice2_carib = slaveprice_carib[_n-1]
* restr1: averaged suffrage measure, missing wherever it is <= 0.
generate restr1 = restrictions_all_racial_max if restrictions_all_racial_max > 0
* restr2: averaged suffrage measure, missing wherever it is >= 5.5.
generate restr2 = restrictions_all_racial_max if restrictions_all_racial_max < 5.5

* Region-specific linear trends: region indicator times year.
foreach region in north middle {
    generate `region'_trend = `region' * yearcleanup
}
* List of colony dummies, stored in the global macro colonydum.
global colonydum colony_d1 colony_d2 colony_d4 colony_d5 colony_d7 colony_d8 colony_d9 colony_d10 colony_d11 colony_d12 colony_d13 colony_d14
sort colony yearcleanup

* Colony-specific linear trends (dummy times year) for dummies 1, 2, 4, 5 and 7 to 13.
foreach k of numlist 1 2 4 5 7/13 {
    generate colony_d`k'_trend = colony_d`k' * yearcleanup
}
* Collect the colony-specific trend variables in the global macro colonydum_trend.
* A macro is defined through its bare name. Writing the name with a leading
* dollar sign expands the (still empty) macro first, so the command would define
* a global called colony_d1_trend and leave colonydum_trend undefined.
* NOTE(review): colony_d14 is part of the colonydum global but has no trend term in this list - confirm this is intended.
global colonydum_trend colony_d1_trend colony_d2_trend colony_d4_trend colony_d5_trend colony_d7_trend colony_d8_trend colony_d9_trend colony_d10_trend colony_d11_trend colony_d12_trend colony_d13_trend
* Compact variable labels for the tables.

* Suffrage measures.
label var restrictions_all_racial_max "Suffrage"
label var restrictions2_racial_max "Lagged Suffrage"

* Labour measures; the rowstyle suffix makes sure that everything printed in
* that row is printed in bold.
label var year17002_south "Labour (Dummy)\rowstyle{\bfseries\boldmath}"
label var slaveprice2_carib "Labour (Slave prices)\rowstyle{\bfseries\boldmath}"
label var perc_black2 "Labour (\%black)\rowstyle{\bfseries\boldmath}"

* Remaining covariates.
label var elf "Ethnic fractionalization"
label var perc_landless2_new "Inequality (\%white l.less)"
label var density2 "Population density"
label var urb2 "Urbanisation"

* Helper variable that can be filled with different concepts, so that different
* variables can be listed in the same row of an output table.
generate suffrage_tmp = .
label var suffrage_tmp "Lagged Suffrage"