//generation of DTA files from the classification of the open-ended questions; these files are later merged with the main data
clear all
//data after classification
import excel "E:\Dropbox\articles\eksperyment labowy z losami multi-multi\1. prosty wybór między multi-multi\data.xls", sheet("Classification (reviewer R)") cellrange(A4:BX330) firstrow clear


keep ID WhyThisNmbr_probs WhyThisNmbr_prz_size WhyThisNmbr_prz_nmbr WhyThisNmbr_superstit WhyThisNmbr_error WhyThisNmbr_risk safest_probs safest_prz_size safest_prz_nmbr safest_superstit safest_error safest_risk riskiest_probs riskiest_prz_size riskiest_prz_nmbr riskiest_superstit riskiest_error riskiest_risk edu_STEM edu_non_STEM gmbl_why_unknown gmbl_why_superstit gmbl_why_probs gmbl_why_risk gmbl_how_quickpick gmbl_how_superstit gmbl_how_ownchoice 
destring ID, replace force
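//prefix every variable with c_; the prefix is swapped for R_ after merging with the main data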
foreach x of var * { 
	rename `x' c_`x' 
} 
rename c_ID id
saveold "R_classification_for_appending.dta", version(13)

clear all
//data after classification
import excel "E:\Dropbox\articles\eksperyment labowy z losami multi-multi\1. prosty wybór między multi-multi\data.xls", sheet("Classification (reviewer P)") cellrange(A4:BX330) firstrow clear


keep ID WhyThisNmbr_probs WhyThisNmbr_prz_size WhyThisNmbr_prz_nmbr WhyThisNmbr_superstit WhyThisNmbr_error WhyThisNmbr_risk safest_probs safest_prz_size safest_prz_nmbr safest_superstit safest_error safest_risk riskiest_probs riskiest_prz_size riskiest_prz_nmbr riskiest_superstit riskiest_error riskiest_risk edu_STEM edu_non_STEM gmbl_why_unknown gmbl_why_superstit gmbl_why_probs gmbl_why_risk gmbl_how_quickpick gmbl_how_superstit gmbl_how_ownchoice 
destring ID, replace force
//Let's add "P_" to the beginning of each variable
foreach x of var * { 
	rename `x' c_`x' 
} 
rename c_ID id
saveold "P_classification_for_appending.dta", version(13)
//end of generation of DTA files from the classification of the open-ended questions



//ACTUAL DATA ANALYSIS 

clear all
import excel "data.xls", sheet("Coded") cellrange(A2:AZ328) firstrow clear
//ssc install tabplot

replace treatment=0 if treatment==5 // recoding: treatments now run from closest to reality (0) to furthest from it (4)


gen t_purchase=(treatment==0)

destring _all, replace
gen treat_name ="bla" 
replace treat_name = "aPurchT" if treatment==0
replace treat_name = "bChoiceT" if treatment==1
replace treat_name = "cChoiceProbT" if treatment==2
replace treat_name = "dLabProbT" if treatment==3
replace treat_name = "eHypoLabProbT" if treatment==4

label variable t_lottery "ChoiceT"
label variable t_prob "ChoiceProbT"
label variable t_lab "LabProbT"
label variable t_hypo "HypoLabProbT"
label variable t_purchase "PurchT"
label variable number_of_numbers "number of numbers"

gen session=.
replace session=1 if session_date=="14.08.2019"
replace session=2 if session_date=="16.09.2019"
replace session=3 if session_date=="03.10.2019"
replace session=4 if session_date=="24.10.2019"

//recoding option letters to numeric values
tab safest_option
tab riskiest_option

global vars "number_of_numbers safest_option riskiest_option"
foreach x of global vars {
replace `x'="10" if `x'=="A"
replace `x'="9" if `x'=="B"
replace `x'="8" if `x'=="C"
replace `x'="7" if `x'=="D"
replace `x'="6" if `x'=="E"
replace `x'="5" if `x'=="F"
replace `x'="4" if `x'=="G"
replace `x'="3" if `x'=="H"
replace `x'="2" if `x'=="I"
replace `x'="1" if `x'=="J"
destring `x', replace force
}
replace safest_option=. if safest_option>10
replace riskiest_option=. if riskiest_option>10

//ChoiceT has at most 3 errors; the other treatments can have at most 4
tab errors // 0 or 1 error- 91%

tab number_of_numbers example_version, chi2  //example version makes no difference

//recode Part 1: BDM -> WTP

replace BDM_first_row_Y=BDM_last+1 if BDM_fir==. 
gen BDM_switch=(BDM_f+BDM_l)/2
gen WTP=BDM_switch*3/4 // (/4) because we're converting to euros
gen WTP_weird=0
replace WTP_weird=1 if BDM_first_!=BDM_last+1
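//BDM_switch is the midpoint between the last row of one option and the first row marked Y;
//respondents who never switch (BDM_first_row_Y missing) are assigned a switch point one row after the last row.
//WTP_weird flags cases where the first Y row does not directly follow the last row, i.e. a non-adjacent (inconsistent) switch.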

tab treat_name WTP_w

//recode Part 2: HL

replace HL_first_row_B=HL_last+1 if HL_fir==.
gen HL_switch=(HL_last_row_A+HL_first_row_B)/2
gen HL_diff=HL_first-HL_last
tab HL_diff
replace HL_switch=. if HL_first_row_<HL_las-2

gen RRA=HL_switch
recode RRA (0.5 = -0.95) (1.5 = -0.95) (2.5 = -0.72) (3.5 = -0.32) ///
			(4.5 = 0) (5.5 = 0.28) (6.5 = 0.545) (7.5 = 0.825) ///
			(8.5 = 1.17) (9.5 = 1.37) (10.5 = 1.37)
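//The Holt-Laury switch point is the midpoint between the last row in column A and the first row in column B
//(switches more than two rows "backwards" were set to missing above). The recode maps each switch point to a
//relative-risk-aversion coefficient; the values are the interval points used for this payoff structure (taken as given here).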

	//dealing with missing obs in HL
gen missing_RRA=0
replace missing_RRA=1 if RRA==.
sum RRA
replace RRA=r(mean) if RRA==.
			
// Part 3: questionnaire
gen extraversion=personality1-personality6
gen agreeableness=personality2-personality7
gen conscientiousness=personality3-personality8
gen emotional_stability=personality4-personality9
gen openness_to_experience=personality5-personality10
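//Each trait is scored as the regular item minus its counterpart five items later
//(presumably the reverse-keyed item of a ten-item Big Five inventory).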


	//dealing with missing obs in dohmen
global dohm_domains "general car finance sport work health people"
foreach x of global dohm_domains {
sum dohmen_`x' 
gen missing_dohmen_`x'=0
replace missing_dohmen_`x'=1 if dohmen_`x'==.
replace dohmen_`x'=r(mean) if dohmen_`x'==.
}

	// recoding so that larger values indicate more active gamblers
replace gambling_habits_MM=5 if gambling_habits_MM==6 // 5 and 6 both mean never having played it, so they are merged here
	
replace gambling_habits_MM=6-gambling_habits_MM
replace gambling_habits_overall=6-gambling_habits_overall


	
global big_five "extraversion agreeableness conscientiousness emotional_stability openness_to_experience"
global demo "male age  education income  gambling_habits_overall gambling_habits_of_o supersti  relig"
//global demo "male age  education income  frequent_gamblers gambling_habits_of_o supersti  relig" // :gambling_habits_overall changed to frequent_gamblers for when we check the robustness in the WTP regressions and cannot include both of them at the same time
global other_quest "dohmen* missing_d* RRA missing_RRA $big_five" 

tab age


pwcorr dohmen*  RRA , sig // RRA correlated only with dohmen_general

//do risk-averse participants bet on fewer numbers, and risk-seeking ones on more?
egen mediansplit_dohmen_general=median(dohmen_general)
gen dohmen_below_median = dohmen_general < mediansplit_dohmen_general
gen dohmen_above_median = dohmen_general > mediansplit_dohmen_general
// scatter safest_option number_of_numbers, jitter(3) by(dohmen_ab) // not much to see
//same but with RRA
egen mediansplit_RRA=median(RRA)
gen RRA_below_median = RRA < mediansplit_RRA
gen RRA_above_median = RRA > mediansplit_RRA
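//Note: with strict inequalities, observations lying exactly at the median fall into neither the below- nor the above-median group.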
// scatter safest_option number_of_numbers, jitter(3) by(RRA_ab) // not much to see

// Appendix A1
sum $demo
sum $other_quest
sum $vars
tab number_of_numbers treat_name, col

bysort treat_n: sum number_of_numbers if WTP>15/4  // (/4) converts to euros

table session treat_name, contents(mean number_of_numbers sd number_of_numbers count number_of_numbers ) // OK
anova number_of_numbers session treatment // OK / session: F=0.50, p=0.69; treatment F=12.71, p=0.00
ranksum number_of_numbers, by(example_version_A) // does example version make a difference? NO: z=-0.65, p=0.52
// treatment effects (Table 2) 
bysort treat_name: sum number_of_numbers, detail
// hist number_of_numbers, by(treatment)

// tests: (Table 3)

//preserve
// tab errors
// drop if errors>1 // (for Appendix A3(A))
// tab errors
// tab WTP
// drop if WTP<19/4 // (for Appendix A3(B))
// tab WTP

kwallis number_of_numbers, by(treat_name) // Chi2= 41.81 with 4 d.f., p=0.0001

// net describe conovertest, from(http://alexisdinno.com/stata)
// conovertest number_of_numbers, by(treat_name) // Appendix A2
// ssc install dunntest

// also mann-whitney and ksmirnov tests for each pair of treatments:
ranksum number_of_numbers if treatment<2, by(treat_name) // purchase makes no diff / z=0.43, p=0.67
ranksum number_of_numbers if treatment>0 & treatment<3, by(treat_name) // probs make a diff / z=3.91, p=0.0001
ranksum number_of_numbers if treatment>1 & treatment<4, by(treat_name) //  z=1.18, p=0.24
ranksum number_of_numbers if treatment>2, by(treat_name) // z=-1.14, p=0.25

gen no_prob=(treatment<2) // treatments without probabilities provided
gen prob_provided=(treatment>1) //treatments in which probabilities are provided (prob, lab, hypo)
gen lab_alike=(treatment>2) // treatments which are displayed as in the lab experiments (lab, hypo)

ranksum number_of_numbers, by(prob_provi) // z=6.355, p=0.0000
ranksum number_of_numbers, by(no_prob) // same as above
ranksum number_of_numbers, by(lab_alike) // 


ksmirnov number_of_numbers if treatment<2, by(treat_name) // D=0.10, p=0.90
ksmirnov number_of_numbers if treatment>0 & treatment<3, by(treat_name) //D=0.34, p=0.001
ksmirnov number_of_numbers if treatment>1 & treatment<4, by(treat_name) // D=0.16, p=0.35
ksmirnov number_of_numbers if treatment>2, by(treat_name) // D=0.15, p=0.44

gen help=.
sum help

sum prob_provi

sum help

capture drop *spec*
// logits
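//For each x = 1..10, bet_spec_x indicates betting on exactly x numbers and bet_m_spec_x on more than x numbers;
//a logit of each indicator on the treatment dummies, questionnaire controls and demographics is stored as
//spec_x / m_spec_x (m_spec_10 is skipped since nobody can bet on more than 10).
//Caution: $non_lab is only defined further down in the file; if the script is run top to bottom, the treatment
//dummies drop out of these logits unless that global is defined before this loop.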
forval x=1/10{
gen bet_spec_`x' = number_of_numbers==`x'
gen bet_m_spec_`x' = number_of_numbers>`x'
replace help = bet_spec_`x'
quietly logit help $non_lab $other_quest $demo
est store spec_`x'
replace help = bet_m_spec_`x'

if(`x' <10){
quietly logit help $non_lab $other_quest $demo
est store m_spec_`x'
}
}

est table spec_*, b(%12.3f) var(40) star(.01 .05 .10) stats(N) noomit
est table m_spec_*, b(%12.3f) var(40) star(.01 .05 .10) stats(N) noomit

sum prob_provi


logit bet_spec_1 $non_lab $other_quest $demo
est store spec_1

// do gambling habits make a difference in number_of_numbers? 
kwallis number_of_numbers, by(gambling_habits_overall) // Chi(2)=2.88, p=0.58
bysort gambling_habits_overall: sum number_of_numbers

gen frequent_gamblers=gambling_habits_overall>2 //those who play at least few times a year
tab frequent_gamblers
ranksum number_of_numbers, by(frequent_gamblers) // z=-1.41, p=0.16
gen heavy_gamblers=gambling_habits_overall>3 // play once a month or more
tab heavy_gamblers
ranksum number_of_numbers, by(heavy_gamblers) // z=-1.51, p=0.13

global non_lab "t_purch t_lottery t_prob t_hypo" //all treatments apart from the "lab" one

// RRA --nope
kwallis RRA, by(treat_name) // nope; Chi(2)=6.02 with 4 d.f., p=0.20
ranksum RRA, by(prob_provi) // nope; z=1.29, p=0.20
ranksum RRA, by(lab_alike) // z=1.86, p=0.06
kwallis RRA, by(number_of_numbers) // nope; Chi(2)=9 with 9 d.f., p=0.44
ktau number_of_numbers RRA // tauB=-0.01, p=0.76
by treatment, sort : ktau number_of_numbers RRA // LabT with the strongest coefficient and lowest p-value but nothing great
by prob_provided, sort : ktau number_of_numbers RRA // when probabilities are provided, RRA and number_of_numbers are less independent (but not significantly)
by lab_alike, sort : ktau number_of_numbers RRA // in the lab-like treatments, RRA and number_of_numbers are less independent (but not significantly)

// regressions for number_of_numbers (Table 5)

ologit number_of_numbers $non_lab $other_quest $demo
est store tutti_frutti // spec (2)
test $big_five
test dohmen_general dohmen_car dohmen_finance dohmen_sport dohmen_work dohmen_health dohmen_people // joint test of all Dohmen risk domains

xi: ologit number_of_numbers $non_lab $other_quest $demo i.treatment*RRA
test _ItreXRRA_1 _ItreXRRA_2 _ItreXRRA_3 _ItreXRRA_4
est store tutti_frutti_RRA // spec (3)

reg number_of_numbers  $non_lab $other_quest $demo
est store tf_mnk

ologit number_of_numbers $non_lab
est store treats // spec (1)

ologit number_of_numbers $non_lab  dohmen_people  gambling_habits_overall gambling_habits_of religious
est store signif // spec (4)


// scatter safest_option number_of_numbers, jitter(3)
// scatter riskiest_option number_of_numbers,  jitter(3)
// scatter riskiest_option safest_option, jitter(3)

ologit number_of_numbers $non_lab  dohmen_people   gambling_habits_overall gambling_habits_of religious safest_option riskiest_option
est store risk_safe // spec (5)

// est table (Table 5) moved to the very end of the file



// est table tutti_frutti tf_mnk, b(%12.3f) var(40) star(.01 .05 .10) stats(N) noomitted varlabel


// ***** riskiest safest *****
tab safest_option // 51% say 1 number
tab riskiest_option // 46% say 10 numbers
kwallis riskiest_option, by(number_of_numbers) // 
kwallis safest_option, by(number_of_numbers) // 


kwallis riskiest_option, by(treat_name) // 
kwallis safest_option, by(treat_name) // stop
ranksum safest_option if treatment<2, by(treat_name) // 
ranksum safest_option if treatment>0 & treatment<3, by(treat_name) //
ranksum safest_option if treatment>1 & treatment<4, by(treat_name) // 
ranksum safest_option if treatment>2, by(treat_name) //
ranksum safest_option, by(lab_alike) // 


tab safest_option treatment
tab riskiest_option treatment

//Table 9
pwcorr number_of_numbers safest_option riskiest_option WTP, sig

// 
//hist safest_option, disc
//hist riskiest_option if riskiest_op<11, disc

////// ***** WTP *****


// Figure 5 proposed dec 31:
ssc install cdfplot
cdfplot WTP, by(treatment)

table session treat_name, contents(mean WTP sd WTP count WTP ) // all good
kwallis WTP if treatment>0, by(session)

// Table 6
bysort treat_name: sum WTP, detail 
tab WTP treat_name

//

kwallis WTP, by(treat_name) // Chi(2) = 18.38 with 4 d.f.,p = 0.001
bysort treat_n: pwcorr WTP number_of_numbers

// Table 7
ranksum WTP if treatment<2, by(treat_name) // YES z= -2.94 p=0.003
ranksum WTP if treatment>0 & treatment<3, by(treat_name) // nope z=0.62, p=0.53
ranksum WTP if treatment>1 & treatment<4, by(treat_name) // almost z=-1.73, p=0.08
ranksum WTP if treatment>2, by(treat_name) // nope z=0.51, p=0.61

ksmirnov WTP if treatment<2, by(treat_name) // D=0.26, p=0.05
ksmirnov WTP if treatment>0 & treatment<3, by(treat_name) // D=0.11, p=0.92
ksmirnov WTP if treatment>1 & treatment<4, by(treat_name) // D=0.21, p=0.16
ksmirnov WTP if treatment>2, by(treat_name) // D=0.10, p=0.93
//
// conovertest   WTP, by(treat_name) // Appendix A4

ranksum WTP, by(t_purchase) // z=4.005, p=0.001
ksmirnov WTP, by(t_purchase) // D=0.2697, p= 0.002

ranksum WTP if treatment>0, by(lab_alike) // z=-1.610, p=0.1074
ksmirnov WTP if treatment>0, by(lab_alike) // no: D=0.1167, p=0.395

// gambling habits
kwallis WTP, by(gambling_habits_overall) // Chi(2)= 8.664 with 4d.f; p=0.07
bysort gambling_habits_overall: sum WTP

ranksum WTP, by(frequent_gamblers) // frequent gamblers have higher WTP: z=-2.819, p=0.005
//

sum WTP
//hist WTP, disc

///// WTP regressions
reg WTP $non_lab
est store wtp_treats // spec (1)

reg WTP $non_lab $other_quest $demo
est store wtp_plus_demo // spec (2)

xi: reg WTP $non_lab $other_quest $demo i.treatment*RRA
test _ItreXRRA_1 _ItreXRRA_2 _ItreXRRA_3 _ItreXRRA_4
est store wtp_plus_demo_RRA // spec (3)

reg WTP $non_lab dohmen_general male income
est store wtp_sig // spec (4)

reg WTP $non_lab dohmen_general male income number_of_numbers
est store wtp_sig_beton // spec (5)

// Appendix A5
est table wtp_treats  wtp_plus_demo wtp_plus_demo_RRA wtp_sig wtp_sig_beton, b(%12.3f) var(40) star(.01 .05 .10) stats(N) noomitted varlabel
//////

//////////////////////////*****CLASSIFICATION**************//////////////////////

//merging with Raman's data (reviewer R)

merge 1:1 id using "other- tables etc/R_classification_for_appending.dta"
drop _merge
//Let's add "R_" to the beginning of each variable
foreach var of varlist c_* {
   	local newname : subinstr local var "c_" "R_"
   	rename `var' `newname'
}


//merging with Particja's data (reviewer P)

merge 1:1 id using "other- tables etc/P_classification_for_appending.dta"
drop _merge
//Let's add "P_" to the beginning of each variable
foreach var of varlist c_* {
   	local newname : subinstr local var "c_" "P_"
   	rename `var' `newname'
}


destring R_*, replace force
destring P_*, replace force

//replacing N/A with zeros
foreach x of varlist R_*{
  replace `x' = 0 if(`x' == .)
}
foreach x of varlist P_*{
  replace `x' = 0 if(`x' == .)
}


merge 1:1 id using "other- tables etc/P_classification_for_appending.dta"
drop _merge
destring c_*, replace force
//Let's add "K_" to the beginning of each variable
foreach var of varlist c_* {
   	local newname : subinstr local var "c_" "K_"
	replace `var'=0
	rename `var' `newname'
}

capture drop c_*


global justifications "WhyThisNmbr_probs WhyThisNmbr_prz_size WhyThisNmbr_prz_nmbr WhyThisNmbr_superstit WhyThisNmbr_error WhyThisNmbr_risk safest_probs safest_prz_size safest_prz_nmbr safest_superstit safest_error safest_risk riskiest_probs riskiest_prz_size riskiest_prz_nmbr riskiest_superstit riskiest_error riskiest_risk edu_STEM edu_non_STEM gmbl_why_unknown gmbl_why_superstit gmbl_why_probs gmbl_why_risk gmbl_how_quickpick gmbl_how_superstit gmbl_how_ownchoice" 

//Generating kappas and storing them
foreach just of global justifications {
kap P_`just' R_`just'
replace K_`just'=r(kappa)
}
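//Each K_* variable is constant across observations and stores Cohen's kappa between reviewer P's and reviewer R's coding of that category.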

//all kappas at one place:
/*
Agreement:
below 0.00      Poor
0.00 – 0.20     Slight
0.21 – 0.40     Fair
0.41 – 0.60     Moderate
0.61 – 0.80     Substantial
0.81 – 1.00     Almost perfect
*/
ssc install fsum
fsum K_WhyThisNmbr_probs K_WhyThisNmbr_prz_size K_WhyThisNmbr_prz_nmbr K_WhyThisNmbr_superstit K_WhyThisNmbr_error K_WhyThisNmbr_risk K_safest_probs K_safest_prz_size K_safest_prz_nmbr K_safest_superstit K_safest_error K_safest_risk K_riskiest_probs K_riskiest_prz_size K_riskiest_prz_nmbr K_riskiest_superstit K_riskiest_error K_riskiest_risk K_edu_STEM K_edu_non_STEM K_gmbl_why_unknown K_gmbl_why_superstit K_gmbl_why_probs K_gmbl_why_risk K_gmbl_how_quickpick K_gmbl_how_superstit K_gmbl_how_ownchoice
//calculating mean kappa
global kappas "K_WhyThisNmbr_probs K_WhyThisNmbr_prz_size K_WhyThisNmbr_prz_nmbr K_WhyThisNmbr_superstit K_WhyThisNmbr_error K_WhyThisNmbr_risk K_safest_probs K_safest_prz_size K_safest_prz_nmbr K_safest_superstit K_safest_error K_safest_risk K_riskiest_probs K_riskiest_prz_size K_riskiest_prz_nmbr K_riskiest_superstit K_riskiest_error K_riskiest_risk K_edu_STEM K_edu_non_STEM K_gmbl_why_unknown K_gmbl_why_superstit K_gmbl_why_probs K_gmbl_why_risk K_gmbl_how_quickpick K_gmbl_how_superstit K_gmbl_how_ownchoice"
gen mean_kappa=0
gen i_counter=0
foreach x of global kappas {
replace mean_kappa=mean_kappa+`x'
replace i_counter=i_counter+1
display i_counter
display `x'
display mean_kappa
}
replace mean_kappa=mean_kappa/i_counter
display mean_kappa
drop mean_kappa
drop i_counter
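//A simpler equivalent (sketch, not run): since every K_* variable is constant, the mean kappa could also be
//obtained in one line, e.g.
//  egen mean_kappa2 = rowmean($kappas)
//  sum mean_kappa2
//(mean_kappa2 is a hypothetical name; the loop above is the version actually used)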


//kappa analysis:
//justifications analysis
//how to pack it on one table?
//I had to exclude c_riskiest_why_superstition_mean

global justifications "WhyThisNmbr_probs WhyThisNmbr_prz_size WhyThisNmbr_prz_nmbr WhyThisNmbr_superstit WhyThisNmbr_error WhyThisNmbr_risk safest_probs safest_prz_size safest_prz_nmbr safest_superstit safest_error safest_risk riskiest_probs riskiest_prz_size riskiest_prz_nmbr riskiest_superstit riskiest_error riskiest_risk edu_STEM edu_non_STEM gmbl_why_unknown gmbl_why_superstit gmbl_why_probs gmbl_why_risk gmbl_how_quickpick gmbl_how_superstit gmbl_how_ownchoice" 
global justifications_minus1 "WhyThisNmbr_probs WhyThisNmbr_prz_size WhyThisNmbr_prz_nmbr WhyThisNmbr_superstit WhyThisNmbr_error WhyThisNmbr_risk safest_probs safest_prz_size safest_prz_nmbr safest_superstit safest_error safest_risk riskiest_probs riskiest_prz_size riskiest_prz_nmbr riskiest_error riskiest_risk edu_STEM edu_non_STEM gmbl_why_unknown gmbl_why_superstit gmbl_why_probs gmbl_why_risk gmbl_how_quickpick gmbl_how_superstit gmbl_how_ownchoice" 

//gen common indicator
capture drop c_*
foreach just of global justifications_minus1 {
//gen c_`just'= (R_`just'+P_`just')/2
gen c_`just'= (R_`just'*P_`just')
display "c_`just'"
}
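//Each c_ indicator equals 1 only when both reviewers flagged the category (product of two 0/1 dummies),
//a stricter rule than the commented-out average of the two codings.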

ttest number_of_numbers, by(c_safest_prz_nmbr)
//ttest number_of_numbers, by(c_safest_why_numberofprizesme)
// store the unabbreviated varlist in the local macro c_just;
// it is used below to name the rows of the matrix
unab c_just : c_*

// count the number of variables in `c_just'
local l : word count `c_just'

// prepare an empty matrix in which to store the
// p-values
matrix p = J(`l',6,.)
matrix rownames p = `c_just'
matrix colnames p = kwls_treat1 kwls_treat2 ttst_WTP rnksm_betn rnksm_rskst rnksm_sfst
// look at the empty matrix
//matlist p
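//Columns of p: (1) Kruskal-Wallis p across treatments from the raw chi2, (2) the same from the tie-adjusted chi2,
//(3) t-test p-value for WTP split by the indicator, (4)-(6) rank-sum p-values for number_of_numbers, riskiest_option and safest_option.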

//collecting the p-values into the matrix
capture drop i_counter
gen i_counter=0
foreach just of global justifications_minus1 {
//    reg `var' id
//    matrix p[`i++',1] = 2*ttail(e(df_r), abs(_b[id]/_se[id]))
//r(df)
// set varabbrev on
display "c_`just'"
//display i_counter
replace i_counter=i_counter+1
quietly kwallis c_`just', by(treatment) //
matrix p[i_counter,1] =chi2tail(r(df), r(chi2)) //p-value computed from the Kruskal-Wallis chi2
matrix p[i_counter,2] =chi2tail(r(df), r(chi2_adj)) //p-value from the chi2 adjusted for ties
quietly ttest WTP, by(c_`just') //
matrix p[i_counter,3] =r(p) //two sided p-value
quietly ranksum number_of_numbers, by(c_`just')
matrix p[i_counter,4] =2 * normprob(-abs(r(z)))  //p-value
quietly ranksum riskiest_option, by(c_`just')
matrix p[i_counter,5] =2 * normprob(-abs(r(z)))  //p-value
quietly ranksum safest_option, by(c_`just')
matrix p[i_counter,6] =2 * normprob(-abs(r(z)))  //p-value
}
drop i_counter
matlist p
//stop

ttest c_WhyThisNmbr_probs, by(prob)
ttest c_WhyThisNmbr_prz_nmbr, by(prob)


//////////////////////////*****STACKED BARCHART and adding the FIELD DATA**************//////////////////////
/////appending with field data for stacked chart
append using "other- tables etc/sales_data_for_appending.dta" //adding artificial data - 100k observations - that simulates real sales data distribution
//creating combined ID taking values up to 100326, instead of 326
gen combined_ID=id 
replace combined_ID=group_index if combined_ID==.
/////

// recoding Field data and some tests with it
replace treat_name = "Field" if treatment==-1
tab treat_name
bysort treat_name: sum number_of_numbers


// comparing Field with everything else (Table 4)
ranksum number_of_numbers if treatment<1, by(treatment) // Field vs PurchT: z=0.997, p=0.32
ranksum number_of_numbers if treatment==-1 | treatment ==1, by(treat_name) // z=1.35, p=0.18
ranksum number_of_numbers if treatment==-1 | treatment ==2, by(treat_name) // z=6.97, p=0.0000
ranksum number_of_numbers if treatment==-1 | treatment ==3, by(treat_name) // z=8.32, p=0.0000
ranksum number_of_numbers if treatment==-1 | treatment ==4, by(treat_name) // Field vs HypoT: z=5.36, p=0.000
//stop
ksmirnov number_of_numbers if treatment<1, by(treat_name) // D=0.13, p=0.2
ksmirnov number_of_numbers if treatment==-1 | treatment ==1, by(treat_name) // D=0.15, p=0.10
ksmirnov number_of_numbers if treatment==-1 | treatment ==2, by(treat_name) // D=0.39, p=0.0000
ksmirnov number_of_numbers if treatment==-1 | treatment ==3, by(treat_name) // D=0.46, p=0.0000
ksmirnov number_of_numbers if treatment==-1 | treatment ==4, by(treat_name) // D=0.39, p=0.0000
//

//RK: if you remove this part again, treatments will not be sorted on the graph. Please don't remove it again :) I later remove the leading digits in the Graph Editor. If you know a better way to sort the treatments, let me know.
gen treat_name_I_need_it=treat_name
replace treat_name_I_need_it = "0 Field" if treatment==-1
replace treat_name_I_need_it = "1 PurchT" if treatment==0
replace treat_name_I_need_it = "2 ChoiceT" if treatment==1
replace treat_name_I_need_it = "3 ChoiceProbT" if treatment==2
replace treat_name_I_need_it = "4 LabProbT" if treatment==3
replace treat_name_I_need_it = "5 HypoProbT" if treatment==4

///////stacked chart - number_of_numbers versus treatment (Figure 4)
///////stacked chart - number_of_numbers versus treatment ALTERNATIVE VERSION WITH GREYSCALE FROM 16 TO 0
//Percent of participants by number of numbers chosen
catplot number_of_numbers treat_name_I_need_it ,  percent(treat_name) asyvars stack ytitle("Percent of subjects", size (small)) title("") recast(bar) ///
bar(1, fcolor(gs16) lcolor(black)) ///
bar(2, fcolor(gs14) lcolor(black)) ///
bar(3, fcolor(gs12) lcolor(black)) ///
bar(4, fcolor(gs10) lcolor(black)) ///
bar(5, fcolor(gs8) lcolor(black)) ///
bar(6, fcolor(gs6) lcolor(black)) ///
bar(7, fcolor(gs5) lcolor(black)) ///
bar(8, fcolor(gs4) lcolor(black)) ///
bar(9, fcolor(gs2) lcolor(black)) ///
bar(10, fcolor(gs0) lcolor(gs1)) ///
legend(order (10 9 8 7 6 5 4 3 2 1) rows(1) stack size(small) title(Number of numbers chosen, size(small)))

///////stacked chart - safest_o versus treatment (Figure XXX)
//Percent of participants by safest_o
catplot safest_o treat_name_I_need_it ,  percent(treat_name) asyvars stack ytitle("Percent of subjects", size (small)) title("") recast(bar) ///
bar(1, fcolor(gs16) lcolor(black)) ///
bar(2, fcolor(gs14) lcolor(black)) ///
bar(3, fcolor(gs12) lcolor(black)) ///
bar(4, fcolor(gs10) lcolor(black)) ///
bar(5, fcolor(gs8) lcolor(black)) ///
bar(6, fcolor(gs6) lcolor(black)) ///
bar(7, fcolor(gs5) lcolor(black)) ///
bar(8, fcolor(gs4) lcolor(black)) ///
bar(9, fcolor(gs2) lcolor(black)) ///
bar(10, fcolor(gs0) lcolor(gs1)) ///
legend(order (10 9 8 7 6 5 4 3 2 1) rows(1) stack size(small) title(The safest option, size(small)))

///////stacked chart - riskiest_o versus treatment (Figure XXX)
//Percent of participants by riskiest_o
catplot riskiest_o treat_name_I_need_it ,  percent(treat_name) asyvars stack ytitle("Percent of subjects", size (small)) title("") recast(bar) ///
bar(1, fcolor(gs16) lcolor(black)) ///
bar(2, fcolor(gs14) lcolor(black)) ///
bar(3, fcolor(gs12) lcolor(black)) ///
bar(4, fcolor(gs10) lcolor(black)) ///
bar(5, fcolor(gs8) lcolor(black)) ///
bar(6, fcolor(gs6) lcolor(black)) ///
bar(7, fcolor(gs5) lcolor(black)) ///
bar(8, fcolor(gs4) lcolor(black)) ///
bar(9, fcolor(gs2) lcolor(black)) ///
bar(10, fcolor(gs0) lcolor(gs1)) ///
legend(rows(1) stack size(small) title(The riskiest option, size(small)))


tabplot  number_of_numbers treatment, percent(treatment) showval(format(%3.0f)) name(tab, replace)	   
	   
//drop _freq
//drop in 327/425 //drop artificial data
///////


ologit number_of_numbers $non_lab  dohmen_people   gambling_habits_overall gambling_habits_of religious c_WhyThisNmbr_probs c_WhyThisNmbr_prz_nmbr c_safest_probs c_safest_prz_nmbr c_safest_error c_riskiest_probs
est store justs // spec (6)

ologit number_of_numbers $non_lab  dohmen_people   gambling_habits_overall gambling_habits_of religious  c_WhyThisNmbr_probs c_WhyThisNmbr_prz_nmbr c_safest_probs c_safest_prz_nmbr c_safest_error c_riskiest_probs riskiest_o safest_o
est store both
//Table 5
est table treats tutti_frutti tutti_frutti_RRA signif  risk_safe justs, b(%12.3f) var(40) star(.01 .05 .10) stats(N) noomitted varlabel
//