*	NOTE(review): STOP is not a built-in Stata command, so running the whole
*	file at once halts here with an error. Presumably this is a deliberate
*	guard so that the file is worked through section by section - confirm
*	before removing.
STOP
********************************************************************************


*** NCRM ONLINE RESOURCE

*** PRODUCING AUTOMATED PUBLICATION OUTPUTS

**#	PART 2: TABLES OF UNIVARIATE DESCRIPTIVE STATISTICS


/*	This file introduces code to produce tables of univariate descriptive
	statistics (e.g. frequency, percentage, mean etc.). These simple
	descriptive tables often appear as the first table in social science
	reports and publications. Examples are provided using the collect 
	commands, the estout commands, and the asdoc command.					*/
	

********************************************************************************
**# Set-Up


	clear all

	version 17


/*	Load the example data.

	nhanes2b is an extract from a US health survey, the Second National
	Health and Nutrition Examination Survey (NHANES II). webuse fetches
	it from the Stata website.												*/

	webuse nhanes2b, clear

	numlabel _all, add		// prefix each value label with its numeric code


*	Declare the complex survey design (PSU, sampling weight and strata)

	svyset psuid [pweight = finalwgt], strata(stratid)


*	Take a first look at some of the variables

	tab1 sex rural agegrp diabetes

	summarize height weight


*	Restrict the data to complete cases: remove every observation that has
*	a missing value on one or more of the listed variables

	drop if missing(sex, rural, agegrp, height, weight, heartatk)

	count

	



********************************************************************************
**#	The collect commands 

	
/*	The collect commands are built-in Stata commands which were introduced
	in Stata 17.

	The collect commands allow you to store results from Stata commands
	in the form of 'collections' and to format collections of results.
	
	You can then format tables from these collections and output them
	to Word (or other formats).
	
	You can read about the collect commands in the Stata Customizable Tables 
	and Collected Results Reference Manual:
	
	https://www.stata.com/manuals/tables.pdf
	
	You can also view the collect command documentation using the 
	'help' command.															*/
	
	help collect


*	Start from a clean slate: remove any collections held in Stata's memory

	collect clear


/*	Table of univariate descriptive statistics for categorical variables.

	The table command produces the table of frequencies and percentages
	and, at the same time, creates a 'collection' holding the results.

	The basic form of the table command is:

	table (rowvars) (columnvars)

	A table of univariate descriptive statistics only needs rowvars, so
	the columnvars brackets are left empty. They could be omitted
	altogether, but are kept here for clarity.								*/

	table (var) (),									///
		statistic(fvfrequency sex agegrp rural)		///
		statistic(fvpercent sex agegrp rural)


/*	Table of univariate descriptive statistics for continuous variables:
	the mean and standard deviation of height and weight.					*/

	table (var) (),						///
		statistic(mean height weight)	///
		statistic(sd height weight)


/*	The two tables can be combined into a single table that reports the
	appropriate descriptive statistics for both the categorical and the
	continuous variables.													*/

	table (var) (),									///
		statistic(fvfrequency sex agegrp rural)		///
		statistic(fvpercent sex agegrp rural)		///
		statistic(mean height weight)				///
		statistic(sd height weight)
	
	
	
/*	Now we can improve the format of this table using a number of collect
	commands.
	
	Compare the old and new tables to understand what each of these commands
	is doing. Remember you can also see further details of each command 
	using the help files.
	
	We reorganise the column structure of this table.						*/
	
	help collect remap

/*	Frequencies and means are moved to level 1 of a new dimension called
	Col; percentages and standard deviations are moved to level 2. The
	table can then be laid out with just two columns.						*/

	collect remap result[fvfrequency mean] = Col[1 1] 
	collect remap result[fvpercent sd] = Col[2 2]

	
*	We add the text "Mean" and "SD" to the collection, tagged as a row
*	called mylabel. This row acts as a sub-heading above the continuous
*	variables.

	help collect get

	collect get resname = "Mean", tag(Col[1] var[mylabel]) 
	collect get resname = "SD", tag(Col[2] var[mylabel])

*	We collect an empty result to create a blank row in the table. 

	collect get empty = "  ", tag(Col[1] var[empty]) 
	collect get empty = "  ", tag(Col[2] var[empty])

	
*	We collect the sample size from the 'count' command. count leaves the
*	number of observations in r(N), which is stored in a row called n.

	count
	
	collect get n = `r(N)', tag(Col[2] var[n])
	
		
/* 	We specify the order of the contents of our table.

	We want the categorical variables, then an empty row before the 
	continuous variables, and an empty row before the count so we 
	specify this order.

	Each line of the level list ends with the line-join marker ///
	(exactly three slashes, preceded by a blank).							*/
	
	help collect layout

	collect layout (var[1.sex 2.sex ///
						1.agegrp 2.agegrp 3.agegrp ///
						4.agegrp 5.agegrp 6.agegrp ///
						0.rural 1.rural ///
						empty mylabel ///
						weight height ///
						empty n]) (Col[1 2])

					
*	We label the columns for the categorical variable (n and %).

	help collect label

	collect label levels Col 1 "n" 2 "%"

	
*	We hide the title of the Col dimension (only the level labels set
*	above remain as column headings)

	help collect style

	collect style header Col, title(hide)

	
*	We hide the row labels of the empty and mylabel rows, and stack the
*	row headers without binders

	collect style header var[empty mylabel], level(hide)
	collect style row stack, nobinder

	
/*	We edit the numerical formats of the numbers shown (i.e. number of 
	decimal places). The sformat option on the second command appends
	a % sign to the values in column 2 for the categorical variables.		*/

	collect style cell var[sex agegrp rural]#Col[1], nformat(%6.0fc) 
	collect style cell var[sex agegrp rural]#Col[2], nformat(%6.2f) sformat("%s%%") 	
	collect style cell var[weight height], nformat(%6.2f)

	
*	We remove the top border above the row headers and the results 

	collect style cell border_block[item row-header], border(top, pattern(nil)) 
	
	
*	We add a title to the table

	help collect title

	collect title "Table 1: Descriptive Statistics"
	
	
*	We add a note to the table	

	help collect note
	
	collect note "Data Source: nhanes2b"
	
	
*	Let's take a look at the table now... 
	
	help collect preview
	
	collect preview

* 	Now you can export your finished table to Word. The replace option
*	overwrites table.docx if it already exists.

	help collect export
	
	collect export "table.docx", replace	


	
	
/*	We can also use the collect commands to produce a table of svy
	adjusted descriptive statistics when using complex survey data.	
	
	Here we use the 'prop' command instead of fvfrequency and fvpercent as
	this command works with svy.
	
	We produce a table of unadjusted and adjusted descriptive statistics.
	
	Remember you can view details of what each collect command does using
	the Stata help files.													*/
	
	collect clear
	
*	Unadjusted estimates (labelled "Unadjusted" below as cmdset 1)

	table () (result), ///
        command(prop sex, percent) ///
        command(prop agegrp, percent) ///
        command(prop rural, percent) ///
        command(mean weight) ///
        command(mean height)


*	Survey-adjusted estimates, appended to the collection called Table
*	(labelled "Adjusted" below as cmdset 2)

	table () (result), ///
        command(svy: prop sex, percent) ///
        command(svy: prop agegrp, percent) ///
        command(svy: prop rural, percent) ///
        command(svy: mean weight) ///
        command(svy: mean height) name(Table) append

	collect style row stack, nobinder

*	Show a % sign after the percentages of the categorical variables

	collect style cell result[_r_b]#colname[1.sex 2.sex c1 c2 ///
											1.agegrp 2.agegrp 3.agegrp ///
											4.agegrp 5.agegrp 6.agegrp ///
											0.rural 1.rural], sformat(%s%%)

		
/*	Text rows used as sub-headings.

	The figure shown in brackets next to each mean is _r_se. For the mean
	command this is the standard error of the mean, not the standard
	deviation, so the sub-heading reads "Mean (SE)".						*/

	collect get _r_b = "Mean (SE)", tags(cmdset[1] colname[myvar])

	collect get _r_b = "Mean (SE)", tags(cmdset[2] colname[myvar])


	collect get freq = "n", tags(cmdset[1] colname[myvar0])

	collect get _r_b = "%", tags(cmdset[1] colname[myvar0])

	collect get _r_b = "%", tags(cmdset[2] colname[myvar0])

*	Blank rows

	collect get _r_b = "  ", tag(cmdset[1]  colname[empty])

	collect get _r_b = "  ", tag(cmdset[2]  colname[empty])

*	Sample size, taken from r(N) left behind by count

	count

	collect get freq = `r(N)', tag(cmdset[1] colname[n])

/*	For weight and height only, the standard error is moved to a result of
	its own (se2), wrapped in brackets, and combined with the mean into a
	composite result (meanse). For the other rows the composite shows only
	_r_b.																	*/

	collect remap result[_r_se] = result[se2], fortags(colname[weight height])

	collect style cell result[se2], sformat((%s))

	collect composite define meanse = _r_b se2

	collect style cell result[meanse], nformat(%6.2f)

*	Hide the row labels of the helper rows and the result labels

	collect style header colname[myvar], level(hide)

	collect style header colname[myvar0], level(hide)

	collect style header colname[empty], level(hide)

	collect style header result, level(hide)

*	Label the two sets of estimates

	collect label levels cmdset 1 "Unadjusted", modify

	collect label levels cmdset 2 "Adjusted", modify

	collect style header cmdset, title(hide)

	collect title "Table 1: Descriptive Statistics"
	
	collect note "Data Source: nhanes2b."
	
	collect note "Adjusted percentages, means and standard errors account for the sample design."
	
*	Rows: sub-headings, categorical variables, blank row, continuous
*	variables, blank row, sample size. Columns: unadjusted frequencies, then
*	the estimates for each set of commands.

	collect layout (colname[myvar0 sex agegrp rural empty myvar ///
							weight height empty n]) ///
							(result[freq]#cmdset[1] result[meanse]#cmdset) ()

							

	
	
********************************************************************************
**#	The estout commands


/*	An alternative command to produce automated tables of descriptive
	statistics is the esttab command, which is part of the estout suite 
	of commands.
	
	These commands were developed by Professor Ben Jann:
	
	Jann, Ben (2005): Making regression tables from stored estimates. 
	The Stata Journal 5(3): 288-308.

	Jann, Ben (2007): Making regression tables simplified. The Stata Journal 
	7(2): 227-244.

	This website provides a range of estout examples:
	http://repec.org/bocode/e/estout/esttab.html	
	
	To use estout you will need to first install it if you have not done
	so already.   															*/	
   
   
	ssc install estout

	help estout


*	Begin by removing any stored estimates from Stata's memory

	estimates clear


*	A look at the agegrp variable

	tabulate agegrp


/*	estpost belongs to the estout suite.

	It saves the results of descriptive statistics commands in a form
	that the other commands in the estout suite can use.					*/

	help estpost

	estpost tabulate agegrp


/*	The posted results are stored under the name table1 with the
	estimates store command.												*/

	help estimates store

	estimates store table1


/*	esttab writes the stored results to a file.

	Here the frequency (b) and the percent (pct) are written out.			*/

	esttab table1 using "table.rtf", replace cells(b pct)


/*	Further esttab options control the formatting of the table. As the
	same file name is used with replace, this version overwrites the file
	written by the previous command.										*/

	esttab table1 using "table.rtf", replace noobs		///
		cells("b(label(Freq)) pct(f(2) label(%))")		///
		title("Table 1: Descriptive Statistics")		///
		addnote("Data Source: nhanes2b.")
	


/*	We could also produce a table of svy adjusted results.					*/

	estpost svy: tabulate agegrp, obs percent

	estimates store table2

	esttab table2 using "table.rtf", replace noobs						///
		cells("obs(label(Freq)) b(f(2))")								///
		title("Descriptive Statistics")									///
		addnote("Note: Percentages are adjusted for survey design.")



/*	Descriptive statistics for continuous variables can be written out
	with esttab in the same way: post the results of summarize, store
	them, and pick the statistics to show with cells().						*/

	summarize height weight

	estpost summarize height weight

	estimates store table3

	esttab table3 using "table.rtf", replace nomtitle nonumber	///
		cells("mean sd min max")
	
	
/*	The estout suite of commands can be used to produce various tables of 
	descriptive statistics, including svy adjusted results.
	
	For further examples see the detailed help files and the estout webpage:
	
	http://repec.org/bocode/e/estout/esttab.html							*/
		



	
********************************************************************************
**#	The asdoc command


/*	An alternative command to produce automated tables of descriptive
	statistics is asdoc.
	
	This command was developed by Professor Attaullah Shah:
	
	Shah, A. (2018). "asdoc:  Create high-quality tables in MS Word 
	from Stata output"

	
	This website provides a number of asdoc tutorials:
	https://fintechprofessor.com/2018/01/31/asdoc/
	
	To use asdoc you will need to first install it if you have not done
	so already.   															*/	

	
	ssc install asdoc
	
	help asdoc
	
	

*	First we clear any stored estimates from Stata's memory

	estimates clear
	
	
/*	To produce tables with asdoc you simply need to add the asdoc prefix to
	your code.
	
	You can also use the save option to export the table to Word.

	The first command below uses replace and the following commands use
	append, all with the same file name (table.rtf).						*/

	asdoc summarize weight height, save(table.rtf) replace
	
	asdoc tabulate sex, save(table.rtf) append
	
	asdoc tabulate agegrp, save(table.rtf) append
	
	asdoc tabulate rural, save(table.rtf) append
	
	
/*	A limitation of the asdoc command is that it does not work with svy 
	when producing tables of descriptive statistics. Adding the svy prefix to 
	the code above does not produce an error message but the results
	are not adjusted.
	
	For further examples using the asdoc command you can view the very 
	detailed asdoc help files, or the asdoc webpage:
	
	https://fintechprofessor.com/2018/01/31/asdoc/							*/	

	
	

	
********************************************************************************
**#	END OF FILE
***	Roxanne Connelly, University of Edinburgh