/*
Smoking prevalence charts built from the CDC BRFSS 2014 survey data.

Produces one ODS HTML page (&name..htm) holding four stacked horizontal
bar charts (by gender, education level, income level, and race), each bar
split into Smokers / Non-Smokers / No Response.

Imitating graph from:
http://flowingdata.com/2016/06/20/who-still-smokes/
Using data from:
http://www.cdc.gov/brfss/annual_data/annual_2014.html
*/
%let name=cdc_smoking_2014;
filename odsout '.';

/*
I edited the "filename datain" and "libname dataout" in this cdc-supplied sas job;
%include 'd:\Public\CDC\brfss\sasout14_llcp.sas';
*/

/*
Annotate data set that draws the three legend labels along the top edge
of the chart (y=100 in the xsys/ysys='1' coordinate system): one near the
left edge, one in the middle, one near the right edge.
*/
data anno_legend;
  length text $50;
  xsys='1'; ysys='1'; hsys='3'; when='a';
  function='label';
  y=100;
  x=.5;   position='3'; text='Smokers';     output;
  x=50;   position='2'; text='Non-Smokers'; output;
  x=99.5; position='1'; text='No Response'; output;
run;

/* Display text for the three stacked bar segments. */
proc format;
  value stk_fmt
    1='Smokers'
    2='Non-Smokers'
    3='No Response'
  ;
run;

/* This is just a "proc format" that sets up the CDC user-defined formats. */
/* (Written as a block comment: a statement-style comment must not        */
/* contain an unmatched quotation mark.)                                   */
%include 'd:\Public\CDC\brfss\format14.sas';

libname dataout 'd:\Public\CDC\brfss\';

/*
Read the survey data and derive stack_order from the formatted value of
_rfsmok3: "Yes" -> 1, "No" -> 2, anything else -> 3.
*/
data my_data;
  set dataout.sasdata;
  /* Assign the formats to the variables */
  %include 'd:\Public\CDC\brfss\formas14.sas';
  smoking_text=put(_rfsmok3,_3RFSMOK.);
  format stack_order stk_fmt.;
  if smoking_text="Yes" then stack_order=1;
  else if smoking_text="No" then stack_order=2;
  else stack_order=3;
run;

/*
smoking_chart: summarize my_data by one category variable and draw the
stacked horizontal bar chart for it.

Parameters:
  var    - category variable in my_data (one bar per distinct value)
  fmt    - format (with trailing period) used to label the bars
  ypix   - height of the graph in pixels (width is fixed at 700)
  anchor - ODS HTML anchor name for this chart
  keep   - optional space-separated list of var values to chart;
           when empty, every value is charted

Relies on global state set by the caller before each call: axis1
(midpoint axis), axis2 (response axis), pattern1-3, the title statements,
the anno_legend data set, and the name macro variable.

Creates work.plot_counts and work.plot_data (both overwritten per call).
*/
%macro smoking_chart(var=, fmt=, ypix=, anchor=, keep=);

  proc sql noprint;
    /* Respondent count for each (category, smoking status) pair. */
    create table plot_counts as
      select unique &var, stack_order, count(*) as count
      from my_data
      group by &var, stack_order;
    /* Attach the per-category total to every row. Reading from          */
    /* plot_counts rather than plot_data avoids the warning that the     */
    /* create table statement recursively references its target table.   */
    create table plot_data as
      select unique *, sum(count) as group_count
      from plot_counts
      group by &var;
  quit;

  data plot_data;
    set plot_data;
    %if %length(&keep) > 0 %then %do;
      /* Drop the categories the caller chose to leave out. */
      where &var in (&keep);
    %end;
    format subgroup_percent percent7.0;
    /* Share of this category that falls in this smoking status. */
    subgroup_percent=count/group_count;
    /* Mouse-over text: category label, percent, and status label. */
    length my_html $300;
    my_html='title='||quote(
      trim(left(put(&var,&fmt)))||': '||
      trim(left(put(subgroup_percent,percent7.1)))||' '||
      trim(left(put(stack_order,stk_fmt.)))
      );
  run;

  goptions ypixels=&ypix xpixels=700;
  ods html anchor="&anchor";

  proc gchart data=plot_data anno=anno_legend;
    format &var &fmt;
    hbar &var / discrete type=sum sumvar=subgroup_percent nostats
      subgroup=stack_order nolegend
      maxis=axis1 raxis=axis2 noframe
      space=0 coutline=white
      html=my_html
      des='' name="&name._&var";
  run;
  quit;

%mend smoking_chart;

ODS LISTING CLOSE;
ODS html path=odsout body="&name..htm" (title="Smoking Prevalence - US CDC data") style=htmlblue;

goptions gunit=pct ftitle='albany amt' ftext='albany amt' htitle=15pt htext=10pt;
goptions ctext=gray33;

footnote link="http://www.cdc.gov/brfss/annual_data/annual_2014.html" c=gray
  "Data source: CDC Behavioral Risk Factor Surveillance System - 2014 data";

/* Fill colors for stack_order 1, 2, 3 respectively. */
pattern1 v=s c=cxfb9a99;
pattern2 v=s c=cxb2df8a;
pattern3 v=s c=grayee;

/* Response axis shared by all four charts: 0 to 1 in steps of .2 */
axis2 label=none style=0 order=(0 to 1 by .2) minor=none offset=(0,0);

/*
NOTE(review): several subtitles below describe 1994-2014 trends (text taken
from the FlowingData graphic), but this job only reads the 2014 data set.
Confirm the wording is what is wanted.
*/

/* ---------------- Gender ---------------- */

axis1 label=none;
title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Gender";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "A higher percentage of males smoke than females.";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "The difference between the two increased during 1994-2014.";
/* Blank a=90 title; presumably pads the left side of the chart - confirm. */
title4 a=90 h=18pct ' ';

%smoking_chart(var=sex, fmt=sex., ypix=280, anchor=gender)

/* ---------------- Education level ---------------- */

/*
The official text was a bit long/wordy, so I use the shortened
text they used on FlowingData
(the CDC-supplied alternative is the educa. format).
*/
proc format;
  value edu_fmt
    1='None'
    2='Elementary'
    3='Some High School'
    4='High School Grad'
    5='Some College'
    6='College Grad'
  ;
run;

axis1 label=none order=(6 to 1 by -1);
title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Education Level";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "Smokers with a college education decreased by almost half during 1994-2014.";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "Those with only some high school decreased by only a few percentage points.";

/*
I'm leaving out category #9, where people didn't respond
with what their education level was
*/
%smoking_chart(var=educa, fmt=edu_fmt., ypix=450, anchor=education, keep=1 2 3 4 5 6)

/* ---------------- Income level ---------------- */

/*
The official text was a bit long/wordy, so I shortened it
(the CDC-supplied alternative is the in2come. format).
*/
proc format;
  value inc_fmt
    1='Less than $10k'
    2='$10-$15k'
    3='$15-$20k'
    4='$20-$25k'
    5='$25-$35k'
    6='$35-$50k'
    7='$50-$75k'
    8='Over $75k'
  ;
run;

axis1 label=none order=(8 to 1 by -1);
title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Income Level";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "Lower household income is related to higher smoker rates.";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "The trend is more evident in recent years.";
title4 a=90 h=3pct ' ';

/* Leaving out 77 and 99, where people didn't provide income level */
%smoking_chart(var=income2, fmt=inc_fmt., ypix=500, anchor=income, keep=1 2 3 4 5 6 7 8)

/* ---------------- Race and origin ---------------- */

/*
Many race variables to choose from...
_RACE RRCLASS _IMPCRAC _IMPRACE _P1RACE
*/
/* The official text was a bit long/wordy, so I shortened it */
proc format;
  value race_fmt
    1='White'
    2='Black'
    3='American Indian'
    4='Asian'
    8='Hispanic'
  ;
run;

axis1 label=none;
title1 j=l move=(+17,+0) ls=2.5 "Smoking Prevalence by Race and Origin";
title2 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "American Indian is the only race group with increased smoking prevalence";
title3 j=l move=(+17,+0) ls=0.8 font='albany amt/italic'
  "between 1994 and 2014.";
title4 a=90 h=2.5pct ' ';

/* Leaving out #5, which is multi-race */
/* White, Black, American Indian, Asian, Hispanic */
%smoking_chart(var=_race, fmt=race_fmt., ypix=400, anchor=race, keep=1 2 3 4 8)

ODS HTML CLOSE;
ODS LISTING;