%let name=rubella_incidence; filename odsout '.'; /* SAS enhancement of graphs from: http://graphics.wsj.com/infectious-diseases-and-vaccines/ Using data downloaded from: http://www.tycho.pitt.edu/l1advanced.php#results Note that their FAQ says ... http://www.tycho.pitt.edu/faq.php The "-" value indicates that there is no data for that particular week, disease, and location. The "0" value indicates a report of zero cases or deaths for that particular week, disease, and location. */ data my_data; infile 'tycho_data\RUBELLA_Incidence_1966-2003_20150213100712.csv' delimiter=',' MISSOVER DSD lrecl=32767 firstobs=4; input year week ALABAMA ALASKA ARIZONA ARKANSAS CALIFORNIA COLORADO CONNECTICUT DELAWARE DISTRICT_OF_COLUMBIA FLORIDA GEORGIA HAWAII IDAHO ILLINOIS INDIANA IOWA KANSAS KENTUCKY LOUISIANA MAINE MARYLAND MASSACHUSETTS MICHIGAN MINNESOTA MISSISSIPPI MISSOURI MONTANA NEBRASKA NEVADA NEW_HAMPSHIRE NEW_JERSEY NEW_MEXICO NEW_YORK NORTH_CAROLINA NORTH_DAKOTA OHIO OKLAHOMA OREGON PENNSYLVANIA RHODE_ISLAND SOUTH_CAROLINA SOUTH_DAKOTA TENNESSEE TEXAS UTAH VERMONT VIRGINIA WASHINGTON WEST_VIRGINIA WISCONSIN WYOMING; run; proc transpose data=my_data out=my_data (rename=(_name_=state col1=incidence_per_100k)); by year week; run; /* There are a *lot* of missing '-' values in the data, and simply summinng the 52 weeks to get an annual incidence would produce an incorrect result (for any year/state that had a missing value ... which would be most of them). Therefore I'm using the annual average instead of the sum. */ proc sql noprint; create table summary_data as select unique year, state, avg(incidence_per_100k) as avg_incidence_per_100k from my_data group by year, state; quit; run; data summary_data; set summary_data; state=propcase(translate(state,' ','_')); if state='District Of Columbia' then state='District of Columbia'; run; /* Number each state, the way you want them to appear in the y-axis (y_order) - basically ascending or descending */ proc sql noprint; create table states as select unique state from summary_data order by state desc; quit; run; data states; set states; y_order=_n_; run; /* Merge y_order back into summary_data */ proc sql noprint; create table summary_data as select unique summary_data.*, states.y_order from summary_data left join states on summary_data.state=states.state; quit; run; /* Create a custom legend, by defining your own 'buckets' */ %let break_1=.1; %let break_2=.5; %let break_3=1; %let break_4=2; data summary_data; set summary_data; if avg_incidence_per_100k<&break_1 then bucket=1; else if avg_incidence_per_100k<&break_2 then bucket=2; else if avg_incidence_per_100k<&break_3 then bucket=3; else if avg_incidence_per_100k<&break_4 then bucket=4; else /* if avg_incidence_per_100k>=&break_4 then */ bucket=5; if avg_incidence_per_100k=. then bucket=6; run; proc format; value ranges 1 = "< &break_1" 2 = "< &break_2" 3 = "< &break_3" 4 = "< &break_4" 5 = ">= &break_4" 6 = "N/A" ; run; /* Add html hover-text */ data summary_data; set summary_data; length my_html $200; my_html='title='||quote('Year '||trim(left(year))||' weekly average in '||trim(left(state))||' = '|| trim(left(put(avg_incidence_per_100k,comma8.3))) ); run; /* Now, create a grid of X/Y polygons, suitable for use with gmap */ data map_grid; set summary_data; x=year; y=y_order; output; x=year+1; y=y_order; output; x=year+1; y=y_order+1; output; x=year; y=y_order+1; output; run; /* Annotate labels for state names */ proc sql noprint; create table anno_states as select unique state as text, min(x)-.2 as x, avg(y) as y from map_grid group by state; quit; run; data anno_states; length function $8 text $100; set anno_states; xsys='2'; ysys='2'; hsys='d'; when='a'; function='label'; position='<'; size=9; run; /* Annotate labels for years */ proc sql noprint; create table anno_years as select unique year, avg(x) as x from map_grid where (year/10)=int(year/10) and y_order=1 group by year; quit; run; data anno_years; set anno_years; length function $8 text $100; xsys='2'; ysys='2'; hsys='d'; when='a'; function='move'; y=1; output; function='draw'; y=y-.5; size=.001; output; function='label'; y=y-.7; position='+'; size=9; text=trim(left(year)); output; run; data all_anno; set anno_states anno_years; run; goptions device=png; goptions xpixels=750 ypixels=850; goptions border; ODS LISTING CLOSE; ODS HTML path=odsout body="&name..htm" (title="Rubella Incidence") style=htmlblue; goptions ftitle="albany amt/bold" ftext="albany amt" gunit=pct htitle=18pt htext=9pt ctext=gray33; pattern1 v=s c=cxfee5d9; pattern2 v=s c=cxfcae91; pattern3 v=s c=cxfb6a4a; pattern4 v=s c=cxde2d26; pattern5 v=s c=cxa50f15; pattern6 v=m4n45 c=gray44; legend1 label=none value=(justify=left) position=(top) shape=bar(.13in,.13in) order=(5 4 3 2 1 6); title1 link='http://www.tycho.pitt.edu/l1advanced.php' ls=1.5 c=cxa50f15 "Rubella: " c=gray77 font='albany amt' "Average Weekly Incidence per 100,000"; title2 a=90 h=12 ' '; footnote h=1 ' '; footnote2 link='http://www.tycho.pitt.edu/l1advanced.php' ls=1.0 c=gray h=11pt 'Using data from Project Tycho: http://www.tycho.pitt.edu'; proc gmap data=summary_data map=map_grid all; format bucket ranges.; id state year; choro bucket / midpoints = 1 2 3 4 5 6 cdefault=yellow coutline=gray77 legend=legend1 anno=all_anno html=my_html des='' name="&name"; run; quit; ODS HTML CLOSE; ODS LISTING;