%let name=bachelor_degrees_women_usa2; filename odsout '.'; /* SAS imitation of: http://www.randalolson.com/2014/06/14/percentage-of-bachelors-degrees-conferred-to-women-by-major-1970-2012 But using totals, rather than percent Using data from (tables 325.*): https://nces.ed.gov/programs/digest/2016menu_tables.asp */ PROC IMPORT OUT=tabn325_10 DATAFILE="../democd93/nces_data/tabn325.10.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.10$A6:F58"; GETNAMES=NO; RUN; data tabn325_10 (keep = graduation_year females field); set tabn325_10 (rename=(e=females)); field='Agriculture and Natural Resources'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_15 DATAFILE="../democd93/nces_data/tabn325.15.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.15$A6:F63"; GETNAMES=NO; RUN; data tabn325_15 (keep = graduation_year females field); set tabn325_15 (rename=(e=females)); field='Architecture and related services'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_20 DATAFILE="../democd93/nces_data/tabn325.20.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.20$A6:F70"; GETNAMES=NO; RUN; data tabn325_20 (keep = graduation_year females field); set tabn325_20 (rename=(e=females)); field='Biological and Biomedical Sciences'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_25 DATAFILE="../democd93/nces_data/tabn325.25.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.25$A6:F68"; GETNAMES=NO; RUN; data tabn325_25 (keep = graduation_year females field); set tabn325_25 (rename=(e=females)); field='Business'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_30 DATAFILE="../democd93/nces_data/tabn325.30.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.30$A6:F58"; GETNAMES=NO; RUN; data tabn325_30 (keep = graduation_year females field); set tabn325_30 (rename=(e=females)); field='Communication, Journalism, and related Comm. Technologies'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_35 DATAFILE="../democd93/nces_data/tabn325.35.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.35$A6:F58"; GETNAMES=NO; RUN; data tabn325_35 (keep = graduation_year females field); set tabn325_35 (rename=(e=females)); field='Computer and Information Sciences'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_40 DATAFILE="../democd93/nces_data/tabn325.40.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.40$A6:F63"; GETNAMES=NO; RUN; data tabn325_40 (keep = graduation_year females field); set tabn325_40 (rename=(e=females)); field='Education'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_45 DATAFILE="../democd93/nces_data/tabn325.45.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.45$A6:F62"; GETNAMES=NO; RUN; data tabn325_45 (keep = graduation_year females field); set tabn325_45 (rename=(e=females)); field='Engineering and Engineering Technologies'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_50 DATAFILE="../democd93/nces_data/tabn325.50.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.50$A6:F63"; GETNAMES=NO; RUN; data tabn325_50 (keep = graduation_year females field); set tabn325_50 (rename=(e=females)); field='English Language and Literature/Letters'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_55 DATAFILE="../democd93/nces_data/tabn325.55.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.55$A6:F62"; GETNAMES=NO; RUN; data tabn325_55 (keep = graduation_year females field); set tabn325_55 (rename=(e=females)); field='Foreign Languages and Literatures'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_60 DATAFILE="../democd93/nces_data/tabn325.60.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.60$A6:F58"; GETNAMES=NO; RUN; data tabn325_60 (keep = graduation_year females field); set tabn325_60 (rename=(e=females)); field='Health Professions and related programs'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_65 DATAFILE="../democd93/nces_data/tabn325.65.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.65$A6:F63"; GETNAMES=NO; RUN; data tabn325_65 (keep = graduation_year females field); set tabn325_65 (rename=(e=females)); field='Mathematics and Statistics'; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_70 DATAFILE="../democd93/nces_data/tabn325.70.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.70$A6:F62"; GETNAMES=NO; RUN; data tabn325_70 (keep = graduation_year females field); set tabn325_70 (rename=(e=females)); field="Physical Sciences and Science Technologies"; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_80 DATAFILE="../democd93/nces_data/tabn325.80.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.80$A6:F63"; GETNAMES=NO; RUN; data tabn325_80 (keep = graduation_year females field); set tabn325_80 (rename=(e=females)); field="Psychology"; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_85 DATAFILE="../democd93/nces_data/tabn325.85.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.85$A6:F58"; GETNAMES=NO; RUN; data tabn325_85 (keep = graduation_year females field); set tabn325_85 (rename=(e=females)); field="Public Administration and Social Services"; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_90 DATAFILE="../democd93/nces_data/tabn325.90.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.90$A6:F58"; GETNAMES=NO; RUN; data tabn325_90 (keep = graduation_year females field); set tabn325_90 (rename=(e=females)); field="Social Sciences and History"; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; PROC IMPORT OUT=tabn325_95 DATAFILE="../democd93/nces_data/tabn325.95.xls" DBMS=XLS REPLACE; RANGE="Digest 2016 Table 325.95$A6:F58"; GETNAMES=NO; RUN; data tabn325_95 (keep = graduation_year females field); set tabn325_95 (rename=(e=females)); field="Visual and Performing Arts"; graduation_year=.; graduation_year=scan(a,1,'-')+1; if graduation_year^=. then output; run; data all_data; length field $100; set tabn325_10 tabn325_15 tabn325_20 tabn325_25 tabn325_30 tabn325_35 tabn325_40 tabn325_45 tabn325_50 tabn325_55 tabn325_60 tabn325_65 tabn325_70 tabn325_80 tabn325_85 tabn325_90 tabn325_95 ; length my_html $300; my_html='title='||quote( trim(left(field))||'0d'x|| trim(left(graduation_year))||'0d'x|| trim(left(put(females,comma10.0))) ); run; %let rgb01=6a3d9a; %let rgb02=1f78b4; %let rgb03=33a02c; %let rgb04=fdbf6f; %let rgb05=fb9a99; %let rgb06=e31a1c; %let rgb07=b2df8a; %let rgb08=ff7f00; %let rgb09=cab2d6; %let rgb10=a6cee3; %let rgb11=888888; %let rgb12=b15928; %let rgb13=444444; %let rgb14=dca181; %let rgb15=fccde5; %let rgb16=87e6e5; %let rgb17=bebada; proc sql noprint; create table anno_fields as select unique field, graduation_year, females from all_data where graduation_year=2015; quit; run; data anno_fields; set anno_fields; xsys='2'; ysys='2'; hsys='3'; when='a'; x=graduation_year; y=females; text='a0a0'x||trim(left(field)); function='label'; position='>'; style='albany amt/bold'; /* adjust a few of the labels up/down so they don't collide */ if field="Physical Sciences and Science Technologies" then y=y+300; if field="Visual and Performing Arts" then y=y-1500; if field="English Language and Literature/Letters" then y=y+500; if field="Public Administration and Social Services" then y=y-500; if field="Engineering and Engineering Technologies" then y=y+1000; if field="Agriculture and Natural Resources" then y=y-500; /* make colors of labels match colors of lines */ if field="Agriculture and Natural Resources" then color="cx&rgb01"; if field="Architecture and related services" then color="cx&rgb02"; if field="Biological and Biomedical Sciences" then color="cx&rgb03"; if field="Business" then color="cx&rgb04"; if field="Communication, Journalism, and related Comm. Technologies" then color="cx&rgb05"; if field="Computer and Information Sciences" then color="cx&rgb06"; if field="Education" then color="cx&rgb07"; if field="Engineering and Engineering Technologies" then color="cx&rgb08"; if field="English Language and Literature/Letters" then color="cx&rgb09"; if field="Foreign Languages and Literatures" then color="cx&rgb10"; if field="Health Professions and related programs" then color="cx&rgb11"; if field="Mathematics and Statistics" then color="cx&rgb12"; if field="Physical Sciences and Science Technologies" then color="cx&rgb13"; if field="Psychology" then color="cx&rgb14"; if field="Public Administration and Social Services" then color="cx&rgb15"; if field="Social Sciences and History" then color="cx&rgb16"; if field="Visual and Performing Arts" then color="cx&rgb17"; run; goptions device=png; goptions xpixels=900 ypixels=600; goptions xpixels=1100 ypixels=700; goptions noborder; ODS LISTING CLOSE; ODS HTML path=odsout body="&name..htm" (title="Women & Bachelor Degrees in the US") style=htmlblue; goptions gunit=pct htitle=3.0 htext=2.0 ftitle='albany amt/bold' ftext='albany amt'; goptions ctext=gray33; /* proc print data=all_data; run; proc print data=tabn325_10; run; proc print data=tabn325_15; run; */ symbol1 value=circle height=1.5 interpol=join mode=include ci=cx&rgb01; symbol2 value=circle height=1.5 interpol=join mode=include ci=cx&rgb02; symbol3 value=circle height=1.5 interpol=join mode=include ci=cx&rgb03; symbol4 value=circle height=1.5 interpol=join mode=include ci=cx&rgb04 cv=Affffff00; symbol5 value=circle height=1.5 interpol=join mode=include ci=cx&rgb05 cv=Affffff00; symbol6 value=circle height=1.5 interpol=join mode=include ci=cx&rgb06; symbol7 value=circle height=1.5 interpol=join mode=include ci=cx&rgb07 cv=Affffff00; symbol8 value=circle height=1.5 interpol=join mode=include ci=cx&rgb08; symbol9 value=circle height=1.5 interpol=join mode=include ci=cx&rgb09 cv=Affffff00; symbol10 value=circle height=1.5 interpol=join mode=include ci=cx&rgb10 cv=Affffff00; symbol11 value=circle height=1.5 interpol=join mode=include ci=cx&rgb11 cv=Affffff00; symbol12 value=circle height=1.5 interpol=join mode=include ci=cx&rgb12; symbol13 value=circle height=1.5 interpol=join mode=include ci=cx&rgb13; symbol14 value=circle height=1.5 interpol=join mode=include ci=cx&rgb14 cv=Affffff00; symbol15 value=circle height=1.5 interpol=join mode=include ci=cx&rgb15 cv=Affffff00; symbol16 value=circle height=1.5 interpol=join mode=include ci=cx&rgb16 cv=Affffff00; symbol17 value=circle height=1.5 interpol=join mode=include ci=cx&rgb17 cv=Affffff00; axis1 label=none style=0 order=(0 to 200000 by 50000) major=(c=white) minor=none offset=(0,0); axis2 label=none style=0 order=(1970 to 2015 by 5) major=(c=white) minor=none offset=(0,0) value=( t=2 ' ' t=4 ' ' t=6 ' ' t=8 ' ' t=10 ' ' ); title1 ls=1.5 move=(-16,+0) "Number of Bachelor's degrees conferred to women in the US"; title2 ls=0.8 move=(-16,+0) h=2.5 "Lines with circular markers are STEM fields"; title3 a=-90 h=55 ' '; footnote link='https://nces.ed.gov/programs/digest/current_tables.asp' ls=0.8 c=gray move=(-16,+0) "Data source: National Center for Education Statistics (NCES Table 325)"; proc gplot data=all_data anno=anno_fields; format females comma10.0; plot females*graduation_year=field / nolegend vaxis=axis1 haxis=axis2 noframe autovref lvref=33 cvref=gray33 href=(1970 1980 1990 2000 2010) lhref=(34 34 34 34 34) chref=(gray33 gray33 gray33 gray33 gray33) html=my_html des='' name="&name"; run; title3 ls=0.8 move=(-16,+0) h=2.0 "Zoomed-in to show just the data under 50,000"; title4 a=-90 h=55 ' '; axis3 label=none style=0 order=(0 to 50000 by 10000) major=(c=white) minor=none offset=(0,0); ods html anchor='zoomed'; proc gplot data=all_data anno=anno_fields; format females comma10.0; plot females*graduation_year=field / nolegend vaxis=axis3 haxis=axis2 noframe autovref lvref=33 cvref=gray33 href=(1970 1980 1990 2000 2010) lhref=(34 34 34 34 34) chref=(gray33 gray33 gray33 gray33 gray33) html=my_html des='' name="&name._zoomed"; run; /* proc sql noprint; create table foo as select unique field from all_data order by field; quit; run; proc print data=foo; run; */ quit; ODS HTML CLOSE; ODS LISTING;