SAS-resources


What is SAS?

 

According to the SAS website:

SAS is an integrated system of software solutions that enables you to perform the following tasks:

--data entry, retrieval, and management
--report writing and graphics design
--statistical and mathematical analysis
--business forecasting and decision support
--operations research and project management
--applications development

How you use SAS depends on what you want to accomplish. Some people use many of the capabilities of the SAS System, and others use only a few.

In this class, we will use SAS to perform statistical analysis, specifically regression and ANOVA. As you proceed through the master's program, you will learn more capabilities of SAS.

Many graduates of our program eventually work as statisticians, but some find jobs as SAS programmers. You are encouraged to learn as much as you can on your own, and perhaps to get certified as a SAS Base or Advanced programmer. Doing so will strengthen your skills as a statistician and add "SAS programmer" to your career options.


There are tutorial videos on the SAS training website. For example:

 

 

DATA INPUT

 

/************************************************/
/******* Reading space-delimited data ******/
/************************************************/

/* Reads five student records typed in-stream using list input, i.e.   */
/* values on each record are separated by blanks. Every record holds    */
/* the student's name followed by two homework scores, two quiz scores  */
/* and the final exam score.                                            */
DATA scores1;
          /* The $ must FOLLOW a variable name: it marks `name` as a character */
          /* variable. With list input a character value is stored with a      */
          /* default length of 8, which is enough for the names below.         */
          INPUT name $ hw1 hw2 quiz1 quiz2 final;
          DATALINES;
    Keenan 9 8 8 6 75
    Wu 10 10 9 9 88
    Gold 5 5 7 9 78
    Williams 10 5 10 9 80
    Alonzo 10 10 10 10 95
    ;
          RUN;

          /* Print the data set just created to confirm it was read correctly. */
          PROC PRINT DATA=scores1;
          RUN;
QUIT;

 

  • /************************************************/
    /******* Reading data from web page ******/
    /************************************************/
    filename egdata URL "http://www.stat.wmich.edu/naranjo/stat5680/nhanes.txt";
    DATA one;
              INFILE egdata FIRSTOBS=3;
              INPUT gender $ age race $ 19-37 education $ 38-54 fam_inc poverty weight height bmi sys_bp dias_bp cholest glucose;
    PROC PRINT DATA=one (obs=20);
              VAR gender age race education bmi cholest glucose; /* OPTIONAL */
              RUN;QUIT;


    /************************************************/
    /***** Reading data from computer file *****/
    /************************************************/

    /* Point the fileref `egdata` at a copy of nhanes.txt saved on this    */
    /* computer. Edit the path to wherever you saved the file.             */
    FILENAME egdata "C:\Users\naranjo\Documents\My SAS Files(32)\nhanes.txt";

    /* Build data set `two` from the local file with the same layout as    */
    /* the web example.                                                    */
    /*   FIRSTOBS=3 : start reading at line 3, matching the web-page       */
    /*                example that reads the same nhanes.txt. Without it   */
    /*                the non-data lines at the top of the file would be   */
    /*                read as observations.                                */
    /*   NOTE(review): this assumes the local copy is an unmodified        */
    /*                download. If its first two lines are already data,   */
    /*                remove FIRSTOBS=3.                                   */
    DATA two;
              INFILE egdata FIRSTOBS=3;
              INPUT gender $ age race $ 19-37 education $ 38-54 fam_inc poverty weight height bmi sys_bp dias_bp cholest glucose;
              RUN;

    /* Show the first 20 observations as a quick check of the import. */
    PROC PRINT DATA=two (obs=20);
              VAR gender age race education bmi cholest glucose;     /* OPTIONAL: omit VAR to print every variable */
              RUN;
    QUIT;


    Quick summary statistics for quantitative data


    /* Descriptive statistics for five numeric measures in data set `one`.  */
    /* N MEAN STD MIN MAX are the statistics PROC MEANS reports by default; */
    /* they are spelled out here so the table contents are explicit.        */
    TITLE 'Summary Statistics using PROC MEANS';
    PROC MEANS DATA=one N MEAN STD MIN MAX;
        VAR bmi sys_bp dias_bp cholest glucose;
    RUN;

    /* BY-group processing needs the input ordered by the BY variable, so  */
    /* sort `one` by gender first (no OUT= is given, so `one` itself is    */
    /* replaced by the sorted version).                                    */
    PROC SORT DATA=one;
        BY gender;
    RUN;

    /* Same summary as the overall PROC MEANS, produced once per gender.   */
    /* N MEAN STD MIN MAX are the PROC MEANS defaults, written explicitly. */
    TITLE 'Summary Statistics by gender';
    PROC MEANS DATA=one N MEAN STD MIN MAX;
        BY gender;
        VAR bmi sys_bp dias_bp cholest glucose;
    RUN;

    /* PROC TABULATE builds a custom summary table.                        */
    /*   rows    : one per education level, plus an ALL (overall) row      */
    /*   columns : count, mean and standard deviation of bmi               */
    TITLE 'Table of Summary Statistics for BMI';
    PROC TABULATE DATA=one;
        CLASS education;            /* grouping (row) variable   */
        VAR   bmi;                  /* analysis variable         */
        TABLE education ALL,
              bmi * (N MEAN STD);
    RUN;




    Quick summary statistics for categorical data


    /* One-way frequency table: counts and percentages per education level. */
    /* (See SAS Procedures Guide, Version 6, Third Edition, Chapter 20)     */
    TITLE 'One-Way Frequency Table By Education';
    PROC FREQ DATA=one;
        TABLES education;
    RUN;

    /* Two-way frequency table (cross-tabulation): education levels form */
    /* the rows and gender forms the columns.                            */
    TITLE 'Two-Way Frequency Table: Education by Gender';
    PROC FREQ DATA=one;
        TABLES education * gender;
    RUN;

    QUIT;


    DATA STEP TOOLS

/* Fileref for the example data file served over HTTP. */
FILENAME egdata URL "http://www.stat.wmich.edu/naranjo/sas/nhanes.txt";

/* Read the file into data set `one`, starting at line 3. race and      */
/* education come from fixed columns 19-37 and 38-54; everything else   */
/* is blank-delimited list input.                                       */
DATA one;
    INFILE egdata FIRSTOBS=3;
    INPUT gender    $
          age
          race      $ 19-37
          education $ 38-54
          fam_inc poverty weight height bmi
          sys_bp dias_bp cholest glucose;
RUN;

/*CREATE NEW DATA SET WITH NEW VARIABLES AS FUNCTIONS OF EXISTING VARIABLES*/
/* Copies every observation of `one` into `two` and adds:                   */
/*   bpdiff  - systolic minus diastolic blood pressure                      */
/*   bpratio - systolic divided by diastolic blood pressure                 */
/*   weight2 - 'Normal' when bmi is below 30, 'Obese' otherwise, and blank  */
/*             when bmi is missing                                          */
DATA two;
    SET one;

    /* Declare the length up front so it does not depend on which value */
    /* happens to be assigned first in the code.                        */
    LENGTH weight2 $ 6;

    bpdiff = sys_bp - dias_bp;

    /* A diastolic reading of 0 would trigger a division-by-zero note in */
    /* the log; skip the division so bpratio is simply left missing.     */
    IF dias_bp NE 0 THEN bpratio = sys_bp / dias_bp;

    /* SAS treats a missing numeric value as smaller than any number, so */
    /* a bare `bmi < 30` would label people with no BMI as 'Normal'.     */
    /* Test for missing first and leave weight2 blank in that case.      */
    IF MISSING(bmi) THEN weight2 = ' ';
    ELSE IF bmi < 30.0 THEN weight2 = 'Normal';
    ELSE weight2 = 'Obese';
    RUN;

/*CREATE SUBSET DATA SET (SELECTED VARIABLES, SELECTED OBSERVATIONS)*/
/* KEEP= limits which variables are brought in from `two`; the         */
/* subsetting IF then retains only observations whose race is "Black"  */
/* or "White" and whose age is strictly between 20 and 60.             */
DATA three;
    SET two (KEEP = age gender race sys_bp dias_bp bpdiff bpratio bmi weight2);
    IF race IN ("Black", "White") AND 20 < age < 60;
RUN;

/* List the subset, with columns in the order given on VAR. */
PROC PRINT DATA=three;
    VAR age gender race sys_bp dias_bp bpdiff bpratio bmi weight2;
RUN;
QUIT;

Tip sheets