2024年4月26日发(作者:)

运用import过程进行SAS数据导入完全实用教程

1 单个规范格式文件导入。

1.1 对指定分隔符(’|’,’ ’,’!’,’ab’等)数据的导入,这里以’!’为例delimiter='!'

进行说明:

data _null_;

file 'c:';

put"X1!X2!X3!X4";

put "11!22!.! ";

put "111!.!333!apple";

run;

proc import

datafile='c:'

out=

dbms=dlm

replace;

delimiter='!';

GUESSINGROWS=2000;

DATAROW=2;

getnames=yes;

run;

注意GUESSINGROWS的值阈为1 到 3276

1.2 对CSV格式的数据进行导入:

data _null_;

file 'c:';

put "Fruit1,Fruit2,Fruit3,Fruit4";

put "apple,banana,coconut,date";

put "apricot,berry,crabapple,dewberry";

run;

proc import

datafile='c:'

out=

dbms=csv

replace;

run;

1.3 对tab分隔数据的导入:

data _null_;

file 'c:';

put "cereal" "09"x "eggs" "09"x "bacon";

put "muffin" "09"x "berries" "09"x "toast";

run;

proc import

datafile='c:'

out=ast

dbms=tab

replace;

getnames=no;

run;

1.4 对dbf数据库数据进行导入:

proc import datafile="/myfiles/"

out=

dbms=dbf

replace;

run;

1.5对excel数据进行导入:

PROC IMPORT OUT= hospital1

DATAFILE= " C:My DocumentsExcel "

DBMS=EXCEL REPLACE;

SHEET="Sheet1$";

GETNAMES=YES;

MIXED=NO;

SCANTEXT=YES;

USEDATE=YES;

SCANTIME=YES;

RUN;

1.6对access数据进行导入:

PROC IMPORT DBMS=ACCESS TABLE="customers" OUT=;

DATABASE="c:";

UID="bob";

PWD="cat";

WGDB="c:";

RUN;

proc print data=;

run;

1.7 import过程步中,dbms选项汇总:

Identifier Input Data Source

ACCESS Microsoft Access 2000 or

2002 table

Extension Host Availability

.mdb Microsoft Windows *

Microsoft Windows *

Microsoft Windows *

Microsoft Windows *

UNIX

OpenVMS Alpha, UNIX,

Microsoft Windows

UNIX, Microsoft Windows

OpenVMS Alpha, UNIX,

Microsoft Windows

Microsoft Windows *

Microsoft Windows

Microsoft Windows

Microsoft Windows *

ACCESS97 Microsoft Access 97 table .mdb

ACCESS2000 Microsoft Access 2000 table .mdb

ACCESS2002 Microsoft Access 2002 table .mdb

ACCESSCS Microsoft Access table

CSV

DBF

DLM

EXCEL

EXCEL4

EXCEL5

EXCEL97

delimited file

(comma-separated values)

.mdb

.csv

dBASE 5.0, IV, III+, and III .dbf

files

delimited file (default

delimiter is a blank)

Excel 2000 or 2002

spreadsheet

Excel 4.0 spreadsheet

Excel 5.0 or 7.0 (95)

spreadsheet

Excel 97 or 7.0 (95)

spreadsheet

.xls

.xls

.xls

.*

.xls

EXCEL2000 Excel 2000 spreadsheet

EXCELCS

JMP

PCFS

TAB

WK1

WK3

WK4

Excel spreadsheet

JMP table

Files on PC server

delimited file

(tab-delimited values)

Lotus 1-2-3 Release 2

spreadsheet

Lotus 1-2-3 Release 3

spreadsheet

.xls

.xls

.jmp

.*

.txt

.wk1

.wk3

Microsoft Windows *

UNIX

UNIX, Microsoft Windows

UNIX

OpenVMS Alpha, UNIX,

Microsoft Windows

Microsoft Windows

Microsoft Windows

Microsoft Windows Lotus 1-2-3 Release 4 or 5 .wk4

spreadsheet

2 导入一个文件夹下的所有文件的数据。

2.1下面的代码导入一个文件夹下的所有文件的数据,要使用本代码需注意几点:首先,这

个文件夹下的数据文件必须是同一类型分隔的数据,比如例子中都是tab分隔的txt文件,

当然也可以对本代码进行改进,例如中间的proc import的dbms改为excel,就可以导入

excel文件了。其次,本代码直接将文件名作为SAS数据集的名字,因此文件名必须是英文,

且满足SAS命名规则。

%macro directory(dir=);

%let rs=%sysfunc(filename(filref,&dir));

%let did=%sysfunc(dopen(&filref));

%let nobs=%sysfunc(dnum(&did));

%do i=1 %to &nobs.;

%let name=%qscan(%qsysfunc(dread(&did,&i)),1,.);

%let ext=%qscan(%qsysfunc(dread(&did,&i)),-1,.);

proc import out=&name. datafile="&dir.&name..&ext" dbms=tab replace;

getnames=no;

datarow=1;

run;

%end;

%let rc=%sysfunc(dclose(&did));

%mend;

%

directory

(dir=C:PRIVATE);

如果要将数据集进行汇总到一张表,或者则可以直接将proc import out=&name中的&name

改为a&i,然后对所有的a:数据集进行set操作。除此之外,我们还可以对&ext进行设置

来达到读取指定文件格式的数据。

2.2 这里运用pipe读取到文件名称,再读取数据。首先建立三个数据集:

data _null_;

file 'c:';

put "05JAN2001 6 W12301 1.59 9.54";

put "12JAN2001 3 P01219 2.99 8.97";

run;

data _null_;

file 'c:';

put "02FEB2001 1 P01219 2.99 2.99";

put "05FEB2001 3 A00901 1.99 5.97";

put "07FEB2001 2 C21135 3.00 6.00";

run;

data _null_;

file 'c:';

put "06MAR2001 4 A00101 3.59 14.36";

put "12MAR2001 2 P01219 2.99 5.98";

run;

filename blah pipe 'dir C:Junk /b';

data _null_;

infile blah truncover end=last;

length fname $20;

input fname;

i+1;

call symput('fname'||trim(left(put(i,8.))),scan(trim(fname),1,'.'));

call symput('pext'||trim(left(put(i,8.))),trim(fname));

if last then call symput('total',trim(left(put(i,8.))));

run;

%macro

test

;

%do i=1 %to &total;

proc import datafile="c:Junk&&pext&i"

out=work.&&fname&i

dbms=dlm replace;

delimiter=' ';

getnames=no ;

run;

proc print data=work.&&fname&i;;

title &&fname&i;

run;

%end;

%mend;

%

test

;

这里,如果要导入指定文件类型的数据,例如txt,则只需要将filename blah pipe 'dir

C:Junk /b';改为filename blah pipe 'dir C:Junk.*.txt /b';即可。

除了用filename blah pipe 'dir C:Junk.*.txt /b';得到指定类型的文件名,我们还可

以%sysexec dir *.xls /b/o:n > ;来将xls文件输出到指定的文件中,供读取操

作用。这个将在下面的内容作介绍。

3 导入excel表中的所有sheet的数据,并将其汇总到一个数据表中。

3.1 Excel表是sas导入导出最多的数据表之一,本例中,我们将导入一个excel中的不同

的数据

%let dir=C:ExcelFiles;

%macro ReadXls (inf);

libname excellib excel "&dir.&inf";

proc sql noprint;

create table sheetname as

select tranwrd(memname, "''", "'") as sheetname

from w

where libname="EXCELLIB";

select count(DISTINCT sheetname) into :cnt_sht

from sheetname;

select DISTINCT sheetname into :sheet1 - :sheet%

left

(&cnt_sht)

from sheetname;

quit;

libname excellib clear;

%do i=1 %to &cnt_sht;

proc import datafile="&dir.&inf"

out=sheet&i replace;

sheet="&&sheet&i";

getnames=yes;

mixed=yes;

run;

proc append base=master data=sheet&i force;

run;

%end;

%mend ReadXls;

%

ReadXls

();

这样,我们可以通过%

ReadXls

(); %

ReadXls

();等来得到多个excel文件

的所有数据集。

3.2 我们可以结合3.1和2.1或2.2的方法来读取多个文件中的多个表。这里再介绍一种新

的读取多个文件的方法:

options noxwait;

%macro ReadXls (dir=);

%sysexec cd &dir; %sysexec dir *.xls /b/o:n > ;

data _indexfile;

length filen $200;

infile "&dir./";

input filen $;

run;

proc sql noprint;

select count(filen) into :cntfile from _indexfile;

%if &cntfile>=1 %then %do;

select filen into :filen1-:filen%

left

(&cntfile)

from _indexfile;

%end;

quit;

%do i=1 %to &cntfile;

libname excellib excel "&dir.&&filen&i";

proc sql noprint;

create table sheetname as

select tranwrd(memname, "''", "'") as sheetname

from w

where libname="EXCELLIB";

select count(DISTINCT sheetname) into :cnt_sht

from sheetname;

select DISTINCT sheetname into :sheet1 - :sheet%

left

(&cnt_sht)

from sheetname;

quit;

%do j=1 %to &cnt_sht;

proc import datafile="&dir.&&filen&i"

out=sheet&j replace;

sheet="&&sheet&j";

getnames=yes;

mixed=yes;

run;

data sheet&j;

length _excelfilename $100 _sheetname $32;

set sheet&j;

_excelfilename="&&filen&z";

_sheetname="&&sheet&j";

run;

proc append base=master data=sheet&j force;

run;

%end;

libname excellib clear;

%end;

%mend ReadXls;

%

readxls

(dir=C:ExcelFiles);

4 从多个文件夹下读取多个数据。

直接给源代码吧。

%macro etl(ds, ds2,path);

data &ds &ds2;

LENGTH DateTime 8

UserName $ 20

Submit $ 10

SentNumber $ 11

IP $ 15

MessageID $ 15

SendingMode $ 6

Contents $ 160 ;

%let filrf=mydir;

%let rc=%sysfunc(filename(filrf,"&path"));

%let did=%sysfunc(dopen(&filrf));

%let memcount=%sysfunc(dnum(&did));

%do i=1 %to &memcount;

AccountNum+1;

%let counter = AccountNum;

%let username&i=%sysfunc(dread(&did,&i));

%let filref=mydir2;

%let file=%sysfunc(filename(filref,"&path&&username&i"));

%let op=%sysfunc(dopen(&filref));

%let flcount=%sysfunc(dnum(&op));

filename FT77F001 "D:SMSGatewayData2USERS&&username&i*.log";

%do j=1 %to &flcount;

%let trans&j=%sysfunc(dread(&op,&j));

%put '&&username&i = ' &&username&i '&&trans&j= ' &&trans&j '&flcount = ' &flcount

'&filref = ' &filref '&filrf = ' &filrf;

infile FT77F001 filename=filename eov=eov end = done length=L DSD;

INPUT DateTime : ANYDTDTM19.

UserName $

Submit $

SentNumber $

IP $

MessageID $

SendingMode $

Contents $;

output;

%end;

%end;

run;

%mend;

%

etl

(sms2, sms,D:SMSGatewayData2USERS)