2024年4月26日发(作者:)
运用import过程进行SAS数据导入完全实用教程
1 单个规范格式文件导入。
1.1 对指定分隔符(’|’,’ ’,’!’,’ab’等)数据的导入,这里以’!’为例delimiter='!'
进行说明:
data _null_;
file 'c:';
put"X1!X2!X3!X4";
put "11!22!.! ";
put "111!.!333!apple";
run;
proc import
datafile='c:'
out=
dbms=dlm
replace;
delimiter='!';
GUESSINGROWS=2000;
DATAROW=2;
getnames=yes;
run;
注意GUESSINGROWS的值阈为1 到 3276
1.2 对CSV格式的数据进行导入:
data _null_;
file 'c:';
put "Fruit1,Fruit2,Fruit3,Fruit4";
put "apple,banana,coconut,date";
put "apricot,berry,crabapple,dewberry";
run;
proc import
datafile='c:'
out=
dbms=csv
replace;
run;
1.3 对tab分隔数据的导入:
data _null_;
file 'c:';
put "cereal" "09"x "eggs" "09"x "bacon";
put "muffin" "09"x "berries" "09"x "toast";
run;
proc import
datafile='c:'
out=ast
dbms=tab
replace;
getnames=no;
run;
1.4 对dbf数据库数据进行导入:
proc import datafile="/myfiles/"
out=
dbms=dbf
replace;
run;
1.5对excel数据进行导入:
PROC IMPORT OUT= hospital1
DATAFILE= " C:My DocumentsExcel "
DBMS=EXCEL REPLACE;
SHEET="Sheet1$";
GETNAMES=YES;
MIXED=NO;
SCANTEXT=YES;
USEDATE=YES;
SCANTIME=YES;
RUN;
1.6对access数据进行导入:
PROC IMPORT DBMS=ACCESS TABLE="customers" OUT=;
DATABASE="c:";
UID="bob";
PWD="cat";
WGDB="c:";
RUN;
proc print data=;
run;
1.7 import过程步中,dbms选项汇总:
Identifier Input Data Source
ACCESS Microsoft Access 2000 or
2002 table
Extension Host Availability
.mdb Microsoft Windows *
Microsoft Windows *
Microsoft Windows *
Microsoft Windows *
UNIX
OpenVMS Alpha, UNIX,
Microsoft Windows
UNIX, Microsoft Windows
OpenVMS Alpha, UNIX,
Microsoft Windows
Microsoft Windows *
Microsoft Windows
Microsoft Windows
Microsoft Windows *
ACCESS97 Microsoft Access 97 table .mdb
ACCESS2000 Microsoft Access 2000 table .mdb
ACCESS2002 Microsoft Access 2002 table .mdb
ACCESSCS Microsoft Access table
CSV
DBF
DLM
EXCEL
EXCEL4
EXCEL5
EXCEL97
delimited file
(comma-separated values)
.mdb
.csv
dBASE 5.0, IV, III+, and III .dbf
files
delimited file (default
delimiter is a blank)
Excel 2000 or 2002
spreadsheet
Excel 4.0 spreadsheet
Excel 5.0 or 7.0 (95)
spreadsheet
Excel 97 or 7.0 (95)
spreadsheet
.xls
.xls
.xls
.*
.xls
EXCEL2000 Excel 2000 spreadsheet
EXCELCS
JMP
PCFS
TAB
WK1
WK3
WK4
Excel spreadsheet
JMP table
Files on PC server
delimited file
(tab-delimited values)
Lotus 1-2-3 Release 2
spreadsheet
Lotus 1-2-3 Release 3
spreadsheet
.xls
.xls
.jmp
.*
.txt
.wk1
.wk3
Microsoft Windows *
UNIX
UNIX, Microsoft Windows
UNIX
OpenVMS Alpha, UNIX,
Microsoft Windows
Microsoft Windows
Microsoft Windows
Microsoft Windows Lotus 1-2-3 Release 4 or 5 .wk4
spreadsheet
2 导入一个文件夹下的所有文件的数据。
2.1下面的代码导入一个文件夹下的所有文件的数据,要使用本代码需注意几点:首先,这
个文件夹下的数据文件必须是同一类型分隔的数据,比如例子中都是tab分隔的txt文件,
当然也可以对本代码进行改进,例如中间的proc import的dbms改为excel,就可以导入
excel文件了。其次,本代码直接将文件名作为SAS数据集的名字,因此文件名必须是英文,
且满足SAS命名规则。
%macro directory(dir=);
%let rs=%sysfunc(filename(filref,&dir));
%let did=%sysfunc(dopen(&filref));
%let nobs=%sysfunc(dnum(&did));
%do i=1 %to &nobs.;
%let name=%qscan(%qsysfunc(dread(&did,&i)),1,.);
%let ext=%qscan(%qsysfunc(dread(&did,&i)),-1,.);
proc import out=&name. datafile="&dir.&name..&ext" dbms=tab replace;
getnames=no;
datarow=1;
run;
%end;
%let rc=%sysfunc(dclose(&did));
%mend;
%
directory
(dir=C:PRIVATE);
如果要将数据集进行汇总到一张表,或者则可以直接将proc import out=&name中的&name
改为a&i,然后对所有的a:数据集进行set操作。除此之外,我们还可以对&ext进行设置
来达到读取指定文件格式的数据。
2.2 这里运用pipe读取到文件名称,再读取数据。首先建立三个数据集:
data _null_;
file 'c:';
put "05JAN2001 6 W12301 1.59 9.54";
put "12JAN2001 3 P01219 2.99 8.97";
run;
data _null_;
file 'c:';
put "02FEB2001 1 P01219 2.99 2.99";
put "05FEB2001 3 A00901 1.99 5.97";
put "07FEB2001 2 C21135 3.00 6.00";
run;
data _null_;
file 'c:';
put "06MAR2001 4 A00101 3.59 14.36";
put "12MAR2001 2 P01219 2.99 5.98";
run;
filename blah pipe 'dir C:Junk /b';
data _null_;
infile blah truncover end=last;
length fname $20;
input fname;
i+1;
call symput('fname'||trim(left(put(i,8.))),scan(trim(fname),1,'.'));
call symput('pext'||trim(left(put(i,8.))),trim(fname));
if last then call symput('total',trim(left(put(i,8.))));
run;
%macro
test
;
%do i=1 %to &total;
proc import datafile="c:Junk&&pext&i"
out=work.&&fname&i
dbms=dlm replace;
delimiter=' ';
getnames=no ;
run;
proc print data=work.&&fname&i;;
title &&fname&i;
run;
%end;
%mend;
%
test
;
这里,如果要导入指定文件类型的数据,例如txt,则只需要将filename blah pipe 'dir
C:Junk /b';改为filename blah pipe 'dir C:Junk.*.txt /b';即可。
除了用filename blah pipe 'dir C:Junk.*.txt /b';得到指定类型的文件名,我们还可
以%sysexec dir *.xls /b/o:n > ;来将xls文件输出到指定的文件中,供读取操
作用。这个将在下面的内容作介绍。
3 导入excel表中的所有sheet的数据,并将其汇总到一个数据表中。
3.1 Excel表是sas导入导出最多的数据表之一,本例中,我们将导入一个excel中的不同
的数据
%let dir=C:ExcelFiles;
%macro ReadXls (inf);
libname excellib excel "&dir.&inf";
proc sql noprint;
create table sheetname as
select tranwrd(memname, "''", "'") as sheetname
from w
where libname="EXCELLIB";
select count(DISTINCT sheetname) into :cnt_sht
from sheetname;
select DISTINCT sheetname into :sheet1 - :sheet%
left
(&cnt_sht)
from sheetname;
quit;
libname excellib clear;
%do i=1 %to &cnt_sht;
proc import datafile="&dir.&inf"
out=sheet&i replace;
sheet="&&sheet&i";
getnames=yes;
mixed=yes;
run;
proc append base=master data=sheet&i force;
run;
%end;
%mend ReadXls;
%
ReadXls
();
这样,我们可以通过%
ReadXls
(); %
ReadXls
();等来得到多个excel文件
的所有数据集。
3.2 我们可以结合3.1和2.1或2.2的方法来读取多个文件中的多个表。这里再介绍一种新
的读取多个文件的方法:
options noxwait;
%macro ReadXls (dir=);
%sysexec cd &dir; %sysexec dir *.xls /b/o:n > ;
data _indexfile;
length filen $200;
infile "&dir./";
input filen $;
run;
proc sql noprint;
select count(filen) into :cntfile from _indexfile;
%if &cntfile>=1 %then %do;
select filen into :filen1-:filen%
left
(&cntfile)
from _indexfile;
%end;
quit;
%do i=1 %to &cntfile;
libname excellib excel "&dir.&&filen&i";
proc sql noprint;
create table sheetname as
select tranwrd(memname, "''", "'") as sheetname
from w
where libname="EXCELLIB";
select count(DISTINCT sheetname) into :cnt_sht
from sheetname;
select DISTINCT sheetname into :sheet1 - :sheet%
left
(&cnt_sht)
from sheetname;
quit;
%do j=1 %to &cnt_sht;
proc import datafile="&dir.&&filen&i"
out=sheet&j replace;
sheet="&&sheet&j";
getnames=yes;
mixed=yes;
run;
data sheet&j;
length _excelfilename $100 _sheetname $32;
set sheet&j;
_excelfilename="&&filen&z";
_sheetname="&&sheet&j";
run;
proc append base=master data=sheet&j force;
run;
%end;
libname excellib clear;
%end;
%mend ReadXls;
%
readxls
(dir=C:ExcelFiles);
4 从多个文件夹下读取多个数据。
直接给源代码吧。
%macro etl(ds, ds2,path);
data &ds &ds2;
LENGTH DateTime 8
UserName $ 20
Submit $ 10
SentNumber $ 11
IP $ 15
MessageID $ 15
SendingMode $ 6
Contents $ 160 ;
%let filrf=mydir;
%let rc=%sysfunc(filename(filrf,"&path"));
%let did=%sysfunc(dopen(&filrf));
%let memcount=%sysfunc(dnum(&did));
%do i=1 %to &memcount;
AccountNum+1;
%let counter = AccountNum;
%let username&i=%sysfunc(dread(&did,&i));
%let filref=mydir2;
%let file=%sysfunc(filename(filref,"&path&&username&i"));
%let op=%sysfunc(dopen(&filref));
%let flcount=%sysfunc(dnum(&op));
filename FT77F001 "D:SMSGatewayData2USERS&&username&i*.log";
%do j=1 %to &flcount;
%let trans&j=%sysfunc(dread(&op,&j));
%put '&&username&i = ' &&username&i '&&trans&j= ' &&trans&j '&flcount = ' &flcount
'&filref = ' &filref '&filrf = ' &filrf;
infile FT77F001 filename=filename eov=eov end = done length=L DSD;
INPUT DateTime : ANYDTDTM19.
UserName $
Submit $
SentNumber $
IP $
MessageID $
SendingMode $
Contents $;
output;
%end;
%end;
run;
%mend;
%
etl
(sms2, sms,D:SMSGatewayData2USERS)
发布评论