自我愚见,望有错指出改之。
/*SQL 学习*/
/*数据定义语言(DDL):create、drop、alter*/
/*数据操作语言(DML):insert、update、delete*/
/*数据查询语言(DQL):select*/
/*数据控制语言(DCL):grant、remove、commit、rollback*/
/*SELECT语句*/
proc sql;
select ...
from..
where..
group by..
having..根据group
by得到子数据集
order by..;
/*EX1*/
data a;
set sashelp.class;
where a<70;
run;
proc sql;
select sex='F'
from sashelp.classfit;
quit;
proc sql;
select sex='F'
from sashelp.classfit;
quit; /*s输出的结果是111000.... 表示如果是符合sex=‘F’的就是1*/
/*利用SQL创建新表*/
proc sql;
create table
new(表的名字) as
select * (代表选择全部)
from ..;
quit;
/*利用列创建新变量*/
proc sql;
create table
new as
select*,
weight/height as ratio /*这个蛮有意思
ratio是新的列名*/
format=5.2 label='体重:身高比'
from
sashelp.classfit;
quit;
/*利用SQL进行数据统计 类似Proc
summary */
proc sql;
create table
new1 as
select min(age) as youngest,
max(age) as oldest,
mean(height) as h format=5.1
from sashelp.classfit;
quit;
/*等同于下面的语句*/
proc summary data=sashelp.classfit;
var age height;
output out
=new2(drop=_type_ _freq_)/*删去两个列的统计量*/
min(age)=yougest
max(age)=oldgest
mean(height)=h; /*记住分号别忘了*/
run;
/*利用SQL进行分组统计 加入了group
by*/
proc sql;
create table
new3 as
select sex,
min(age) as youngest,
max(age) as oldest,
mean(height) as h format=5.1
from sashelp.classfit
group by
sex;
quit;
/*等同于下面的语句*/
proc summary data=sashelp.classfit;
class sex;
var age height;
output out
=new4(drop=_type_ _freq_)
min(age)=yougest
max(age)=oldgest
mean(height)=h;
run;
/*根据条件语句创建数据集
CASE WHEN ..THEN*/
proc sql;
create table
new5 as
select name,age,sex,
case when
age=11 then 'zoo'
when sex='F' then 'man'
else '[none]'
end
as nenene /*是对新的 zoo man 这列命名*/
from new;
quit;
/*利用where创建子数据集 简单*/
proc sql;
create table
new6 as
select *
from new
where sex='F';
quit;
/*利用having从句代替where从句
*/
proc sql;
create table
new7 as
select*
from new
group by
sex,age
having sex='F';
quit;
/**必记:having只能在group
by 之后 where必须在其之前**/
/*利用distinct剔除重复记录 **这个有陷阱见笔记*/
proc sql;
create table
new8 as
select distinct*
from new;
quit; /*这个例子看不出效果 是以行为标准*/
/*****************************************************/
/*SQL提升*/
/*一.将原始变量和二次分析生成的变量进行合并*/
proc sql;
create table
new9 as
select name,sex,
count(*) as
many label='统计'
from
new
group by age
order by name ;
quit;
proc sql;
create table
new10 as
select name,sex,
count(distinct
sex) as many label='统计'
from
new
group by age
order by name ;
quit; /** Really 看不懂这的结果
为什么输出来那样???怎么做到的统计
哦哦 看明白了 是利用年龄的统计 后面distinct age 剔除重复的年龄再统计*/
/*二.SQL语言的嵌套*/
proc sql;
create table
new11 as
select *
from(select
age, /*标点*/
count(*)as
many
from new
group by age
)
having many=max(many);/*重点*/
quit;
/*下面是不嵌套的*/
proc sql;
create table
new12 as
select age,
count(*)as
many
from new
group by age;
proc sql;
create table
new13 as
select *
from new12
having many=max(many);
quit;
/*三、连接 难难难~~~~*/
/*简单合并用merge在data步*/
/*交叉合并
cross join 、 full join(key)、nature
full join(key)
inner join、left
join、right join笔记本上*/
data one;
do a=1,2;
output;
end;
run;
data two;
do b=10,9,8;
output;
end;
run;
data combined;
merge one two;
run;
proc sql;
create table
combined as
select *
from one cross join two;
quit; /*两种合并处理效果不一样!!!*/
/*四、where exists、where in(where any)查询数据库中已有的观测*/
data H;
input name $age;
cards;
John 12
;
proc sql;
create table
HH as
select *
from H
where exists
(select*
from new
where h.name=new.name
);
quit; /*有趣哈哈哈哈*/
proc sql;
create table
HHH as
select *
from h
where name in
(select name
from new
);
quit;
proc sql;
create table
HHHH as
select *
from h
where name=any /*补充ANY用法 >any(20,30)相当于>20
(select name
from new
);
quit;
/*五、intersect(交集),union(并集),expect(补集),
余集(得另外编程没有特定的代码)、union all(全集)、的区别*/
/*补充ALL用法 >all(20,30)相当于大于30,
/*any就是大于小的小于大的,all就是小于小的大于大的*/
/*小结:对小型表连接 数据步更有效,其余肯定SQL更有效啦*/
/*六、去掉标题(title;),显示程序效率 (stimer)*/
TITLE;
proc sql feedback stimer;
select *
from new
order by
age;
reset nostimer outobs=3;
select *
from new
order by
age;
quit; /*厉害这样显示*/
/*SQL在几个主要程序测试与效率
exec/noexec:控制SQL语句是否在执行
nostimer/stimer:在sas日志中报告每个SQL语句的效率统计数据
noerrorstop/errorstop:批处理时控制当有错误发生时,是否要检查语法
*/
/*七、利用宏变量*/
data w;
input ID $ a1-a4;
cards;
A 11 12 13 14
B 21 22 23 24
;
/*计算各个变量之和*/
proc sql;
create table
sums as
select sum(a1) as sum1,
sum(a2) as sum2,
sum(a3) as sum3,
sum(a4) as sum4 /*记住这没有点号!!!*/
from W
;
quit;
/*等价*/
proc summary data=w;
var a1-a4;
output out=sumss(drop=_type_
_freq_)
sum=sum1-sum4;
run;
data sumsss;
set w end=last;
ARRAY _a{*}a1-a4; /*定义数组*/
ARRAY _sum{*}sum1-sum4;
keep sum1-sum4;
do i=1 to 4;
_sum{i]+_a{i};
end;
if last then output;
run;
/*变量更多时 就用宏*/
%macro selectsums(maxindex=);
%do n=1 %to &maxindex;
sum(a&n)
as sum&n
%if &n NE &maxindex %then ,
;
%END;
%mend selectsums;
PROC SQL;
create table
sumssss as
select %selectsums(maxindex=4)
from w
;
quit; /*完美!*/
/*八、利用SQL生成报表*/
/*九、混合复杂操作*/
/*行列转置 常规的转置有proc transpose ..*/
proc sql;
create table
ww as
select id,'a1' as item,
a1 as vlaue
from w
union all
select id,'a2' as item,
a2
from w
union all
select id,'a3' as item,
a3
from w
union all
select id,'a4' as item,
a4
from w;
quit;
proc sql;
create table
verticalsums as
select *
from ww;
group by
item;
quit;
by tt.