1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
/*---------------------------------------------------------------------------
  NB: naive Bayes classifier for categorical inputs.

  Parameters (all required):
    train   data set used to estimate class priors and per-class level
            frequencies of every input variable.
    score   data set to classify. It is rewritten in place and gains the
            variable _class_ holding the predicted class number.
    nclass  number of classes. The target is compared against the integers
            1..nclass, so it must be coded that way.
    target  name of the target variable in the training data set.
    inputs  blank-separated list of input variables present in both data sets.

  Work data sets created: _priors_, and for every input variable VAR the
  tables _VARk (one per class k) and _VAR (the per-class tables merged).

  If a required parameter is blank or a data set does not exist, an ERROR
  line is written to the log and the macro exits without running any step.
---------------------------------------------------------------------------*/
%macro NB(train=,score=,nclass=,target=,inputs=);
    /* Keep working macro variables out of the caller's symbol table. */
    %local error nvar i j k var;
    %let error=0;

    /* ---- parameter checks: report every missing parameter, then stop ---- */
    %if %length(&train) = 0 %then %do;
        %put ERROR: Value for macro parameter TRAIN is missing ;
        %let error=1;
    %end;
    %if %length(&score) = 0 %then %do;
        %put ERROR: Value for macro parameter SCORE is missing ;
        %let error=1;
    %end;
    %if %length(&nclass) = 0 %then %do;
        %put ERROR: Value for macro parameter NCLASS is missing ;
        %let error=1;
    %end;
    %if %length(&target) = 0 %then %do;
        %put ERROR: Value for macro parameter TARGET is missing ;
        %let error=1;
    %end;
    %if %length(&inputs) = 0 %then %do;
        %put ERROR: Value for macro parameter INPUTS is missing ;
        %let error=1;
    %end;
    %if &error=1 %then %goto finish;

    /* ---- both data sets must exist before any step runs ---- */
    %if %sysfunc(exist(&train)) = 0 %then %do;
        %put ERROR: data set &train does not exist ;
        %let error=1;
    %end;
    %if %sysfunc(exist(&score)) = 0 %then %do;
        %put ERROR: data set &score does not exist ;
        %let error=1;
    %end;
    %if &error=1 %then %goto finish;

    /* ---- count the words in the INPUTS list ---- */
    %let nvar=0;
    %do %while (%length(%scan(&inputs,%eval(&nvar+1)))>0);
        %let nvar=%eval(&nvar+1);
    %end;

    /* ---- class priors: COUNT and PERCENT (0-100 scale) per target level ---- */
    proc freq data=&train noprint;
        tables &target / out=_priors_ ;
    run;

    %do k=1 %to &nclass;
        /* Start at missing so a class absent from the training data still
           resolves; its score becomes missing and it is never predicted. */
        %local Prior&k Count&k;
        %let Prior&k=.;
        %let Count&k=.;
        proc sql noprint;
            select percent, count into :Prior&k, :Count&k
            from _priors_
            where &target=&k;
        quit;
    %end;

    /* ---- per-variable conditional frequencies, joined onto the score data ---- */
    %do i=1 %to &nvar;
        %let var=%scan(&inputs,&i);

        /* One frequency table per class; MISSING keeps missing as a level. */
        %do j=1 %to &nclass;
            proc freq data=&train noprint;
                tables &var / out=_&var.&j (drop=count) missing;
                where &target=&j;
            run;
        %end;

        /* Side-by-side percentages per level; a level unseen in class k gets 0. */
        data _&var ;
            merge
            %do k=1 %to &nclass;
                _&var.&k (rename=(percent=percent&k))
            %end;
            ;
            by &var;
            %do k=1 %to &nclass;
                if percent&k=. then percent&k=0;
            %end;
        run;

        /* Attach percent<k>_<var> to each score row. Levels never seen in
           training stay missing after the left join. */
        proc sql;
            create table &score AS
            select a.*
            %do k=1 %to &nclass;
                , b.percent&k as percent&k._&var
            %end;
            from &score as a left join _&var as b
            on a.&var=b.&var;
        quit;
    %end;

    /* ---- score: pick the class with the largest log posterior ---- */
    data &score (drop=L product maxprob
                 %do i=1 %to &nclass; percent&i._: %end;);
        set &score;
        /* Missing compares below every number, so the first class with a
           nonmissing score always becomes the initial maximum. Log scores
           are negative, so starting at 0 would never be exceeded. */
        maxprob=.;
        %do k=1 %to &nclass;
            array vars&k(&nvar) %do i=1 %to &nvar; percent&k._%scan(&inputs,&i) %end; ;
            /* PROC FREQ percentages are on a 0-100 scale; divide by 100 so
               they share the proportion scale of the smoothing term below. */
            product=log(&&Prior&k/100);
            /* These tests depend on row values, so they must be DATA step
               IF and DO statements, not macro-level ones. */
            do L=1 to &nvar;
                if vars&k(L) > 0 then product=product+log(vars&k(L)/100);
                /* Zero or missing frequency: substitute 0.5/count of class k. */
                else product=product+log(0.5)-log(&&Count&k);
            end;
            if product>maxprob then do;
                maxprob=product;
                _class_=&k;
            end;
        %end;
    run;

%finish: ;
%mend NB;
/* Library holding the training and scoring data sets. */
libname EQPD 'C:\temp\dataset';

/* Train on EQPD.adult and classify EQPD.adult_test (rewritten in place).
   The macro compares the target against 1..nclass, so n must be coded 1 or 2. */
%NB(train=EQPD.adult,score=EQPD.adult_test,nclass=2,target=n,inputs=age sex education occupation workclass race);
Partager