1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
| int parse_document(char *line, WORD *words, double *label,
long *queryid, long *slackid, double *costfactor,
long int *numwords, long int max_words_doc,
char **comment)
{
register long wpos,pos;
long wnum;
double weight;
int numread;
char featurepair[1000],junk[1000];
(*queryid)=0;
(*slackid)=0;
(*costfactor)=1;
pos=0;
(*comment)=NULL;
while(line[pos] ) { /* cut off comments */
if((line[pos] == '#') && (!(*comment))) {
line[pos]=0;
(*comment)=&(line[pos+1]);
}
if(line[pos] == '\n') { /* strip the CR */
line[pos]=0;
}
pos++;
}
if(!(*comment)) (*comment)=&(line[pos]);
/* printf("Comment: '%s'\n",(*comment)); */
wpos=0;
/* check, that line starts with target value or zero, but not with
feature pair */
if(sscanf(line,"%s",featurepair) == EOF) return(0);
pos=0;
while((featurepair[pos] != ':') && featurepair[pos]) pos++;
if(featurepair[pos] == ':') {
perror ("Line must start with label or 0!!!\n");
printf("LINE: %s\n",line);
exit (1);
}
/* read the target value */
if(sscanf(line,"%lf",label) == EOF) return(0);
pos=0;
while(space_or_null((int)line[pos])) pos++;
while((!space_or_null((int)line[pos])) && line[pos]) pos++;
while(((numread=sscanf(line+pos,"%s",featurepair)) != EOF) &&
(numread > 0) &&
(wpos<max_words_doc)) {
/* printf("%s\n",featurepair); */
while(space_or_null((int)line[pos])) pos++;
while((!space_or_null((int)line[pos])) && line[pos]) pos++;
if(sscanf(featurepair,"qid:%ld%s",&wnum,junk)==1) {
/* it is the query id */
(*queryid)=(long)wnum;
}
else if(sscanf(featurepair,"sid:%ld%s",&wnum,junk)==1) {
/* it is the slack id */
if(wnum > 0)
(*slackid)=(long)wnum;
else {
perror ("Slack-id must be greater or equal to 1!!!\n");
printf("LINE: %s\n",line);
exit (1);
}
}
else if(sscanf(featurepair,"cost:%lf%s",&weight,junk)==1) {
/* it is the example-dependent cost factor */
(*costfactor)=(double)weight;
}
else if(sscanf(featurepair,"%ld:%lf%s",&wnum,&weight,junk)==2) {
/* it is a regular feature */
if(wnum<=0) {
perror ("Feature numbers must be larger or equal to 1!!!\n");
printf("LINE: %s\n",line);
exit (1);
}
if((wpos>0) && ((words[wpos-1]).wnum >= wnum)) {
perror ("Features must be in increasing order!!!\n");
printf("LINE: %s\n",line);
exit (1);
}
(words[wpos]).wnum=wnum;
(words[wpos]).weight=(FVAL)weight;
wpos++;
}
else {
perror ("Cannot parse feature/value pair!!!\n");
printf("'%s' in LINE: %s\n",featurepair,line);
exit (1);
}
}
(words[wpos]).wnum=0;
(*numwords)=wpos+1;
return(1); |
Partager