/*
 * Reads an HTML document on standard input and prints, one per line, the
 * value of every href attribute of <a> tags and every src attribute of
 * <img> tags.
 */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Parser states. */
#define OUT 0              /* outside any tag, looking for '<'            */
#define ENTER 1            /* just after '<', identifying the tag          */
#define END_TAG 2          /* inside a closing tag, looking for '>'        */
#define IMG_TAG 3          /* inside <img ...>, looking for src=           */
#define A_TAG 4            /* inside <a ...>, looking for href=            */
#define HREF_SRC_CONTENT 5 /* just after href= / src=, reading the value   */

#define BUFFER_LENGTH 256  /* size of the stdin read buffer                */
#define URL_LENGTH 1024    /* maximum number of URL bytes kept             */

/*
 * Returns the next byte of standard input, refilling an internal static
 * buffer with read(2) when it is exhausted.
 *
 * Does not return at end of input: the process exits with EXIT_SUCCESS.
 * On a read error a diagnostic is written to stderr and the process exits
 * with EXIT_FAILURE. A read interrupted by a signal (EINTR) is retried.
 */
char my_get_char(void)
{
    static char buff[BUFFER_LENGTH];
    static ssize_t pos = 0;
    static ssize_t end = 0;

    if (pos < end) {
        return buff[pos++];
    }

    do {
        end = read(STDIN_FILENO, buff, BUFFER_LENGTH);
    } while (end == -1 && errno == EINTR);

    if (end == 0) {
        exit(EXIT_SUCCESS);
    }
    if (end == -1) {
        /* fprintf may overwrite errno, so keep a copy for perror. */
        int saved_errno = errno;

        fprintf(stderr, "Echec de la lecture sur l'entrée standard %d !\n",
                saved_errno);
        errno = saved_errno;
        perror("");
        exit(EXIT_FAILURE);
    }

    pos = 1;
    return buff[0];
}

/*
 * Next input byte as a value in 0..UCHAR_MAX, so that it can be handed to
 * the <ctype.h> functions even where plain char is signed.
 */
static int next_byte(void)
{
    return (unsigned char)my_get_char();
}

/*
 * Tries to read "<name>=" from the input, comparing name case-insensitively
 * (name must be lower case). Returns 1 on success, with *cur set to '='.
 * Returns 0 on failure, with *cur set to the first byte that did not match;
 * that byte has not been interpreted yet and the caller must examine it.
 */
static int match_attr(int *cur, const char *name)
{
    int c = next_byte();

    for (; *name != '\0'; name++) {
        if (tolower(c) != *name) {
            *cur = c;
            return 0;
        }
        c = next_byte();
    }

    *cur = c;
    return c == '=';
}

/*
 * Scans the remainder of the current tag for the attribute `name`.
 * `cur` is the last byte read by the caller. An attribute is only
 * recognised directly after a whitespace byte. Returns HREF_SRC_CONTENT
 * once "name=" has been consumed, or OUT when the closing '>' is reached
 * first.
 */
static int scan_tag_for_attr(int cur, const char *name)
{
    for (;;) {
        if (isspace(cur)) {
            if (match_attr(&cur, name)) {
                return HREF_SRC_CONTENT;
            }
            /*
             * The byte that broke the match may itself be whitespace
             * (e.g. two spaces before href) or '>': look at it again
             * instead of discarding it.
             */
            continue;
        }
        if (cur == '>') {
            return OUT;
        }
        cur = next_byte();
    }
}

/*
 * Reads an attribute value located just after '=' and prints it followed
 * by a newline. Leading whitespace is skipped. A value opening with a
 * double or single quote runs to the matching quote; otherwise it runs to
 * the next whitespace byte or '>'. At most URL_LENGTH bytes are kept; a
 * longer value is reported, truncated, as "URL trop longue".
 */
static void read_url_value(void)
{
    char url[URL_LENGTH + 1];
    size_t len = 0;
    int quote = 0;
    int cur = next_byte();

    while (isspace(cur)) {
        cur = next_byte();
    }

    if (cur == '"' || cur == '\'') {
        quote = cur;
        cur = next_byte();
    }

    for (;;) {
        int at_end = quote ? (cur == quote) : (isspace(cur) || cur == '>');

        if (at_end) {
            url[len] = '\0';
            printf("%s\n", url);
            return;
        }
        if (len == URL_LENGTH) {
            url[len] = '\0';
            printf("URL trop longue : %s\n", url);
            return;
        }
        url[len++] = (char)cur;
        cur = next_byte();
    }
}

/*
 * Runs the tag-scanning state machine over standard input. Never returns:
 * the process terminates inside my_get_char() at end of input or on a read
 * error.
 */
void parse(void)
{
    int state = OUT;
    int cur = 0;

    for (;;) {
        switch (state) {
        case OUT:
            while (next_byte() != '<') {
                /* skip text between tags */
            }
            state = ENTER;
            break;

        case ENTER:
            cur = next_byte();
            if (cur == '/') {
                state = END_TAG;
                break;
            }
            if (cur == 'a' || cur == 'A') {
                cur = next_byte();
                if (isspace(cur)) {
                    state = A_TAG;
                    break;
                }
            } else if (cur == 'i' || cur == 'I') {
                cur = next_byte();
                if (cur == 'm' || cur == 'M') {
                    cur = next_byte();
                    if (cur == 'g' || cur == 'G') {
                        cur = next_byte();
                        if (isspace(cur)) {
                            state = IMG_TAG;
                            break;
                        }
                    }
                }
            }
            /*
             * Any other tag: skip to its end. This relies on the document
             * following the standard and not putting a '>' inside
             * attribute values.
             */
            while (cur != '>') {
                cur = next_byte();
            }
            state = OUT;
            break;

        case END_TAG:
            while (next_byte() != '>') {
                /* skip the closing tag's name */
            }
            state = OUT;
            break;

        case A_TAG:
            /* cur is the whitespace byte that followed the tag name. */
            state = scan_tag_for_attr(cur, "href");
            break;

        case IMG_TAG:
            /* cur is the whitespace byte that followed the tag name. */
            state = scan_tag_for_attr(cur, "src");
            break;

        case HREF_SRC_CONTENT:
            read_url_value();
            state = OUT;
            break;

        default:
            state = OUT;
            break;
        }
    }
}

int main(void)
{
    parse();
    return EXIT_SUCCESS;
}