1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
public ArrayList recuperer_site(String url) throws IOException, ParserException{
String s="";
ArrayList liste= new ArrayList();
String images = null;
HttpClient client = new HttpClient();
//System.out.println(url);
// Create a method instance.
GetMethod method = new GetMethod(url);
// Provide custom retry handler is necessary
method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
new DefaultHttpMethodRetryHandler(3, false));
SAXBuilder sxb = new SAXBuilder();
try {
// Execute the method.
int statusCode = client.executeMethod(method);
// System.out.println(statusCode);
if (statusCode != HttpStatus.SC_OK) {
System.err.println("Method failed: " + method.getStatusLine());
}
// Read the response body.
byte[] responseBody = method.getResponseBody();
s= new String(responseBody);
Parser parse= new Parser(s);
NodeList liste_de_noeuds = parse.parse(new TagNameFilter("img"));
NodeList mes_noeuds_script = liste_de_noeuds.extractAllNodesThatMatch (new TagNameFilter("img"));
int i=0;
//System.out.println(mes_noeuds_script.size());
try{
while(i<mes_noeuds_script.size()){
/*.out.println(mes_noeuds_script.toHtml()+'\n');*/
String k = mes_noeuds_script.elementAt(i).toHtml();
// Pour extraire l'attribut src de la balise image
String k1 = nom.getAttribute("src").toString();
// cette if est pour verifier si le chemin de src est relatif ou absolu
if(!k1.contains("http://")){
}else {
images += '\n';
liste.add(mes_noeuds_script.elementAt(i).toHtml());
}
images= mes_noeuds_script.elementAt(i).toHtml();
i++;
}
}catch(Exception e1){
e1.printStackTrace();
}
} catch (HttpException e) {
System.err.println("Fatal protocol violation: " + e.getMessage());
e.printStackTrace();
} catch (IOException e) {
System.err.println("Fatal transport error: " + e.getMessage());
e.printStackTrace();
}
return liste;
} |
Partager