// Jericho HTML Parser sample: prints the <td class="match"> start tags found in an HTML document.
import net.htmlparser.jericho.*;
import java.util.*;
import java.io.*;
import java.net.*;
public class test2 {
public static void main(String[] args) throws Exception {
String sourceUrlString="test.htm";
if (args.length==0)
System.err.println("Using default argument of \""+sourceUrlString+'"');
else
sourceUrlString=args[0];
if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString;
MicrosoftTagTypes.register();
MasonTagTypes.register();
Source source=new Source(new URL(sourceUrlString));
System.out.println("\n*******************************************************************************\n");
System.out.println("Tags starting with <td class="match"");
displaySegments(source.getAllStartTags("td class="match""));
//System.out.println("CDATA sections:");
//displaySegments(source.getAllTags(StartTagType.CDATA_SECTION));
//System.out.println("Common server tags: (eg ASP, JSP, PSP, ASP-style PHP or Mason substitution tag)");
//displaySegments(source.getAllTags(StartTagType.SERVER_COMMON));
//System.out.println("Tags starting with <%=var");
//displaySegments(source.getAllStartTags("%=var"));
//System.out.println("HTML Comments:");
//displaySegments(source.getAllTags(StartTagType.COMMENT));
//System.out.println("Elements in namespace \"o\" (generated by MS-Word):");
//displaySegments(source.getAllElements("o:"));
//System.out.println("Tags starting with <![ (commonly generated by MS-Word):");
//displaySegments(source.getAllStartTags("!["));
// Note: The end of a PHP tag can not be reliably found without the use of a PHP parser,
// meaning any PHP tag found by this library is not guaranteed to have the correct end position.
// System.out.println("Standard PHP tags:");
// displaySegments(source.getAllTags(PHPTagTypes.PHP_STANDARD));
// System.out.println("Short PHP tags:");
// displaySegments(source.getAllTags(PHPTagTypes.PHP_SHORT));
// System.out.println("Mason Component Calls:");
// displaySegments(source.getAllTags(MasonTagTypes.MASON_COMPONENT_CALL));
//System.out.println("Mason Components Called With Content:");
//displaySegments(source.getAllElements(MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT));
//System.out.println("Mason Named Blocks:");
//displaySegments(source.getAllElements(MasonTagTypes.MASON_NAMED_BLOCK));
// System.out.println("Unregistered start tags:");
// displaySegments(source.getAllTags(StartTagType.UNREGISTERED));
//System.out.println("Unregistered end tags:");
// displaySegments(source.getAllTags(EndTagType.UNREGISTERED));
System.out.println(source.getCacheDebugInfo());
}
private static void displaySegments(List<? extends Segment> segments) {
for (Segment segment : segments) {
System.out.println("-------------------------------------------------------------------------------");
System.out.println(segment.getDebugInfo());
System.out.println(segment);
}
System.out.println("\n*******************************************************************************\n");
}
} |
// Partager ("Share" link text left over from the web page this listing was copied from)