1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
/**
 * Strips Microsoft Word specific markup from an HTML fragment.
 *
 * Two options are read from checkboxes in the current document:
 *   - "chkRemoveFont":   when checked, font face definitions are removed.
 *   - "chkRemoveStyles": when checked, double-quoted style attributes are removed.
 *
 * @param {string} html HTML fragment to clean.
 * @returns {string} The cleaned HTML.
 */
function CleanWord( html )
{
    var bIgnoreFont = document.getElementById('chkRemoveFont').checked ;
    var bRemoveStyles = document.getElementById('chkRemoveStyles').checked ;
    var level ;
    var pass ;

    // Drop empty <o:p> markers, then collapse non-empty ones to a single space.
    // [\s\S] is used instead of "." so that content spanning several lines is matched too.
    // NOTE(review): the literal spaces used as replacement/pattern text in this function
    // may have been "&nbsp;" entities before this listing was rendered - confirm.
    html = html.replace( /<o:p>\s*<\/o:p>/g, "" ) ;
    html = html.replace( /<o:p>[\s\S]*?<\/o:p>/g, " " ) ;

    // Remove mso-xxx styles.
    html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, "" ) ;

    // Remove margin styles and other layout declarations.
    // The variants ending in a double quote handle a declaration that is the
    // last one inside the style attribute: the closing quote is put back.
    html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, "" ) ;
    html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;
    html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, "" ) ;
    html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;
    html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;
    html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;
    html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;
    html = html.replace( /\s*tab-stops:[^;"]*;?/gi, "" ) ;
    html = html.replace( /\s*tab-stops:[^"]*/gi, "" ) ;

    // Remove FONT face attributes and font-family declarations.
    if ( bIgnoreFont )
    {
        html = html.replace( /\s*face="[^"]*"/gi, "" ) ;
        html = html.replace( /\s*face=[^ >]*/gi, "" ) ;
        html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, "" ) ;
    }

    // Remove Class attributes. Quoted values are consumed as a whole so that a
    // value holding several class names does not leave a fragment behind.
    html = html.replace( /<(\w[^>]*) class=("[^">]*"|'[^'>]*'|[^ |>]*)([^>]*)/gi, "<$1$3" ) ;

    // Remove styles.
    if ( bRemoveStyles )
        html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

    // Remove empty styles.
    html = html.replace( /\s*style="\s*"/gi, '' ) ;

    // Spans holding only whitespace become a single space; empty spans vanish.
    html = html.replace( /<SPAN\s*[^>]*>\s* \s*<\/SPAN>/gi, ' ' ) ;
    html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

    // Remove Lang attributes (same value handling as for class).
    html = html.replace( /<(\w[^>]*) lang=("[^">]*"|'[^'>]*'|[^ |>]*)([^>]*)/gi, "<$1$3" ) ;

    // Unwrap SPAN and FONT elements that are left without attributes.
    html = html.replace( /<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1' ) ;
    html = html.replace( /<FONT\s*>([\s\S]*?)<\/FONT>/gi, '$1' ) ;

    // Remove XML elements and declarations ("<?xml ...>" and "<\?xml ...>").
    html = html.replace( /<\\?\?xml[^>]*>/gi, "" ) ;

    // Remove Tags with XML namespace declarations: <o:p></o:p>
    html = html.replace( /<\/?\w+:[^>]*>/gi, "" ) ;

    // Drop empty headings, then turn H1..H6 into bold DIVs with font sizes 6..1.
    html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;
    for ( level = 1 ; level <= 6 ; level++ )
    {
        html = html.replace(
            new RegExp( '<H' + level + '([^>]*)>', 'gi' ),
            '<div$1><b><font size="' + ( 7 - level ) + '">' ) ;
    }
    html = html.replace( /<\/H\d>/gi, '</font></b></div>' ) ;

    html = html.replace( /<(U|I|STRIKE)> <\/\1>/g, ' ' ) ;

    // Remove empty tags (three times, so that tags emptied by a previous pass go too).
    for ( pass = 0 ; pass < 3 ; pass++ )
        html = html.replace( /<([^\s>]+)[^>]*>\s*<\/\1>/g, '' ) ;

    // Transform <P> to <DIV>. The lookahead requires the tag name to end right
    // after "P", so <PRE>, <PARAM> and similar tags are left alone; [\s\S]
    // lets a paragraph span several lines.
    html = html.replace( /<P(?=[\s>])([^>]*)>([\s\S]*?)<\/P>/gi, '<div$1>$2</div>' ) ;

    return html ;
}
Partager