import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hssf.converter.AbstractExcelConverter;
import org.apache.poi.hssf.converter.ExcelToHtmlUtils;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.hwpf.converter.HtmlDocumentFacade;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.util.Beta;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
@Beta
public class ExcelToHtmlConverter extends AbstractExcelConverter
{
private static final POILogger logger = POILogFactory
.getLogger( ExcelToHtmlConverter.class );
public static final String EMPTY = "";
public static boolean isEmpty( String str )
{
return str == null || str.length() == 0;
}
public static boolean isNotEmpty( String str )
{
return !isEmpty( str );
}
/**
* Java main() interface to interact with {@link ExcelToHtmlConverter}
*
*
* Usage: ExcelToHtmlConverter infile outfile
*
* Where infile is an input .xls file ( Word 97-2007) which will be rendered
* as HTML into outfile
*/
public static void main( String[] args )
{
if ( args.length < 2 )
{
System.err
.println( "Usage: ExcelToHtmlConverter " );
return;
}
System.out.println( "Converting " + args[0] );
System.out.println( "Saving output to " + args[1] );
try
{
Document doc = ExcelToHtmlConverter.process( new File( args[0] ) );
FileWriter out = new FileWriter( args[1] );
DOMSource domSource = new DOMSource( doc );
StreamResult streamResult = new StreamResult( out );
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
// TODO set encoding from a command argument
serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
serializer.setOutputProperty( OutputKeys.INDENT, "no" );
serializer.setOutputProperty( OutputKeys.METHOD, "html" );
serializer.transform( domSource, streamResult );
out.close();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
/**
* Converts Excel file (97-2007) into HTML file.
*
* @param xlsFile
* file to process
* @return DOM representation of result HTML
*/
public static Document process( File xlsFile ) throws Exception
{
final HSSFWorkbook workbook = ExcelToHtmlUtils.loadXls( xlsFile );
ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument() );
excelToHtmlConverter.processWorkbook( workbook, new Configuration() );
return excelToHtmlConverter.getDocument();
}
private String cssClassContainerCell = null;
private String cssClassContainerDiv = null;
private String cssClassPrefixCell = "c";
private String cssClassPrefixDiv = "d";
private String cssClassPrefixRow = "r";
private String cssClassPrefixTable = "t";
private Map excelStyleToClass = new LinkedHashMap();
private final HtmlDocumentFacade htmlDocumentFacade;
private boolean useDivsToSpan = false;
public ExcelToHtmlConverter( Document doc )
{
htmlDocumentFacade = new HtmlDocumentFacade( doc );
}
public ExcelToHtmlConverter( HtmlDocumentFacade htmlDocumentFacade )
{
this.htmlDocumentFacade = htmlDocumentFacade;
}
protected String buildStyle( HSSFWorkbook workbook, HSSFCellStyle cellStyle )
{
StringBuilder style = new StringBuilder();
style.append( "white-space:pre-wrap;" );
ExcelToHtmlUtils.appendAlign( style, cellStyle.getAlignment() );
if ( cellStyle.getFillPattern() == 0 )
{
// no fill
}
else if ( cellStyle.getFillPattern() == 1 )
{
final HSSFColor foregroundColor = cellStyle
.getFillForegroundColorColor();
if ( foregroundColor != null )
style.append( "background-color:"
+ ExcelToHtmlUtils.getColor( foregroundColor ) + ";" );
}
else
{
final HSSFColor backgroundColor = cellStyle
.getFillBackgroundColorColor();
if ( backgroundColor != null )
style.append( "background-color:"
+ ExcelToHtmlUtils.getColor( backgroundColor ) + ";" );
}
buildStyle_border( workbook, style, "top", cellStyle.getBorderTop(),
cellStyle.getTopBorderColor() );
buildStyle_border( workbook, style, "right",
cellStyle.getBorderRight(), cellStyle.getRightBorderColor() );
buildStyle_border( workbook, style, "bottom",
cellStyle.getBorderBottom(), cellStyle.getBottomBorderColor() );
buildStyle_border( workbook, style, "left", cellStyle.getBorderLeft(),
cellStyle.getLeftBorderColor() );
HSSFFont font = cellStyle.getFont( workbook );
buildStyle_font( workbook, style, font );
return style.toString();
}
private void buildStyle_border( HSSFWorkbook workbook, StringBuilder style,
String type, short xlsBorder, short borderColor )
{
if ( xlsBorder == HSSFCellStyle.BORDER_NONE )
return;
StringBuilder borderStyle = new StringBuilder();
borderStyle.append( getBorderWidth( xlsBorder ) );
borderStyle.append( ' ' );
borderStyle.append( ExcelToHtmlUtils.getBorderStyle( xlsBorder ) );
final HSSFColor color = workbook.getCustomPalette().getColor(
borderColor );
if ( color != null )
{
borderStyle.append( ' ' );
borderStyle.append( ExcelToHtmlUtils.getColor( color ) );
}
style.append( "border-" + type + ":" + borderStyle + ";" );
}
public static String getBorderWidth( short xlsBorder )
{
final String borderWidth;
switch ( xlsBorder )
{
case HSSFCellStyle.BORDER_THICK:
case HSSFCellStyle.BORDER_DOUBLE:
borderWidth="thick";
break;
case HSSFCellStyle.BORDER_MEDIUM_DASH_DOT:
case HSSFCellStyle.BORDER_MEDIUM_DASH_DOT_DOT:
case HSSFCellStyle.BORDER_MEDIUM_DASHED:
case HSSFCellStyle.BORDER_MEDIUM:
borderWidth = "medium";
break;
default:
borderWidth = "thin";
break;
}
return borderWidth;
}
void buildStyle_font( HSSFWorkbook workbook, StringBuilder style,
HSSFFont font )
{
switch ( font.getBoldweight() )
{
case HSSFFont.BOLDWEIGHT_BOLD:
style.append( "font-weight:bold;" );
break;
case HSSFFont.BOLDWEIGHT_NORMAL:
// by default, not not increase HTML size
// style.append( "font-weight: normal; " );
break;
}
final HSSFColor fontColor = workbook.getCustomPalette().getColor(
font.getColor() );
if ( fontColor != null )
style.append( "color: " + ExcelToHtmlUtils.getColor( fontColor )
+ "; " );
if ( font.getFontHeightInPoints() != 0 )
style.append( "font-size:" + font.getFontHeightInPoints() + "pt;" );
if ( font.getItalic() )
{
style.append( "font-style:italic;" );
}
}
public String getCssClassPrefixCell()
{
return cssClassPrefixCell;
}
public String getCssClassPrefixDiv()
{
return cssClassPrefixDiv;
}
public String getCssClassPrefixRow()
{
return cssClassPrefixRow;
}
public String getCssClassPrefixTable()
{
return cssClassPrefixTable;
}
public Document getDocument()
{
return htmlDocumentFacade.getDocument();
}
protected String getStyleClassName( HSSFWorkbook workbook,
HSSFCellStyle cellStyle )
{
final Short cellStyleKey = Short.valueOf( cellStyle.getIndex() );
String knownClass = excelStyleToClass.get( cellStyleKey );
if ( knownClass != null )
return knownClass;
String cssStyle = buildStyle( workbook, cellStyle );
String cssClass = htmlDocumentFacade.getOrCreateCssClass(
cssClassPrefixCell, cssStyle );
excelStyleToClass.put( cellStyleKey, cssClass );
return cssClass;
}
public boolean isUseDivsToSpan()
{
return useDivsToSpan;
}
protected boolean processCell( HSSFCell cell, Element tableCellElement,
int normalWidthPx, int maxSpannedWidthPx, float normalHeightPt )
{
final HSSFCellStyle cellStyle = cell.getCellStyle();
String value;
switch ( cell.getCellType() )
{
case HSSFCell.CELL_TYPE_STRING:
// XXX: enrich
value = cell.getRichStringCellValue().getString();
break;
case HSSFCell.CELL_TYPE_FORMULA:
switch ( cell.getCachedFormulaResultType() )
{
case HSSFCell.CELL_TYPE_STRING:
HSSFRichTextString str = cell.getRichStringCellValue();
if ( str != null && str.length() > 0 )
{
value = ( str.toString() );
}
else
{
value = EMPTY;
}
break;
case HSSFCell.CELL_TYPE_NUMERIC:
HSSFCellStyle style = cellStyle;
if ( style == null )
{
value = String.valueOf( cell.getNumericCellValue() );
}
else
{
value = ( _formatter.formatRawCellContents(
cell.getNumericCellValue(), style.getDataFormat(),
style.getDataFormatString() ) );
}
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
value = String.valueOf( cell.getBooleanCellValue() );
break;
case HSSFCell.CELL_TYPE_ERROR:
value = ErrorEval.getText( cell.getErrorCellValue() );
break;
default:
logger.log(
POILogger.WARN,
"Unexpected cell cachedFormulaResultType ("
+ cell.getCachedFormulaResultType() + ")" );
value = EMPTY;
break;
}
break;
case HSSFCell.CELL_TYPE_BLANK:
value = EMPTY;
break;
case HSSFCell.CELL_TYPE_NUMERIC:
value = _formatter.formatCellValue( cell );
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
value = String.valueOf( cell.getBooleanCellValue() );
break;
case HSSFCell.CELL_TYPE_ERROR:
value = ErrorEval.getText( cell.getErrorCellValue() );
break;
default:
logger.log( POILogger.WARN,
"Unexpected cell type (" + cell.getCellType() + ")" );
return true;
}
final boolean noText = isEmpty( value );
final boolean wrapInDivs = !noText && isUseDivsToSpan()
&& !cellStyle.getWrapText();
final short cellStyleIndex = cellStyle.getIndex();
if ( cellStyleIndex != 0 )
{
HSSFWorkbook workbook = cell.getRow().getSheet().getWorkbook();
String mainCssClass = getStyleClassName( workbook, cellStyle );
if ( wrapInDivs )
{
tableCellElement.setAttribute( "class", mainCssClass + " "
+ cssClassContainerCell );
}
else
{
tableCellElement.setAttribute( "class", mainCssClass );
}
if ( noText )
{
/*
* if cell style is defined (like borders, etc.) but cell text
* is empty, add " " to output, so browser won't collapse
* and ignore cell
*/
value = "\u00A0";
}
}
if ( isOutputLeadingSpacesAsNonBreaking() && value.startsWith( " " ) )
{
StringBuilder builder = new StringBuilder();
for ( int c = 0; c < value.length(); c++ )
{
if ( value.charAt( c ) != ' ' )
break;
builder.append( '\u00a0' );
}
if ( value.length() != builder.length() )
builder.append( value.substring( builder.length() ) );
value = builder.toString();
}
Text text = htmlDocumentFacade.createText( value );
if ( wrapInDivs )
{
Element outerDiv = htmlDocumentFacade.createBlock();
outerDiv.setAttribute( "class", this.cssClassContainerDiv );
Element innerDiv = htmlDocumentFacade.createBlock();
StringBuilder innerDivStyle = new StringBuilder();
innerDivStyle.append( "position:absolute;min-width:" );
innerDivStyle.append( normalWidthPx );
innerDivStyle.append( "px;" );
if ( maxSpannedWidthPx != Integer.MAX_VALUE )
{
innerDivStyle.append( "max-width:" );
innerDivStyle.append( maxSpannedWidthPx );
innerDivStyle.append( "px;" );
}
innerDivStyle.append( "overflow:hidden;max-height:" );
innerDivStyle.append( normalHeightPt );
innerDivStyle.append( "pt;white-space:nowrap;" );
ExcelToHtmlUtils.appendAlign( innerDivStyle,
cellStyle.getAlignment() );
htmlDocumentFacade.addStyleClass( outerDiv, cssClassPrefixDiv,
innerDivStyle.toString() );
innerDiv.appendChild( text );
outerDiv.appendChild( innerDiv );
tableCellElement.appendChild( outerDiv );
}
else
{
tableCellElement.appendChild( text );
}
return isEmpty( value ) && cellStyleIndex == 0;
}
protected void processColumnHeaders( HSSFSheet sheet, int maxSheetColumns,
Element table )
{
Element tableHeader = htmlDocumentFacade.createTableHeader();
table.appendChild( tableHeader );
Element tr = htmlDocumentFacade.createTableRow();
if ( isOutputRowNumbers() )
{
// empty row at left-top corner
tr.appendChild( htmlDocumentFacade.createTableHeaderCell() );
}
for ( int c = 0; c < maxSheetColumns; c++ )
{
if ( !isOutputHiddenColumns() && sheet.isColumnHidden( c ) )
continue;
Element th = htmlDocumentFacade.createTableHeaderCell();
String text = getColumnName( c );
th.appendChild( htmlDocumentFacade.createText( text ) );
tr.appendChild( th );
}
tableHeader.appendChild( tr );
}
/**
* Creates COLGROUP element with width specified for all columns. (Except
* first if {@link #isOutputRowNumbers()}==true)
*/
protected void processColumnWidths( HSSFSheet sheet, int maxSheetColumns,
Element table )
{
// draw COLS after we know max column number
Element columnGroup = htmlDocumentFacade.createTableColumnGroup();
if ( isOutputRowNumbers() )
{
columnGroup.appendChild( htmlDocumentFacade.createTableColumn() );
}
for ( int c = 0; c < maxSheetColumns; c++ )
{
if ( !isOutputHiddenColumns() && sheet.isColumnHidden( c ) )
continue;
Element col = htmlDocumentFacade.createTableColumn();
col.setAttribute( "width",
String.valueOf( getColumnWidth( sheet, c ) ) );
columnGroup.appendChild( col );
}
table.appendChild( columnGroup );
}
protected void processDocumentInformation(
SummaryInformation summaryInformation )
{
if ( isNotEmpty( summaryInformation.getTitle() ) )
htmlDocumentFacade.setTitle( summaryInformation.getTitle() );
if ( isNotEmpty( summaryInformation.getAuthor() ) )
htmlDocumentFacade.addAuthor( summaryInformation.getAuthor() );
if ( isNotEmpty( summaryInformation.getKeywords() ) )
htmlDocumentFacade.addKeywords( summaryInformation.getKeywords() );
if ( isNotEmpty( summaryInformation.getComments() ) )
htmlDocumentFacade
.addDescription( summaryInformation.getComments() );
}
/**
* @return maximum 1-base index of column that were rendered, zero if none
*/
protected int processRow( CellRangeAddress[][] mergedRanges, HSSFRow row,
Element tableRowElement )
{
final HSSFSheet sheet = row.getSheet();
final short maxColIx = row.getLastCellNum();
if ( maxColIx <= 0 )
return 0;
final List emptyCells = new ArrayList( maxColIx );
if ( isOutputRowNumbers() )
{
Element tableRowNumberCellElement = htmlDocumentFacade
.createTableHeaderCell();
processRowNumber( row, tableRowNumberCellElement );
emptyCells.add( tableRowNumberCellElement );
}
int maxRenderedColumn = 0;
for ( int colIx = 0; colIx < maxColIx; colIx++ )
{
if ( !isOutputHiddenColumns() && sheet.isColumnHidden( colIx ) )
continue;
CellRangeAddress range = ExcelToHtmlUtils.getMergedRange(
mergedRanges, row.getRowNum(), colIx );
if ( range != null
&& ( range.getFirstColumn() != colIx || range.getFirstRow() != row
.getRowNum() ) )
continue;
HSSFCell cell = row.getCell( colIx );
int divWidthPx = 0;
if ( isUseDivsToSpan() )
{
divWidthPx = getColumnWidth( sheet, colIx );
boolean hasBreaks = false;
for ( int nextColumnIndex = colIx + 1; nextColumnIndex < maxColIx; nextColumnIndex++ )
{
if ( !isOutputHiddenColumns()
&& sheet.isColumnHidden( nextColumnIndex ) )
continue;
if ( row.getCell( nextColumnIndex ) != null
&& !isTextEmpty( row.getCell( nextColumnIndex ) ) )
{
hasBreaks = true;
break;
}
divWidthPx += getColumnWidth( sheet, nextColumnIndex );
}
if ( !hasBreaks )
divWidthPx = Integer.MAX_VALUE;
}
Element tableCellElement = htmlDocumentFacade.createTableCell();
if ( range != null )
{
if ( range.getFirstColumn() != range.getLastColumn() )
tableCellElement.setAttribute(
"colspan",
String.valueOf( range.getLastColumn()
- range.getFirstColumn() + 1 ) );
if ( range.getFirstRow() != range.getLastRow() )
tableCellElement.setAttribute(
"rowspan",
String.valueOf( range.getLastRow()
- range.getFirstRow() + 1 ) );
}
boolean emptyCell;
if ( cell != null )
{
emptyCell = processCell( cell, tableCellElement,
getColumnWidth( sheet, colIx ), divWidthPx,
row.getHeight() / 20f );
}
else
{
emptyCell = true;
}
if ( emptyCell )
{
emptyCells.add( tableCellElement );
}
else
{
for ( Element emptyCellElement : emptyCells )
{
tableRowElement.appendChild( emptyCellElement );
}
emptyCells.clear();
tableRowElement.appendChild( tableCellElement );
maxRenderedColumn = colIx;
}
}
return maxRenderedColumn + 1;
}
protected void processRowNumber( HSSFRow row,
Element tableRowNumberCellElement )
{
tableRowNumberCellElement.setAttribute( "class", "rownumber" );
Text text = htmlDocumentFacade.createText( getRowName( row ) );
tableRowNumberCellElement.appendChild( text );
}
protected void processSheet( HSSFSheet sheet, Configuration configuration )
{
if ( configuration.withTitle ) {
processSheetHeader( htmlDocumentFacade.getBody(), sheet );
}
final int physicalNumberOfRows = sheet.getPhysicalNumberOfRows();
if ( physicalNumberOfRows <= 0 )
return;
Element table = htmlDocumentFacade.createTable();
htmlDocumentFacade.addStyleClass( table, cssClassPrefixTable,
"border-collapse:collapse;border-spacing:0;" );
Element tableBody = htmlDocumentFacade.createTableBody();
final CellRangeAddress[][] mergedRanges = ExcelToHtmlUtils
.buildMergedRangesMap( sheet );
final List emptyRowElements = new ArrayList(
physicalNumberOfRows );
int maxSheetColumns = 1;
for ( int r = sheet.getFirstRowNum(); r <= sheet.getLastRowNum(); r++ )
{
HSSFRow row = sheet.getRow( r );
if ( row == null )
continue;
if ( !isOutputHiddenRows() && row.getZeroHeight() )
continue;
Element tableRowElement = htmlDocumentFacade.createTableRow();
htmlDocumentFacade.addStyleClass( tableRowElement,
cssClassPrefixRow, "height:" + ( row.getHeight() / 20f )
+ "pt;" );
int maxRowColumnNumber = processRow( mergedRanges, row,
tableRowElement );
if ( maxRowColumnNumber == 0 )
{
emptyRowElements.add( tableRowElement );
}
else
{
if ( !emptyRowElements.isEmpty() )
{
for ( Element emptyRowElement : emptyRowElements )
{
tableBody.appendChild( emptyRowElement );
}
emptyRowElements.clear();
}
tableBody.appendChild( tableRowElement );
}
maxSheetColumns = Math.max( maxSheetColumns, maxRowColumnNumber );
}
processColumnWidths( sheet, maxSheetColumns, table );
if ( isOutputColumnHeaders() )
{
processColumnHeaders( sheet, maxSheetColumns, table );
}
table.appendChild( tableBody );
htmlDocumentFacade.getBody().appendChild( table );
}
protected void processSheetHeader( Element htmlBody, HSSFSheet sheet )
{
Element h2 = htmlDocumentFacade.createHeader2();
h2.appendChild( htmlDocumentFacade.createText( sheet.getSheetName() ) );
htmlBody.appendChild( h2 );
}
public void processWorkbook( HSSFWorkbook workbook, Configuration configuration )
{
final SummaryInformation summaryInformation = workbook
.getSummaryInformation();
if ( summaryInformation != null )
{
processDocumentInformation( summaryInformation );
}
if ( isUseDivsToSpan() )
{
// prepare CSS classes for later usage
this.cssClassContainerCell = htmlDocumentFacade
.getOrCreateCssClass( cssClassPrefixCell,
"padding:0;margin:0;align:left;vertical-align:top;" );
this.cssClassContainerDiv = htmlDocumentFacade.getOrCreateCssClass(
cssClassPrefixDiv, "position:relative;" );
}
for ( int s = 0; s < workbook.getNumberOfSheets(); s++ )
{
if ( configuration.includeSheet(s) ) {
HSSFSheet sheet = workbook.getSheetAt( s );
processSheet( sheet , configuration);
}
}
htmlDocumentFacade.updateStylesheet();
}
public void setCssClassPrefixCell( String cssClassPrefixCell )
{
this.cssClassPrefixCell = cssClassPrefixCell;
}
public void setCssClassPrefixDiv( String cssClassPrefixDiv )
{
this.cssClassPrefixDiv = cssClassPrefixDiv;
}
public void setCssClassPrefixRow( String cssClassPrefixRow )
{
this.cssClassPrefixRow = cssClassPrefixRow;
}
public void setCssClassPrefixTable( String cssClassPrefixTable )
{
this.cssClassPrefixTable = cssClassPrefixTable;
}
/**
* Allows converter to wrap content into two additional DIVs with tricky
* styles, so it will wrap across empty cells (like in Excel).
*
* Warning: after enabling this mode do not serialize result HTML
* with INDENT=YES option, because line breaks will make additional
* (unwanted) changes
*/
public void setUseDivsToSpan( boolean useDivsToSpan )
{
this.useDivsToSpan = useDivsToSpan;
}
public static class Configuration {
public boolean withTitle=true;
public boolean includeSheet(int index) {
return true;
}
}
}