Read .XLS files in Mule 4
- December 05, 2022
The .XLS file is a file extension used by Microsoft to save its spreadsheet untilMicrosoft Excel 2003. The format is also known as Binary Interchange File Format (BIFF) in Microsoft Documentation.
The early XLS formats, used for Excel 2.0 (1987) through Excel 4.0 (1992), allowed only a single worksheet. The corresponding file formats were single BIFF streams.
- BIFF2 for Excel 2.0 (1987)
- BIFF3 for Excel 3.0 (1990)
- BIFF4 for Excel 4.0 (1992)
- BIFF5 for Excel 5.0 (1993) and Excel 95 (1995)
- BIFF7 for Excel 97.
- BIFF8 for Excel 98 (1998) through Excel 2003.
- Note that BIFF12 is used in a different binary file format, using a different container file and the file extension .xlsb. It has been available as an alternative to the XML-based XLSX since Excel 2007. See MS-XLSB.
More information on https://www.loc.gov/preservation/digital/formats/fdd/fdd000510.shtml
How to read a .xls file in MuleSoft?
By default, MuleSoft doesn’t support .xls files, as stated in the documentation.
To read BIFF8 and BIFF5 files, we’ll have to use a custom java activity.
There are two libraries in java that can be used to read .xls files.
- Jxl – used for reading BIFF5
- Org.apache.poi – used for reading BIFF8
Please refer to the sample code for reading BIFF5 files using jxl library.
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Base64;
import java.util.List;
import org.json.JSONArray;
import org.json.JSONObject;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
public class ReadExcelBiff5 {
public static String readExcelFile(String fileString) throws BiffException, IOException {
String value = "";
byte[] decoded = Base64.getDecoder().decode(fileString);
// Steam decoded file to an input stream (as if we were reading it from disk)
InputStream targetStream = new ByteArrayInputStream(decoded);
JSONArray arr = new JSONArray();
JSONArray temp = new JSONArray();
Workbook wb = Workbook.getWorkbook(targetStream);
// TO get the access to the sheet
Sheet sh = wb.getSheet(0);
// To get the number of rows present in sheet
int totalNoOfRows = sh.getRows();
// To get the number of columns present in sheet
int totalNoOfCols = sh.getColumns();
for (int row = 0; row < totalNoOfRows; row++) {
for (int col = 0; col < totalNoOfCols; col++) {
value = value + sh.getCell(col, row).getContents().toString() + ",";
}
value = value.substring(0, value.length() - 1) + "\n";
}
return value;
}
}
And sample code for reading BIFF5 files using org.apache.poi library
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.Base64;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.sl.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import com.sun.rowset.internal.Row;
public class ReadExcelBiff8 {
public static String transform(String file) {
String cellValue = "";
String csvOutput = "";
String value = "";
try {
// Decode base64:
byte[] decoded = Base64.getDecoder().decode(file);
// Steam decoded file to an input stream (as if we were reading it from disk)
InputStream targetStream = new ByteArrayInputStream(decoded);
// Create the .xls Apache POI object
HSSFWorkbook workbook = new HSSFWorkbook(targetStream);
// Process the rows/cells etc...
HSSFWorkbook wb = new HSSFWorkbook();
wb = workbook;
HSSFSheet sheet = wb.getSheetAt(0);
System.out.println("sheet");
System.out.println(sheet);
// For example...
DataFormatter formatter = new DataFormatter();
for (org.apache.poi.ss.usermodel.Row r : sheet) {
for (Cell c : r) {
//value = value + "," + formatter.formatCellValue(c);
value = value + formatter.formatCellValue(c) + ",";
}
value = value.substring(0, value.length() - 1) + "\n";
}
} catch (Exception e) {
System.out.println("FAIL" + e.getMessage());
}
return value;
}
}
You can create a sample flow in MuleSoft to read .xls files and pass the payload as a Base64 string to this function.
Refer to the attached config.xml
<?xml version="1.0" encoding="UTF-8"?>
<mule xmlns:ldap="http://www.mulesoft.org/schema/mule/ldap" xmlns:java="http://www.mulesoft.org/schema/mule/java"
xmlns:file="http://www.mulesoft.org/schema/mule/file"
xmlns:crypto="http://www.mulesoft.org/schema/mule/crypto" xmlns:http="http://www.mulesoft.org/schema/mule/http" xmlns:ee="http://www.mulesoft.org/schema/mule/ee/core" xmlns:email="http://www.mulesoft.org/schema/mule/email" xmlns="http://www.mulesoft.org/schema/mule/core" xmlns:doc="http://www.mulesoft.org/schema/mule/documentation" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mulesoft.org/schema/mule/core http://www.mulesoft.org/schema/mule/core/current/mule.xsd
http://www.mulesoft.org/schema/mule/email http://www.mulesoft.org/schema/mule/email/current/mule-email.xsd
http://www.mulesoft.org/schema/mule/ee/core http://www.mulesoft.org/schema/mule/ee/core/current/mule-ee.xsd
http://www.mulesoft.org/schema/mule/http http://www.mulesoft.org/schema/mule/http/current/mule-http.xsd
http://www.mulesoft.org/schema/mule/crypto http://www.mulesoft.org/schema/mule/crypto/current/mule-crypto.xsd
http://www.mulesoft.org/schema/mule/file http://www.mulesoft.org/schema/mule/file/current/mule-file.xsd
http://www.mulesoft.org/schema/mule/java http://www.mulesoft.org/schema/mule/java/current/mule-java.xsd
http://www.mulesoft.org/schema/mule/ldap http://www.mulesoft.org/schema/mule/ldap/current/mule-ldap.xsd">
<flow name="BIFF8_Flow" doc:id="41b61724-f733-4d3e-ba1e-dbf541cd9944" >
<scheduler doc:name="Scheduler" doc:id="4f926d32-6f59-4d78-90f7-0d8828168d38" >
<scheduling-strategy >
<fixed-frequency />
</scheduling-strategy>
</scheduler>
<file:read doc:name="Read" doc:id="c0d2de1a-3dc4-412e-be66-add3a1364471" path="test.xls"/>
<ee:transform doc:name="Transform Message" doc:id="5b90aea5-7548-42e9-b7b0-2e626dca5f96">
<ee:message>
</ee:message>
<ee:variables >
<ee:set-variable variableName="fileInput" ><![CDATA[%dw 2.0
import * from dw::core::Binaries
output text/plain
---
toBase64(payload as Binary)]]></ee:set-variable>
</ee:variables>
</ee:transform>
<java:new doc:name="New" doc:id="14117dcb-6144-44af-8012-0fcf74bdfa3e" class="ReadExcelBiff8" constructor="ReadExcelBiff8()"/>
<try doc:name="Try" doc:id="ca853444-611b-4023-9087-6a505348ac44" >
<java:invoke-static doc:name="Invoke static" doc:id="22ebd2e5-1cd1-404e-8198-8f184645399c" class="ReadExcelBiff8" method="transform(java.lang.String)">
<java:args><![CDATA[#[{arg0: vars.fileInput}]]]></java:args>
</java:invoke-static>
</try>
<ee:transform doc:name="Convert to JAVA" doc:id="9ade9557-a3c8-4d65-9d56-d2a157b931f7" >
<ee:message >
<ee:set-payload ><![CDATA[%dw 2.0
output application/java
---
write( (read(payload,"application/csv",{"header" : false})),"application/csv",{"quoteValues" : "false","header" : false})]]></ee:set-payload>
</ee:message>
</ee:transform>
<ee:transform doc:name="Convert to CSV/JSON" doc:id="7a58b017-1843-4a82-861a-e42b813561f4" >
<ee:message >
<ee:set-payload ><![CDATA[output application/json
---
read(payload,"application/csv",{"header" : true})]]></ee:set-payload>
</ee:message>
</ee:transform>
<logger level="INFO" doc:name="Logger" doc:id="7be8f95b-e695-494d-a817-671df8971af2" />
</flow>
<flow name="BIFF5_flow" doc:id="d032f9ed-365e-4ce5-bbc6-79767b50fcb9" >
<scheduler doc:name="Scheduler" doc:id="a61dff19-3696-46c3-b46e-bba54a16f8e7">
<scheduling-strategy>
<fixed-frequency />
</scheduling-strategy>
</scheduler>
<file:read doc:name="Read" doc:id="4645a2e9-301e-469c-ad78-8c57c71ba6c3" path="test.xls" />
<ee:transform doc:name="Transform Message" doc:id="95a8beba-1fbd-486d-8ed1-1cf4b3f33411" >
<ee:message />
<ee:variables >
<ee:set-variable variableName="fileInput" ><![CDATA[%dw 2.0
import * from dw::core::Binaries
output text/plain
---
toBase64(payload as Binary)]]></ee:set-variable>
</ee:variables>
</ee:transform>
<java:new constructor="ReadExcelBiff5()" doc:name="New" doc:id="33b63d13-15de-4a97-889f-b023b4583817" class="ReadExcelBiff5" />
<java:invoke-static method="readExcelFile(java.lang.String)" doc:name="Invoke static" doc:id="a3594c9e-279f-4095-a6a7-9ba797c550e8" class="ReadExcelBiff5" >
<java:args ><![CDATA[#[{arg0: vars.fileInput}]]]></java:args>
</java:invoke-static>
<ee:transform doc:name="Convert to JAVA" doc:id="f4fde9a2-e850-4587-990a-8131ade16eb1" >
<ee:message >
<ee:set-payload ><![CDATA[%dw 2.0
output application/java
---
write( (read(payload,"application/csv",{"header" : false})),"application/csv",{"quoteValues" : "false","header" : false})]]></ee:set-payload>
</ee:message>
</ee:transform>
<ee:transform doc:name="Convert to CSV/JSON" doc:id="bcd30dfc-9c07-4f76-bc03-cbf28c2c3c29" >
<ee:message >
<ee:set-payload ><![CDATA[output application/json
---
read(payload,"application/csv",{"header" : true})]]></ee:set-payload>
</ee:message>
</ee:transform>
<logger level="INFO" doc:name="Logger" doc:id="66285d73-c303-4659-be87-0c2133d69ac0" />
</flow>
</mule>
<?xml version="1.0" encoding="UTF-8"?>
<mule xmlns:ldap="http://www.mulesoft.org/schema/mule/ldap" xmlns:java="http://www.mulesoft.org/schema/mule/java"
xmlns:file="http://www.mulesoft.org/schema/mule/file"
xmlns:crypto="http://www.mulesoft.org/schema/mule/crypto" xmlns:http="http://www.mulesoft.org/schema/mule/http" xmlns:ee="http://www.mulesoft.org/schema/mule/ee/core" xmlns:email="http://www.mulesoft.org/schema/mule/email" xmlns="http://www.mulesoft.org/schema/mule/core" xmlns:doc="http://www.mulesoft.org/schema/mule/documentation" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mulesoft.org/schema/mule/core http://www.mulesoft.org/schema/mule/core/current/mule.xsd
http://www.mulesoft.org/schema/mule/email http://www.mulesoft.org/schema/mule/email/current/mule-email.xsd
http://www.mulesoft.org/schema/mule/ee/core http://www.mulesoft.org/schema/mule/ee/core/current/mule-ee.xsd
http://www.mulesoft.org/schema/mule/http http://www.mulesoft.org/schema/mule/http/current/mule-http.xsd
http://www.mulesoft.org/schema/mule/crypto http://www.mulesoft.org/schema/mule/crypto/current/mule-crypto.xsd
http://www.mulesoft.org/schema/mule/file http://www.mulesoft.org/schema/mule/file/current/mule-file.xsd
http://www.mulesoft.org/schema/mule/java http://www.mulesoft.org/schema/mule/java/current/mule-java.xsd
http://www.mulesoft.org/schema/mule/ldap http://www.mulesoft.org/schema/mule/ldap/current/mule-ldap.xsd">
<flow name="BIFF8_Flow" doc:id="41b61724-f733-4d3e-ba1e-dbf541cd9944" >
<scheduler doc:name="Scheduler" doc:id="4f926d32-6f59-4d78-90f7-0d8828168d38" >
<scheduling-strategy >
<fixed-frequency />
</scheduling-strategy>
</scheduler>
<file:read doc:name="Read" doc:id="c0d2de1a-3dc4-412e-be66-add3a1364471" path="test.xls"/>
<ee:transform doc:name="Transform Message" doc:id="5b90aea5-7548-42e9-b7b0-2e626dca5f96">
<ee:message>
</ee:message>
<ee:variables >
<ee:set-variable variableName="fileInput" ><![CDATA[%dw 2.0
import * from dw::core::Binaries
output text/plain
---
toBase64(payload as Binary)]]></ee:set-variable>
</ee:variables>
</ee:transform>
<java:new doc:name="New" doc:id="14117dcb-6144-44af-8012-0fcf74bdfa3e" class="ReadExcelBiff8" constructor="ReadExcelBiff8()"/>
<try doc:name="Try" doc:id="ca853444-611b-4023-9087-6a505348ac44" >
<java:invoke-static doc:name="Invoke static" doc:id="22ebd2e5-1cd1-404e-8198-8f184645399c" class="ReadExcelBiff8" method="transform(java.lang.String)">
<java:args><![CDATA[#[{arg0: vars.fileInput}]]]></java:args>
</java:invoke-static>
</try>
<ee:transform doc:name="Convert to JAVA" doc:id="9ade9557-a3c8-4d65-9d56-d2a157b931f7" >
<ee:message >
<ee:set-payload ><![CDATA[%dw 2.0
output application/java
---
write( (read(payload,"application/csv",{"header" : false})),"application/csv",{"quoteValues" : "false","header" : false})]]></ee:set-payload>
</ee:message>
</ee:transform>
<ee:transform doc:name="Convert to CSV/JSON" doc:id="7a58b017-1843-4a82-861a-e42b813561f4" >
<ee:message >
<ee:set-payload ><![CDATA[output application/json
---
read(payload,"application/csv",{"header" : true})]]></ee:set-payload>
</ee:message>
</ee:transform>
<logger level="INFO" doc:name="Logger" doc:id="7be8f95b-e695-494d-a817-671df8971af2" />
</flow>
<flow name="BIFF5_flow" doc:id="d032f9ed-365e-4ce5-bbc6-79767b50fcb9" >
<scheduler doc:name="Scheduler" doc:id="a61dff19-3696-46c3-b46e-bba54a16f8e7">
<scheduling-strategy>
<fixed-frequency />
</scheduling-strategy>
</scheduler>
<file:read doc:name="Read" doc:id="4645a2e9-301e-469c-ad78-8c57c71ba6c3" path="test.xls" />
<ee:transform doc:name="Transform Message" doc:id="95a8beba-1fbd-486d-8ed1-1cf4b3f33411" >
<ee:message />
<ee:variables >
<ee:set-variable variableName="fileInput" ><![CDATA[%dw 2.0
import * from dw::core::Binaries
output text/plain
---
toBase64(payload as Binary)]]></ee:set-variable>
</ee:variables>
</ee:transform>
<java:new constructor="ReadExcelBiff5()" doc:name="New" doc:id="33b63d13-15de-4a97-889f-b023b4583817" class="ReadExcelBiff5" />
<java:invoke-static method="readExcelFile(java.lang.String)" doc:name="Invoke static" doc:id="a3594c9e-279f-4095-a6a7-9ba797c550e8" class="ReadExcelBiff5" >
<java:args ><![CDATA[#[{arg0: vars.fileInput}]]]></java:args>
</java:invoke-static>
<ee:transform doc:name="Convert to JAVA" doc:id="f4fde9a2-e850-4587-990a-8131ade16eb1" >
<ee:message >
<ee:set-payload ><![CDATA[%dw 2.0
output application/java
---
write( (read(payload,"application/csv",{"header" : false})),"application/csv",{"quoteValues" : "false","header" : false})]]></ee:set-payload>
</ee:message>
</ee:transform>
<ee:transform doc:name="Convert to CSV/JSON" doc:id="bcd30dfc-9c07-4f76-bc03-cbf28c2c3c29" >
<ee:message >
<ee:set-payload ><![CDATA[output application/json
---
read(payload,"application/csv",{"header" : true})]]></ee:set-payload>
</ee:message>
</ee:transform>
<logger level="INFO" doc:name="Logger" doc:id="66285d73-c303-4659-be87-0c2133d69ac0" />
</flow>
</mule>
Here’s the input file –
Here’s the java output –
Thus, .xls files can be read in Mule 4 by using custom java code.
— By Ashish Shivatare