javascript - Filereader read file using correct encoding when read as readAsArrayBuffer

Question

Welcome To Ask or Share your Answers For Others

javascript - Filereader read file using correct encoding when read as readAsArrayBuffer

posted Oct 24, 2021 in Technique[技术] by 深蓝 (71.8m points)

javascript - Filereader read file using correct encoding when read as readAsArrayBuffer

I am working on reading .csv /xlsx file uploaded using javaScript and get the result as array containing each row . I was able to read the file and get data using FileReader and SheetJs with following code.

// code for the new excel reader
$scope.do_file =  function(files)
{
    $scope.fileContent  = [];
    var X = XLSX;
    var global_wb;
    var f = files[0];
    var reader = new FileReader();
    reader.onload = function(e)
    {
        var data = e.target.result;console.log(data);
        global_wb = X.read(data, {type: 'array'});
        var output = "";
        var result = {};
        global_wb.SheetNames.forEach(function(sheetName) {
            var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});
            if(roa.length) result[sheetName] = roa;
        });
        $scope.fileContent =  result["Sheet1"];
        if(!result["Sheet1"])
        {
            $scope.fileContent =  result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
        }
    };
    reader.readAsArrayBuffer(f);
};

For reading most of the files the code works , but when file containing Hebrew text with Windows-1255 encoding i get gibberish data.

Looking in for more options i tried to read the file as text using reader.readAsText and change the encoding as necessary , check the following code:

function is_Hebrew(data)
{
    var position = data.search(/[u0590-u05FF]/);
    return position >= 0;
}

 $scope.do_file =  function(files)
 {
    var fullResult = [];
    var file =files[0];
      var reader = new FileReader();
        reader.onload = function(e){
            var data = e.target.result;
                if(!is_Hebrew(data.toString()))
                {
                  reader.readAsText(file,'ISO-8859-8');   
                }
            };
        reader.readAsText(file);
        reader.onloadend = function(){
            var lines = reader.result.split('
');
            console.log(lines);
            lines.forEach(element => {
                var cell = element.split(',');
                fullResult.push(cell);
            });

             console.log(reader);
        };
    };

but the above code is not suitable as it does not read the file as each row identifying each cell. if any one of cell contains string with coma separated value (for example if a cell contains a string value such as "25,28,29" ) the array output gives wrong data as it considers each values as each cell.

So i decided to stick with first method but i am not able to change the encoding .Is there a possible way to change encoding in the first code where i have used the readAsArrayBuffer to read the file data ?

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙；凝视深渊过久,深渊将回以凝视…

1 Reply

深蓝 · Answer 1 · 2021-10-23T17:55:44+0000

After going through lot of possible solutions i found that answer to the above question was to combine the above two methods. The first method for reading the xlsx files and second method for reading csv files. Also i have used an additional javaScript library called papaparse in the second method to solve the problem of reading data in each cell

$scope.is_Hebrew = function($data){
var position = $data.search(/[u0590-u05FF]/);
return position >= 0;
}

// code for the new excel reader
$scope.do_file =  function(files)
{
    var config = {
    delimiter: "",  // auto-detect
    newline: "",    // auto-detect
    quoteChar: '"',
    escapeChar: '"',
    header: false,
    trimHeader: false,
    dynamicTyping: false,
    preview: 0,
    encoding: "",
    worker: false,
    comments: false,
    step: undefined,
    complete: undefined,
    error: undefined,
    download: false,
    skipEmptyLines: false,
    chunk: undefined,
    fastMode: undefined,
    beforeFirstChunk: undefined,
    withCredentials: undefined
    };

    $scope.fileContent  = [];
    var f = files[0];
    var fileExtension = f.name.replace(/^.*./, '');
    if(fileExtension == 'xlsx')
    {
        var X = XLSX;
        var global_wb;
        var reader = new FileReader();
        reader.onload = function(e)
        {
            var data = e.target.result;
            global_wb = X.read(data, {type: 'array'});
            var result = {};
            global_wb.SheetNames.forEach(function(sheetName) {
               var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});
               if(roa.length) result[sheetName] = roa;
            });
            $scope.fileContent =  result["Sheet1"];
            if(!result["Sheet1"])
            {
               $scope.fileContent =  result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
            }

        };
        reader.readAsArrayBuffer(f);

    }
    else if(fileExtension == 'csv')
    {
    var reader = new FileReader();
    reader.onload = function(e)
    {
        var data = e.target.result;
        console.log(f);
        console.log($scope.is_Hebrew(data.toString()));
        if(!$scope.is_Hebrew(data.toString()))
        {
           reader.readAsText(f,'ISO-8859-8');   
        }
    };

    reader.readAsText(f);
    reader.onloadend = function(e){
        var c =  Papa.parse(reader.result,[ config])
        console.log(c);
        $scope.fileContent =  c["data"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });

    };

    }
    else
    {
       alert("File Not supported!");
    }

$scope.fileContent.push([]);
};

Categories

javascript - Filereader read file using correct encoding when read as readAsArrayBuffer

javascript - Filereader read file using correct encoding when read as readAsArrayBuffer

Please log in or register to add a comment.

Please log in or register to reply this article.

1 Reply

Please log in or register to add a comment.

Just Browsing Browsing

Most popular tags