基于node编写excel读取和导出的小工具_Node.js

背景

领导给同事安排了一个体力活，根据表1（7k+条）筛选表2(4w+条)，然后合并两个表相同的数据。听起来是挺简单的需求，加个外表筛选就完事了么，基础的excel操作他还是会的，但是仔细研究了下，发现行不通，filter和match都会有一定的局限。并且最后的和并新表也是个困难。于是来求助，看能不能写个小脚本，跑一下解决问题。我了解了下需求，觉得可行，于是便有了这个脚本

需求描述

表1格式

姓名	年龄	住址
张三	20	上海外滩18号
李四	21	上海浦东新区
王五	22	御青路1号
。。。	。。。	。。。

表2格式

昵称	注册时间	住址
二蛋	2021	18号
双下巴大圆脸	2015	上海
狗子	2022	御青路
。。。	。。。	。。。

他的需求，就是根据表2的住址模糊筛选出表格1住址列所有匹配的数据。然后合并信息。

介绍结束、开始码代码

1、 node读取excel并解析数据

我这里是用了node的xlsx

// 安装xlsx
npm install --save xlsx

xlsx本身提供了一个读取文件方法，就不需要用fs的读取了，核心代码如下

 const workbook = xlsx.readfile(filepath);

 // 我这里只需要第一个，对于多个sheet的情况，这里加个循环就好了
 const sheetname = workbook.sheetnames[0];
 const sheet = workbook.sheets[sheetname];

 // 将表内容转换为 json 数据
 const data = xlsx.utils.sheet_to_json(sheet);

这里推荐使用vscode的调试模式，可以很方便的查看变量的实际值，不用console一遍一遍查看

2、过滤数据,并合并需要的数据

拿到表1和表2的数据后，就是过滤数据的逻辑了

// 过滤文件
const processingdata = ({filterfiledata,resourcefiledata}) => {
    // 创建一个包含所有 filterfiledata.地址 的正则表达式
    const areapattern = new regexp(filterfiledata.map(item => item['地址']).join('|'));
    
    // 记录匹配结果
    const matches = resourcefiledata.reduce((acc, item2) => {
        const match = item2['地址'].match(areapattern);
        if (match) {
            // 找到匹配的 arr1 项
            const matcheditem1 = filterfiledata.find(item1 => item1['地址'] === match[0]);
            
            acc.push({
                filterfiledata: matcheditem1,
                resourcefiledata: item2
            });
        }
        return acc;
    }, []);
    
    
    return matches
}

对于这种大数据过滤，尽可能的减少循环套循环，我这里是直接把表2住址拼接了一个正则，用match去校验表1的地址列，是否包含表2字符串，有更好的方案，可以在评论区打出来交流下

3、导出excel

在第二步，我把匹配成功的表1和表2数据都存在了一个数组，然后再第三步使用,这里就是拼数据和写数据了

// 生成筛选后的excel
const writeexcel = ({writedata,resourcefilename}) => {
    const data = writedata.map((item, index) => {
        const {filterfiledata,resourcefiledata} = item
        return {
            '序号': index + 1,
            '所属区县': filterfiledata['所属区县'],
            '户号': resourcefiledata['户号'],
            '户名': resourcefiledata['户名'],
            '户号地址': resourcefiledata['地址'],
            '机构全称': filterfiledata['机构全称'],
            '机构类型': filterfiledata['机构类型'],
            '机构地址': filterfiledata['地址'],
        }
    })
    // 将数据转换为 worksheet
    const worksheet = xlsx.utils.json_to_sheet(data);

    // 创建一个新的 workbook，并附加 worksheet
    const workbook = xlsx.utils.book_new();
    xlsx.utils.book_append_sheet(workbook, worksheet, 'sheet1');

    let outputfilename = `${resourcefilename.split('.')[0]}-筛选结果.xlsx`
    let outputfilepath = path.join(outputfiledirpath,`./${outputfilename}`)

    // 保存 excel 文件
    xlsx.writefile(workbook, outputfilepath);
    
    console.log(`导出文件成功,地址在:${outputfilepath}`)
}

完整代码

// 1、读取filterfile和resourcefile目录下的文件
// 2、过滤源数据
// 3、将过滤好的数据，写入一个excel

const fs = require("fs");
const xlsx = require("xlsx");
const path = require("path");

const resourcefiledirpath = path.join(__dirname, "./resourcefile");
const filterfilename = "filter.xlsx";
const filterfiledirpath = path.join(__dirname, `./filterfile/${filterfilename}`);
const outputfiledirpath = path.join(__dirname, `./outputfile`);

// 读取excel文件
const readfile = (filepath) => {
  return new promise((resolve, reject) => {
    const workbook = xlsx.readfile(filepath);
    
    // 暂时只解析第一个sheet
    const sheetname = workbook.sheetnames[0];
    const sheet = workbook.sheets[sheetname];
    
    // 将表内容转换为 json 数据
    const data = xlsx.utils.sheet_to_json(sheet);
    
    resolve(data);
  });
};

// 过滤文件
const processingdata = ({filterfiledata,resourcefiledata}) => {
    // 创建一个包含所有 filterfiledata.地址 的正则表达式
    const areapattern = new regexp(filterfiledata.map(item => item['地址']).join('|'));

    // 记录匹配结果
    const matches = resourcefiledata.reduce((acc, item2) => {
        const match = item2['地址'].match(areapattern);
        if (match) {
            // 找到匹配的 arr1 项
            const matcheditem1 = filterfiledata.find(item1 => item1['地址'] === match[0]);
            acc.push({
                filterfiledata: matcheditem1,
                resourcefiledata: item2
            });
        }
        return acc;
    }, []);

    return matches
}
// 生成筛选后的excel
const writeexcel = ({writedata,resourcefilename}) => {
    const data = writedata.map((item, index) => {
        const {filterfiledata,resourcefiledata} = item
        return {
            '序号': index + 1,
            '所属区县': filterfiledata['所属区县'],
            '户号': resourcefiledata['户号'],
            '户名': resourcefiledata['户名'],
            '户号地址': resourcefiledata['地址'],
            '机构全称': filterfiledata['机构全称'],
            '机构类型': filterfiledata['机构类型'],
            '机构地址': filterfiledata['地址'],
        }
    })
    // 将数据转换为 worksheet
    const worksheet = xlsx.utils.json_to_sheet(data);

    // 创建一个新的 workbook，并附加 worksheet
    const workbook = xlsx.utils.book_new();
    xlsx.utils.book_append_sheet(workbook, worksheet, 'sheet1');

    let outputfilename = `${resourcefilename.split('.')[0]}-筛选结果.xlsx`
    let outputfilepath = path.join(outputfiledirpath,`./${outputfilename}`)

    // 保存 excel 文件
    xlsx.writefile(workbook, outputfilepath);
    
    console.log(`导出文件成功,地址在:${outputfilepath}`)
}

const main = async () => {
  
  console.time("总用时");
  const files = fs.readdirsync(resourcefiledirpath);
  let filterfile = await readfile(filterfiledirpath);
//   过滤掉没有地址 和指定关键字的文件
  filterfile = filterfile.filter(item => {
    let area = item['地址']
    let blacklist = ['0','无', '无地址']
    return area && !blacklist.includes(area)
  })


  console.log(files);
  for (const filename of files) {
    
    console.time(`${filename}用时：`);
    console.log(`正在读取文件${filename}`)
    // 读取待筛选数据
    let resourcefile = await readfile(
      path.resolve(resourcefiledirpath, filename)
    );
    console.log(`读取文件成功，正在解析文件`)
    // 解析文件
    const writedata = processingdata({
        filterfiledata: filterfile,
        resourcefiledata: resourcefile,
    })
    console.log(`解析文件成功，正在导出文件`)
    // 写入数据
    writeexcel({
        writedata,
        resourcefilename: filename
    })
    console.timeend(`${filename}用时：`);

  }

  console.timeend("总用时");
};

// 执行
try {
  main();
  
} catch (error) {
    console.log('error',error)
}