`
netskys
  • 浏览: 47774 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类

poi 解析百M Excel ,bs架构 在线概算导入

阅读更多
  今天碰上有人问POI的性能问题  哎,当初哥们做导入概算数据为POI 头疼了N久,终归功夫不负苦心人,最后解决了如下问题
  1 excel 概算文件超大 近 100m (我测试用了个200m的 都不会 OOM  哈哈)
  2 要检查excel中的 #REF 等错误计算公式,因为要入库,同时需要检查 数据的合法性 如 Number类型对象数字等
  3 因为 几万条数据 太过繁琐,所以需要 一次性报告给用户 所有的错误,并且定位错误行列,以供用户差错
  4 允许用户对单独sheet的批量导入  比如 5个sheet 用户高兴倒几个就倒几个 高兴倒哪个就倒哪个
   5 因是web架构,解析时提供 进度条提示 否则180m 几乎5分钟客户会crazy

哎 对此解决问题如下   最下提供代码仅供参考
  1  以前用poi 通用的user model 解析2m以下的还凑合对大数据 直接OOM,经官方查询发现 有个Event modle 并供有demo 小试一下下  ,very goode 详情参考http://poi.apache.org/spreadsheet/how-to.html#event_api

2 没办法 只能对每个cell进行验证 不过大家肯定发愁 我如何知道 excel的列对应 db的哪个column 啊?呵呵 对于这点 哥们我提出了规则 给用户个 匹配管理页面,让用户自己去定义 excel的哪个sheet 对应哪个表  哪个列对用哪个column  当然 在此肯定少不了用反射,哈哈我的最爱,自己 db的 type length 等 以及 entity 的property  这些对应是麻烦点 不过 可以连水平哦
3 报错给用户 这个肯定不能一碰上错 就alert,这样 不友好,也不现实,最好的就是 把所有的错误给用户生成一个文件,可供在线打开以及下载,客户对照着文件改excel  哈哈 要是你愿意的话 就把excel 的cell全称红色也行,不过 那就没办法提示用户 这行 的错误信息, 比如 “xxx表 1100行 20列 xx错误,应xxx ” 那么 在这里就只能用我们的异常机制了,不合法的抛出异常,自己捕获处理,写文件,render view 都可
4 既然用户想选择 那肯定 要给用户提供一个 复选框吧,当然如果 sheet个数不定  那就只能靠ajax 去捞 复选框的个数了,哈哈比较麻烦,但是道理很简单, 选哪个解析哪个啊

5 幸好前台用的ext 找个 process tip 很容易,不过关键是 如何切分任务来定制进度条的进度,刚开始想到了监听进程执行百分比,不过jdk不知道提供这个接口否,问 n多高人,都不知道,其实想想一个线程执行的百分比确实难以预测啊,毕竟cpu 的轮回,况且时关系,我就用了简单的自定制任务比,整个任务量100  完成某个阶段是多少 然后在不同的过程中 ++  呵呵 任务量放在 http session 里,前台的 进度条通过定时器触发ajax 呵呵


呵呵 下面是我写的一些 解析excel的代码 很乱啊  小心点砸
package com.hollysys.ipmip.util;

import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.http.HttpSession;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.CellValueRecordInterface;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.EOFRecord;

import com.hollysys.ipmip.common.CommonDeclare;
import com.hollysys.ipmip.entity.business.estimateimport.EstimateAttributes;
import com.hollysys.ipmip.entity.business.estimateimport.EstimateCalc;
import com.hollysys.ipmip.exceptions.ImportEstimateException;
import com.hollysys.ipmip.service.business.estimateimport.EstimateImportService;

public class EventModelUtil implements HSSFListener {
	private SSTRecord sstrec;
	//存放所选sheet
	private  List<Integer> sheetNums = new ArrayList<Integer>();
	//当前的sheet序号
	private int sheetNum = 0;
	//保存excel的实际sheet个数
	private int count = 0;
	//存放sheet的map
	private Map<Integer, EstimateCalc> sheetMap = null;
	//判断首行
	private int firstRowNum = 0;
	//最终存放首行
	private int firstRowFinal = 0;
	// 存放导入配置文件
	private Map<Integer, Map<Integer, String>> typeRelationMap = null;
	// 存放表的详细信息
	private Map<Integer, Map<String, EstimateAttributes>> tableAttribMap = null;
	// 存放异常信息
	private Exception exception=null;
	private EstimateImportService estimateImportService;
	private HttpSession session;
	private Long userId;
	private OutputStreamWriter fw=null;
	private int countLine=1;
	private Random random=new Random();
	//存放保存数据库间断个数
//	private int bound=0;
	private Logger logger=Logger.getLogger(EventModelUtil.class);
	/**
	 * This method listens for incoming records and handles them as required.
	 * 
	 * @param record
	 *            The record that was found while reading.
	 */
	public EventModelUtil() {

	}

	public EventModelUtil(List<Integer> sheetNums,
			Map<Integer, Map<Integer, String>> typeRelationMap,
			Map<Integer, Map<String, EstimateAttributes>> tableAttribMap,
			Exception e, EstimateImportService estimateImportService,
			HttpSession session, Long userId,OutputStreamWriter fw) {
		this.sheetNums = sheetNums;
		this.typeRelationMap = typeRelationMap;
		this.tableAttribMap = tableAttribMap;
		this.exception = e;
		this.estimateImportService = estimateImportService;
		this.session = session;
		this.userId = userId;
		this.fw=fw;
	}

	@SuppressWarnings("static-access")
	public void processRecord(Record record) {
		java.text.DecimalFormat formatter2 = new java.text.DecimalFormat("#.00");
		if (record.getSid() == BOFRecord.sid) {
			BOFRecord bof = (BOFRecord) record;
			if (bof.getType() == bof.TYPE_WORKBOOK) {
				String str="3,4,5,6";
				changeSession(session, CommonDeclare.ESTIMATE_PROCESS_CONST, Integer.valueOf(createRandom(str, false)), CommonDeclare.ESTIMATE_UPFLAG_ADD_CONST);
				logger.info("新workBook" + bof.getRecordSize());
			} else if (bof.getType() == bof.TYPE_WORKSHEET) {
				logger.info("新的sheet:" + sheetNum);
				bof.getRecordSize();
				isRightSheetNum();
				sheetMap = new LinkedHashMap<Integer, EstimateCalc>();
				sheetNum++;
			}
		}
		try {
			if (record.getSid() == EOFRecord.sid) {
				if (sheetNum > 0) {
					//取出解析的最后一个sheet
					int lastSheetNum=sheetNums.get(sheetNums.size()-1);
					int count=sheetNum*Integer.valueOf(createRandom("19,20,21", false))-10;
					if(sheetNum==lastSheetNum){
						count=Integer.valueOf(createRandom("90,91,92", false));
					}
					changeSession(session, CommonDeclare.ESTIMATE_PROCESS_CONST, count, CommonDeclare.ESTIMATE_UPFLAG_ADD_CONST);
					logger.info("当前sheet首行:" + firstRowFinal);
					logger.info(sheetNum - 1 + "sheet解析结束");
					EOFRecord eof=(EOFRecord)record;
					eof.getRecordSize();
					if(sheetNums.contains(sheetNum - 1)&&sheetMap!=null){
						if(firstRowFinal<=0){
							String str="在"+CommonDeclare.tableName.get(sheetNum-1)+"中配置有问题,请重新配置";
							logger.error(str);
							throw new RuntimeException(str);
						}else{
							if(session.getAttribute("importExceptionMsg")==null||!session.getAttribute("importExceptionMsg").equals("1")){
								changeSession(session, CommonDeclare.ESTIMATE_PROCESS_CONST, Integer.valueOf(createRandom("1,2,3", false)), CommonDeclare.ESTIMATE_UPFLAG_UPDATE_CONST);
								estimateImportService.saveCurrentSheetToTempTable(sheetMap,sheetNum-1, session, userId);
							}
							sheetMap.clear();
						}
					}
					firstRowFinal=0;
					countLine=1;
				}
			}
			if (record.getSid() == BoundSheetRecord.sid) {
				//获得sheet个数
				count++;
			}
			if (record.getSid() == SSTRecord.sid) {
				sstrec = (SSTRecord) record;
			}

			if (sheetNums.contains(sheetNum - 1)) {
				getSwitchRecord(record, formatter2, sheetNum - 1,
						typeRelationMap.get(sheetNum - 1), tableAttribMap
								.get(sheetNum - 1), CommonDeclare.tableName
								.get(sheetNum - 1));
			}
		}catch(RuntimeException e){
			throw e;
		} 
		catch (Exception e) {
			e.printStackTrace();
			setException(e);
			session.setAttribute("importExceptionMsg", "1");
			try {
				if(countLine==1){
					String str="***********************"+CommonDeclare.tableName.get(sheetNum - 1)+"*******************************"+System.getProperty("line.separator");
					fw.write(str);
					logger.error(str);
				}
				String strDetial=countLine+":"+e.getMessage()+System.getProperty("line.separator");
				fw.write(strDetial);
				logger.error(strDetial);
				countLine++;
				fw.flush();
			} catch (Exception e1) {
				logger.error(e1.getMessage());
				e1.printStackTrace();
			}
		}
	}
	private void getSwitchRecord(Record record,
			java.text.DecimalFormat formatter2, int sheetNum,
			Map<Integer, String> typeRelation,
			Map<String, EstimateAttributes> tableAttrib, String tablename)
			throws Exception {
		EstimateCalc calc = null;
		try {
			switch (record.getSid()) {
			case RowRecord.sid:
				RowRecord rowrec = (RowRecord) record;
				int exclude=(sheetNum+1)*20;
				if(exclude>=100) exclude=99;
				changeSession(session, CommonDeclare.ESTIMATE_PROCESS_CONST, Integer.valueOf(createRandom("1,2", false)), CommonDeclare.ESTIMATE_UPFLAG_UPDATE_CONST,exclude);
				calc = (EstimateCalc) CommonDeclare.tempEstimateMClass.get(
						CommonDeclare.typeBrief.get(sheetNum)).newInstance();
				sheetMap.put(rowrec.getRowNumber(), calc);
				break;
			case NumberRecord.sid:
				NumberRecord numrec = (NumberRecord) record;
				Object obj = formatter2.format(numrec.getValue());
				//当为数字的时候判断首行
				if (numrec.getValue() == (numrec.getColumn() + 1.00)) {
					firstRowNum++;
				}
				if (firstRowNum == CommonDeclare.ESTIMATE_COLUMN_CONST) {
					firstRowFinal = numrec.getRow();
				}
				//第一列为序号 统一数字的格式
				if(firstRowFinal>0&&numrec.getColumn()==0){
					java.text.DecimalFormat formatter = new java.text.DecimalFormat("#");
					obj=formatter.format(numrec.getValue());
				}
				EstimateAttributes attrib=getAttrib(typeRelation, tableAttrib,(CellValueRecordInterface) numrec);
				//如果是poi读出是数字而实际上不是数字的就不做格式转化
				if(attrib!=null&&!attrib.getData_type().trim().equals("NUMBER")){
					obj=numrec.getValue();
					if(obj.toString().endsWith(".0")){
						obj=obj.toString().substring(0,obj.toString().indexOf("."));
					}
				}
				calc = sheetMap.get(numrec.getRow());
				lessColumnLength(obj, calc, tablename,
						(CellValueRecordInterface) numrec,attrib,getCellTitle(typeRelation, (CellValueRecordInterface) numrec));
				break;
			case LabelSSTRecord.sid:
				LabelSSTRecord lrec = (LabelSSTRecord) record;
				obj = sstrec.getString(lrec.getSSTIndex());
				if(obj!=null&&firstRowFinal==0&&isNumber(obj.toString().trim())){
					if (Long.valueOf(obj.toString()) == (lrec.getColumn() + 1.0)) {
						firstRowNum++;
					}
					if (firstRowNum == CommonDeclare.ESTIMATE_COLUMN_CONST) {
						firstRowFinal = lrec.getRow();
					}
				}else{
					firstRowNum = 0;
				}
				calc = sheetMap.get(lrec.getRow());
				isNumberColumn(obj,tablename, (CellValueRecordInterface) lrec,
						getAttrib(typeRelation, tableAttrib,
								(CellValueRecordInterface) lrec),getCellTitle(typeRelation, (CellValueRecordInterface) lrec));
				lessColumnLength(obj, calc, tablename,
						(CellValueRecordInterface) lrec, getAttrib(
								typeRelation, tableAttrib,
								(CellValueRecordInterface) lrec),getCellTitle(typeRelation, (CellValueRecordInterface) lrec));

				break;
			case BlankRecord.sid:
				firstRowNum = 0;
				break;
			case FormulaRecord.sid:
				FormulaRecord furd = (FormulaRecord) record;
				firstRowNum = 0;
				Double b = furd.getValue();
				Object tb = formatter2.format(b);
				calc=sheetMap.get(furd.getRow());
				lessColumnLength(tb, calc, tablename,
						(CellValueRecordInterface) furd, getAttrib(
								typeRelation, tableAttrib,
								(CellValueRecordInterface) furd),getCellTitle(typeRelation, (CellValueRecordInterface) furd));
				break;
			default:
				firstRowNum = 0;
			}
		} catch (ImportEstimateException e){
			logger.error(e.getMessage());
			e.printStackTrace();
			throw e;
		}
	}

	/**
	 * 获得配置文件
	 */
	public EstimateAttributes getAttrib(Map<Integer, String> typeRelation,
			Map<String, EstimateAttributes> tableAttrib,
			CellValueRecordInterface record) {
		String str=typeRelation.get(record.getColumn() + 1);
		if(str!=null){
			return tableAttrib.get(str.substring(0,str.indexOf(",")));
		}else{
			return null;
		}
	}
	
	/**
	 * 获得cell 的标题
	 */
	public String getCellTitle(Map<Integer, String> typeRelation,CellValueRecordInterface record){
		String str=typeRelation.get(record.getColumn() + 1);
		if(str!=null){
			return str.substring(str.indexOf(",")+1);
		}else{
			return null;
		}
	}

	/**
	 * 判断cell里内容小于数据库字段的值
	 */
	public void lessColumnLength(Object obj, EstimateCalc calc,
			String tablename, CellValueRecordInterface record,
			EstimateAttributes attrib,String cellTitle) throws Exception {
		if(firstRowFinal>0&&attrib!=null){
			int length = 0;
			Pattern p = Pattern.compile("^[\u0080-\u07ff\u0800-\uffff]+$");
			if (obj != null && !obj.equals("")) {
				for (int k = 0; k < obj.toString().trim().length(); k++) {
					char c = obj.toString().trim().charAt(k);
					Matcher m = p.matcher(c + "");
					if (m.find()) {
						length += 2;
					} else {
						length++;
					}
				}
			}
			if (length <= attrib.getData_length()) {
					if(obj!=null&&!obj.toString().trim().equals("")){
						if(session.getAttribute("importExceptionMsg")==null||!session.getAttribute("importExceptionMsg").equals("1")){
							org.apache.commons.beanutils.BeanUtils.copyProperty(calc,
									getBeanProperyNameByTableField(attrib.getColumn_name()
											.trim()), obj);
						}
					}
			} else {
				String str="在【" +tablename + " "+cellTitle+"】中【"
				+ (record.getRow() + 1) + "行" + (record.getColumn() + 1)
				+ "列】内容为【"+obj+"】,字符长度【"+length+"】,应填写小于【" + attrib.getData_length() + "】位的内容";
				logger.error(str);
				throw new ImportEstimateException(str);
			}
		}
	}

	/**
	 * 应填写数字的列出现非数字
	 */
	public void isNumberColumn(Object obj,String tablename,
			CellValueRecordInterface record, EstimateAttributes attrib,String cellTitle)
			throws Exception {
		if(firstRowFinal>0&&attrib!=null){
			if(obj!=null){
				//将全角的转化成半角的 否则无法trim
				obj=obj.toString().replaceAll(" ", " ");
				obj = obj.toString().trim();
				if ("NUMBER".equals(attrib.getData_type().trim())&&!obj.toString().equals("")&&!isNumber(obj.toString())) {
					throw new ImportEstimateException("在【" + tablename + " "+cellTitle+"】中【"
							+ (record.getRow()+1) + "行" + (record.getColumn()+1) + "列】内容为【"+obj+"】,应填写数字");
				}
			}
		}
	}
	
	/**
	 * 判断是否为5个sheet
	 */
	public void isRightSheetNum(){
		if (count > 5) {
			String str="导入文件有多余的sheet或者存在隐藏sheet,请检查多出的 " + (count - 5)
			+ "个sheet";
			logger.error(str);
			throw new RuntimeException(str);
		}
		if (count < 5) {
			String str="导入文件格式不符合 矿、土、安、设备、其他 顺序格式, 五个sheet依次排序格式";
			logger.error(str);
			throw new RuntimeException(str);
		}
	}

	/**
	 * get Bean ProperyName By Table Field.
	 * 
	 * @param fieldName
	 */
	private String getBeanProperyNameByTableField(String fieldName) {
		fieldName = fieldName.toLowerCase();
		while (fieldName.indexOf("_") > 0) {
			int pos = fieldName.indexOf("_");
			fieldName = fieldName.substring(0, pos)
					+ fieldName.substring(pos + 1, pos + 2).toUpperCase()
					+ fieldName.substring(pos + 2);
		}
		return fieldName;
	}
	/**
	 * 判断是否为数字 
	 */
	public boolean isNumber(String str){
		//处理全角空格
		str=str.replaceAll(" ", " ");
		return Pattern.matches("^\\d+$|^\\d+\\.\\d+$", str.trim());
	}

	public void setException(Exception exception) {
		this.exception = exception;
	}
	
	/**
	 * 获得session  切改变session
	 * @param HttpSession session 当前用户session
	 * @param String attribute    session的key
	 * @param Object changeValue  session 的value
	 * @param String flag        1:新增 纯赋值 2:修改 在原来的基础上加 3:删除 
	 * @return
	 */
	public  static void changeSession(HttpSession session,String attribute,Object changeValue,String flag,int...exclude){
		if(session!=null&&attribute!=null&&!"".equals(attribute)&&!"".equals(flag)){
			
			if(CommonDeclare.ESTIMATE_UPFLAG_ADD_CONST.equals(flag)){
				session.setAttribute(attribute, changeValue);
			}else if(CommonDeclare.ESTIMATE_UPFLAG_UPDATE_CONST.equals(flag)){
				if(session.getAttribute(attribute)!=null){
					Object obj=session.getAttribute(attribute);
					if(changeValue  instanceof Integer){
						if(exclude!=null&&exclude.length>0){
							int i=0;
							for(int n:exclude){
								i=n;
							}
							if((Integer)obj>=i){
								session.setAttribute(attribute, i);
							}else{
								session.setAttribute(attribute, (Integer)obj+(Integer)changeValue);
							}
						}else{
							session.setAttribute(attribute, (Integer)obj+(Integer)changeValue);
						}
					}else if(changeValue  instanceof String){
						session.setAttribute(attribute, (String)obj+(String)changeValue);
					}
				}
			}else if(CommonDeclare.ESTIMATE_UPFLAG_DELETE_CONST.equals(flag)){
				if(session.getAttribute(attribute)!=null){
					session.removeAttribute(attribute);
				}
			}
		}
	}
	
	/**
	 * 根据要求随机生成随机数(字符)
	 * @param String str="1,2,3,...."or str="1234567" 字符串 如果是数组格式用逗号隔开
	 * @param boolean flag 是否把结果连接成串 true 连接 false 否 
	 * @param int ... n 可选参数 如果有值则按照要求截取 否则按照
	 */
	public static String createRandom(String str , boolean flag,int... n){
		String result="";
		if(str!=null&&!"".equals(str)){
			String[] array=str.split((str.indexOf(",")>-1)?",":"");
			Random random=new Random();
			int i=0;
			if(n!=null&&n.length>0){
				for(int j :n){
					i=j;
				}
			}else{
				i=array.length-1;
			}
			result=array[random.nextInt(i)];
			if(flag){
				StringBuilder builder=new StringBuilder();
				for(int m=0;m<i;m++){
					builder.append(result=array[random.nextInt(i)]);
				}
				result=builder.toString();
			}else{
				result=array[random.nextInt(i)];
			}
		}
		return result;
	}
}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics