背景
建表SQL
测试写法
非事务,循环单次单条插入
有事务,循环单次单条插入(Simple模式 )
批量操作-BEGIN END
批量操作-foreach batch
JDBC原生batch-prepared
测试结果
结论
近期项目要转数据库,需要转移几个亿的数据。用的Mybatis框架,结果开发人员发现 foreach batch 写法在大量数据插入的时候效率很低,而且还出现了 ORA-04036(PGA 内存使用超限)错误。
那在大量数据操作的情况下,用哪种方式效率最高呢?我这里单独用了半天时间研究了一下。
强调:效率跟表大小、字段长度有关系,这里的测试不具有普遍性,仅供参考!
-- Test fixture for the bulk-insert benchmarks below.
-- NOTE(review): the CREATE TABLE omits a schema prefix while the INSERT
-- targets TEST.STUDENT — confirm the session's current schema is TEST.
DROP TABLE STUDENT;

CREATE TABLE student (
    id    number(20) PRIMARY KEY,
    name  varchar(20) NOT NULL,
    name2 char(20) DEFAULT 'default' NOT NULL,
    age   number(5)
) tablespace TBS_CUR_DAT;

COMMENT ON COLUMN student.name  IS '姓名';
COMMENT ON COLUMN student.name2 IS '姓名2';
COMMENT ON COLUMN student.age   IS '年龄';

-- Seed row so getMaxId() has something to return on a fresh table.
INSERT INTO TEST.STUDENT (ID, NAME, NAME2, AGE) VALUES(1, '1', '1', 1);
/**
 * Baseline benchmark: insert {@code num} rows one statement at a time with
 * no surrounding transaction (each insert commits on its own).
 *
 * @param num number of rows to insert
 * @return "ok" on completion; elapsed milliseconds are printed to stdout
 */
@GetMapping(value = "/effective/student/insert/for")
public String insertFori(int num) {
    long start = System.currentTimeMillis();
    // Null-safe start id, consistent with the other benchmark endpoints.
    // The original `long maxId = studentService.getMaxId() + 1;` throws a
    // NullPointerException via unboxing when the table is empty.
    Long maxId = studentService.getMaxId();
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        // One round trip and one commit per row — this is the slow baseline.
        studentService.insert(student);
    }
    long end = System.currentTimeMillis();
    System.out.println("循环插入-无事务 执行时间:" + (end - start));
    return "ok";
}
<!-- Single-row insert used by the per-row benchmark endpoints.
     name2 is omitted: the column carries DEFAULT 'default' NOT NULL. -->
<insert id="insert" parameterType="com.batch.entity.Student">
    INSERT INTO STUDENT (ID, NAME, AGE)
    VALUES (#{id,jdbcType=NUMERIC},
            #{name,jdbcType=VARCHAR},
            #{age,jdbcType=DECIMAL})
</insert>
/**
 * Benchmark: insert {@code num} rows one statement at a time inside a single
 * Spring-managed transaction (MyBatis SIMPLE executor) — one commit at the end.
 *
 * @param num number of rows to insert
 * @return "ok" on completion; elapsed milliseconds are logged
 */
@Transactional
@GetMapping(value = "/effective/student/insert/fortrans")
public String insertListTrans(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    // Reuse the value already fetched: the original called getMaxId() a
    // second time here, costing an extra query per request and risking a
    // different value if another writer slipped in between the two calls.
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        studentService.insert(student);
    }
    long end = System.currentTimeMillis();
    logger.info("循环插入-有事务 执行时间:" + (end - start));
    return "ok";
}
/**
 * Benchmark: build the whole row list in memory, then send it as a single
 * anonymous PL/SQL block (BEGIN insert; insert; ... END;) in one round trip.
 *
 * @param num number of rows to insert
 * @return "ok" on completion; elapsed milliseconds are logged
 */
@GetMapping(value = "/effective/student/insert/beginend")
public String insertListBeginEnd(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    // Reuse the value already fetched: the original queried getMaxId() twice.
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    List<Student> studentList = new ArrayList<>();
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent("" + pId);
        studentList.add(student);
    }
    logger.info("Mybatis SQL return :" + studentService.insertListBeginEnd(studentList));
    long end = System.currentTimeMillis();
    logger.info("单次多条insert批量插入(BEGIN END) 执行时间:" + (end - start));
    return "ok";
}
<!-- Batch insert as one anonymous PL/SQL block:
       begin insert ...; insert ...; ...; end;
     The whole list travels in a single statement / single round trip. -->
<insert id="insertListBeginEnd">
    <foreach collection="studentList" item="item" index="index"
             open="begin" close=";end;" separator=";">
        insert into STUDENT (ID, NAME, AGE)
        values (#{item.id}, #{item.name}, #{item.age})
    </foreach>
</insert>
@Transactional // 对速度基本无影响 @GetMapping(value = "/effective/student/insert/batch") public String insertList2(int num) { long start = System.currentTimeMillis(); Long maxId = studentService.getMaxId(); long tempMaxId = (maxId == null) ? 0 : studentService.getMaxId() + 1; List<Student> studentList = new ArrayList<>(); for (int i = 0; i < num; i++) { long pId = tempMaxId + i; Student student = getStudent(pId + ""); studentList.add(student); } logger.info("Mybatis SQL return :" + studentService.insertListBatch(studentList)); long end = System.currentTimeMillis(); logger.info("批量插入 执行时间:" + (end - start)); return "ok"; }
<!-- Multi-row insert via INSERT ... SELECT ... UNION ALL (Oracle has no
     multi-row VALUES clause). NOTE(review): every row inflates the statement
     text, so huge lists blow up parse time and PGA memory (ORA-04036) —
     cap the per-statement batch size when using this mapper. -->
<insert id="insertListBatch">
    insert into STUDENT (ID, NAME, AGE)
    <foreach collection="studentList" item="item" index="index"
             separator="union all">
        (select #{item.id},
                #{item.name,jdbcType=VARCHAR},
                #{item.age,jdbcType=DECIMAL}
         from dual)
    </foreach>
</insert>
.... @Override public void run() { Connection connection = null; PreparedStatement preparedStatement = null; try { connection = oracleConnect.getConnection(); connection.setAutoCommit(false); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Error"); } // 开始时间 Long begin = System.currentTimeMillis(); // insert sql 前缀 String sqlTemplate = "INSERT INTO TEST.STUDENT(ID, NAME, AGE) VALUES (?,?,?)"; PreparedStatement insertStmt = null; try { insertStmt = connection.prepareStatement(sqlTemplate); } catch (SQLException e) { e.printStackTrace(); throw new RuntimeException("创建statement失败."); } try { // 设置事务为非自动提交 connection.setAutoCommit(false); long tempMaxId = (maxId == null) ? 0 : maxId + 1; for (int i = 0; i < this.num; i++) { long pId = tempMaxId + i; Student student = getStudent(pId + ""); insertStmt.setBigDecimal(1, student.getId()); insertStmt.setString(2, student.getName()); insertStmt.setBigDecimal(3, student.getAge()); insertStmt.addBatch(); } /** * executeBatch 返回的是 int[] 数组,和批处理中的执行的SQL一一对应,值代表影响的行数。 * 元素>=0,影响的行数。 * 元素=-2,执行成功,但无法获取影响的行数。 * 元素=-3,执行失败 */ int[] result = insertStmt.executeBatch(); int updateCount = insertStmt.getUpdateCount(); System.out.println("result= " + JSON.toJSONString(result)); System.out.println("updateCount= " + updateCount); // 提交事务, 先关闭自动提交conn.setAutoCommit(false); connection.commit(); } catch (SQLException e) { DbUtil.rollback(connection); e.printStackTrace(); throw new RuntimeException("SQL执行错误"); } finally { // 关闭 Statement DbUtil.close(insertStmt); // 归还连接 connect oracleConnect.returnConnection(connection); } // 结束时间 Long end = System.currentTimeMillis(); System.out.println("线程" + Thread.currentThread().getName() + "数据插入耗时: " + (end - begin) + " ms"); } .....
每种写法都各进行三次测试,表中数值为耗时,单位是毫秒(ms)。
items | 1000 条 | 2500 条 | 5000 条 |
非事务,循环单次单条插入 | 4493,3174,4109 | 11391,11088,13093 | - |
有事务,循环单次单条插入(Simple模式 ) | 1956,1344,1224 | 3437,3393,3460 | 5967,5700,5844 |
批量操作-BEGIN END | 819,103,102 | 3344,219,237 | 18312,652,462 |
批量操作-foreach batch | 453,45,39 | 4063,396,81 | 35399,174,195 |
数据量很大的情况下,上面的方式都不再适合,耗时会很长,应改用下面两种 batch 方式。
items | 1万条 | 5万条 | 10万条 |
JDBC原生batch-prepared | 281,1297,299 | 1132,1065,735 | 713,758,886 |
有事务,循环单次单条插入(Batch模式) | 227,163,168 | 1007,892,829 | 2004,1699,1739 |
再次强调:效率跟表大小、字段长度有关系,这里的测试不具有普遍性,仅供参考!
转摘还请注明出处,感谢!