在这个脚本中我使用的是Python的ORM框架SQLAlchemy来连接远程的MySQL数据库的,在写这个脚本的时候为了方便参数的输入,我就给之前和这个脚本添加了一个新的功能,就是在调用这个脚本的时候后面可以传入参数,这样只需要在执行脚本的时候就可以方便的输入所要使用的路径。
那么现在开始对导出的数据进行解析咯~由于之前已经将列名和数据都写入到了文本文件中, 现在只需要一行一行的读取出来,然后和类中的属性一一对应就可以了,那么现在开始读取资料吧~老样子,先用切片将之前写在文本文件中的行列名取出来咯~然后我定义了一个字典,其中key值存入列名,value中存数组的下标,这样在后面赋值的时候就可以一个属性名对应一个列名了,不仅逻辑清晰,代码也好看多了,这样再使用Python中的split函数将字符串转换成list来读取到类中,然后每读取1000次再将session提交到数据库一次,这样可以大幅度的减小对数据库的操作,而提高性能。下面就附上代码了:
from sqlalchemy.orm import mapper, sessionmaker from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData,Double,TIMESTAMP from sqlalchemy.sql.expression import Cast from sqlalchemy.ext.compiler import compiles from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.dialects.mysql import \ BIGINT, BINARY, BIT, BLOB, BOOLEAN, CHAR, DATE, \ DATETIME, DECIMAL, DECIMAL, DOUBLE, ENUM, FLOAT, INTEGER, \ LONGBLOB, LONGTEXT, MEDIUMBLOB, MEDIUMINT, MEDIUMTEXT, NCHAR, \ NUMERIC, NVARCHAR, REAL, SET, SMALLINT, TEXT, TIME, TIMESTAMP, \ TINYBLOB, TINYINT, TINYTEXT, VARBINARY, VARCHAR, YEAR import os import re,pdb,sys,getopt opts, args = getopt.getopt(sys.argv[1:], "hf:u:p:i:d",["help","file=","user=","pass=","ip=","db="]) input_file="" username="" password="" host="" databasename="" def usage(): print """ -h --help print the help -g --gbk The path of the backup file -b --gdb Have to backup the database path """ for op, value in opts: if op in ("-f","--file"): input_file = value elif op in ("-u","--user"): username = value elif op in ("-p","--pass"): password = value elif op in ("-i","--ip"): host = value elif op in ("-d","--db"): databasename = value elif op in ("-h","--help"): usage() sys.exit() Base = declarative_base() mysql_db = create_engine('mysql://'+username+':'+password+'@'+host+':3306/'+databasename+'') metadata.create_all(mysql_db) class User(Base): __tablename__ = 'users' DAY_BATCH_ID = Column(Integer, primary_key=True) ITEMID = Column(Integer, primary_key=True) NETSALES = Column(DOUBLE, nullable=True) NETQTY = Column(DOUBLE, nullable=True) VOIDSALES = Column(DOUBLE, nullable=True) VOIDQTY = Column(DOUBLE, nullable=True) RETURNSALES = Column(DOUBLE, nullable=True) RETURNQTY = Column(DOUBLE, nullable=True) DISCOUNTSALES = Column(DOUBLE, nullable=True) DISCOUNTQTY = Column(DOUBLE, nullable=True) OTHQTY = Column(DOUBLE, nullable=True) STAFFQTY = Column(DOUBLE, nullable=True) WASTEQTY = Column(DOUBLE, nullable=True) MODIFQTY = Column(DOUBLE, nullable=True) COST = Column(DOUBLE, nullable=True) UNITCOST = Column(DOUBLE, nullable=True) SALEQTY = Column(DOUBLE, nullable=True) SALEAMT = Column(DOUBLE, nullable=True) ADJ_AMT = Column(DOUBLE, nullable=True) OVERRIDE_AMT = Column(DOUBLE, nullable=True) OVERRIDE_QTY = Column(Integer, nullable=True) OTHAMT = Column(DOUBLE, nullable=True) WASTEAMT = Column(DOUBLE, nullable=True) TAXAMT = Column(DOUBLE, nullable=True) SELFCOMMISSION = Column(DOUBLE, nullable=True) FULLCOMMISSION = Column(DOUBLE, nullable=True) NET_ITEMS = Column(Integer, nullable=True) VOID_ITEMS = Column(Integer, nullable=True) RETURN_ITEMS = Column(Integer, nullable=True) DISCOUNT_ITEMS = Column(Integer, nullable=True) OTH_ITEMS = Column(Integer, nullable=True) SALE_ITEMS = Column(Integer, nullable=True) MODIF_ITEMS = Column(Integer, nullable=True) WASTE_ITEMS = Column(Integer, nullable=True) TICKETCOUNT = Column(Integer, nullable=True) AVG_COST = Column(DOUBLE, nullable=True) VOIDSALES_TAX = Column(DOUBLE, nullable=True) RETURNSALES_TAX = Column(DOUBLE, nullable=True) DISCOUNTSALES_TAX = Column(DOUBLE, nullable=True) OVERRIDE_AMT_TAX = Column(DOUBLE, nullable=True) OTHAMT_TAX = Column(DOUBLE, nullable=True) WASTEAMT_TAX = Column(DOUBLE, nullable=True) SALESAMT_TAX = Column(DOUBLE, nullable=True) LAST_TRANS_TIME = Column(TIMESTAMP, nullable=True) mapper(User, userTable) Session = sessionmaker() Session.configure(bind=mysql_db) session = Session() def main(): file1 = open(input_file).readlines() #通过切片取得行中的值并将行的字符串组装成数组保持在字典中 file2 = file1[:1] for line in lines2: result = line.split(",") mydic={} for v,k in enumerate(result): mydic[k]=v #取得文件中从第二行开始的所有数据,通过切片去除行的数据 file3 = file1[1:-1] for line1 in file3: commint = 0 u = User() line1 = line1.split(',') u.DAY_BATCH_ID = line1[mydic['DAY_BATCH_ID']] u.ITEMID = line1[mydic['ITEMID']] u.NETSALES = line1[mydic['NETSALES']] u.NETQTY = line1[mydic['NETQTY']] u.VOIDSALES = line1[mydic['VOIDSALES']] u.VOIDQTY = line1[mydic['VOIDQTY']] u.RETURNSALES = line1[mydic['RETURNSALES']] u.RETURNQTY = line1[mydic['RETURNQTY']] u.DISCOUNTSALES = line1[mydic['DISCOUNTSALES']] u.DISCOUNTQTY = line1[mydic['DISCOUNTQTY']] u.OTHQTY = line1[mydic['OTHQTY']] u.STAFFQTY = line1[mydic['STAFFQTY']] u.WASTEQTY = line1[mydic['WASTEQTY']] u.MODIFQTY = line1[mydic['MODIFQTY']] u.COST = line1[mydic['COST']] u.UNITCOST = line1[mydic['UNITCOST']] u.SALEQTY = line1[mydic['SALEQTY']] u.SALEAMT = line1[mydic['SALEAMT']] u.ADJ_AMT = line1[mydic['ADJ_AMT']] u.OVERRIDE_AMT = line1[mydic['OVERRIDE_AMT']] u.OVERRIDE_QTY = line1[mydic['OVERRIDE_QTY']] u.OTHAMT = line1[mydic['THAMT']] u.WASTEAMT = line1[mydic['WASTEAMT']] u.TAXAMT = line1[mydic['TAXAMT']] u.SELFCOMMISSION = line1[mydic['SELFCOMMISSION']] u.FULLCOMMISSION = line1[mydic['FULLCOMMISSION']] u.NET_ITEMS = line1[mydic['NET_ITEMS']] u.VOID_ITEMS = line1[mydic['VOID_ITEMS']] u.RETURN_ITEMS = line1[mydic['RETURN_ITEMS']] u.DISCOUNT_ITEMS = line1[mydic['DISCOUNT_ITEMS']] u.OTH_ITEMS = line1[mydic['OTH_ITEMS']] u.SALE_ITEMS = line1[mydic['SALE_ITEMS']] u.MODIF_ITEMS = line1[mydic['MODIF_ITEMS']] u.WASTE_ITEMS = line1[mydic['WASTE_ITEMS']] u.TICKETCOUNT = line1[mydic['TICKETCOUNT']] u.AVG_COST = line1[mydic['AVG_COST']] u.VOIDSALES_TAX = line1[mydic['VOIDSALES_TAX']] u.RETURNSALES_TAX = line1[mydic['RETURNSALES_TAX']] u.DISCOUNTSALES_TAX = line1[mydic['DISCOUNTSALES_TAX']] u.OVERRIDE_AMT_TAX = line1[mydic['OVERRIDE_AMT_TAX']] u.OTHAMT_TAX = line1[mydic['OTHAMT_TAX']] u.WASTEAMT_TAX = line1[mydic['WASTEAMT_TAX']] u.SALESAMT_TAX = line1[mydic['SALESAMT_TAX']] u.LAST_TRANS_TIME = line1[mydic['LAST_TRANS_TIME']] session.add(u) commint = commint+1 if commint>1000: commint = commint %1000 session.commit() session.close() if __name__ == '__main__': main()