python从入门到实践,文件读写与Excel操作

文件读写操作通过open()函数进行操作,使用with关键字读写文件就不用再惦记着要何时关闭文件 。

先来看一个简单常规的文件读写操作;

f = open('test.txt', mode='w ', encoding='utf-8')f.write(','.join([t for t in range(1000)]))f.close()f = open('test.txt', mode='r', encoding='utf-8')print(f.read())f.close()

再看一个省心的文件读写操作

def write_csv_file(file_path, data, data_type="list"):"""爬虫输入写入Excel文件"""head = ["标题", "小区", "房厅", "面积", "朝向", "楼层", "年份","位置", "总价(万)", "单价(元/平方米)"]keys = ["title", "house", "bedroom", "area", "direction","floor", "year", "location", "total_price", "unit_price"]try:with open(file_path, 'w', newline='', encoding='utf_8_sig') as csv_file:writer = csv.writer(csv_file, dialect='excel')if head is not None:writer.writerow(head)if data_type == "list":for item in data:row_data = []for k in keys:row_data.append(item[k])writer.writerow(row_data)else:# 如果队列不为空,写入每行数据while not data.empty():item = data.get()if item:row_data = []for k in keys:row_data.append(item[k])writer.writerow(row_data)print("Write a CSV file to path %s Successful." % file_path)except Exception as e:print("Fail to write CSV to path: %s, Case: %s" % (file_path, e))def read_file():with open("home.csv", mode='r', encoding='utf-8') as f:for i in f.readlines():print(i)

Excel操作

Excel文件操作常用的库有xlrd,xlwt, openpyxl, xlswriter已经pandas等;
  • pandas:数据处理最常用的分析库之一,可以读取各种各样格式的数据文件,一般输出dataframe格式,功能强大;
  • openpyxl:主要针对xlsx格式的excel进行读取和编辑;
  • xlrd库:从excel中读取数据,支持xls;
  • xlwt库:对excel进行修改操作,不支持对xlsx格式的修改;
  • xlutils库:在xlwt和xlrd中,对一个已存在的文件进行修改
  • xlwings:对xlsx、xls、xlsm格式文件进行读写、格式修改等操作
  • xlsxwriter:用来生成excel表格,插入数据、插入图标等表格操作,不支持读取
  1. xlwt库创建Excel
# 设置表格样式def set_style(name, height, bold=False):style = xlwt.XFStyle()font = xlwt.Font()font.name = namefont.bold = boldfont.color_index = 4font.height = heightstyle.font = fontreturn styledef write_excel(file_name: str, info: List,table_head: Optional[List], sheet: str = "data",save_path: str = "",*args, **kwargs) -> str:""" xlwt库创建Excel"""try:file_name= ".xls"save_file = os.path.join(save_path, file_name) if save_path else file_name# Excel文件保存路径workbook = xlwt.Workbook()# 创建Excelsheet1 = workbook.add_sheet(sheet, cell_overwrite_ok=True)# 增加sheet页for i in range(0, len(table_head)):# 写表头数据sheet1.write(0, i, table_head[i], set_style('Times New Roman', 220, True))for i in range(len(info)):# 从第二行开始写数据item = info[i]for j in range(len(table_head)):field = table_head[j]sheet1.write(i1, j, item[field], set_style('Times New Roman', 220, True))workbook.save(save_file)logger.info(f"Excel文件: {file_name}, 保存成功")return save_fileexcept Exception as e:logger.exception(f"Excel文件处理异常: {str(e)}")return ""
  1. xlrd包读取Excel文件
def read_excel(file_name: str, sheet_name: str = "",sheet_index: int = 0, data_type: int = 0) -> List:""" xlrd包读取Excel文件 只支持xls:param file_name: 文件路径:param sheet_name: 要读取的sheet名称:param sheet_index: 要读取的sheet下标:param data_type: 数据类型 0: list, 1: dict:return:"""wb = xlrd.open_workbook(filename=file_name)# 打开文件sheet1 = wb.sheet_by_name(sheet_name) if sheet_name else wb.sheet_by_index(sheet_index)# 通过sheet名称或索引获取表格rows = sheet1.nrows# sheet页行数colums = sheet1.ncols# sheet页列数table_head = sheet1.row_values(0)# 获取表头logger.info(f"table_head: {table_head}")li = [table_head]for i in range(1, rows):# sheet页数据添加到li中if data_type == 0:li.append(sheet1.row_values(i))# 数据形式为列表else:di = {}for idx, t in enumerate(table_head):di[t] = sheet1.row_values(i)[idx]li.append(di)return li
  1. xlsxwriter库进行创建Excel
def write_excel_xlsx(file_name: str, info: List,table_head: Optional[List], sheet: str = "data",save_path: str = ""):""" 数据写入Excel , 导出Excel文件, 支持xlsx类型使用xlsxwriter库:param file_name: excel文件名称:param info: 写入的数据:param table_head: 数据表头:param sheet: sheet页名称:param save_path: 保存路径:return:"""try:down_path = os.path.expanduser("~")# 用户目录file_name= '.xlsx'down_path_file = os.path.join(save_path, file_name) if save_path elseos.path.join(down_path, file_name)# excel文件地址workbook = xlsxwriter.Workbook(down_path_file)# 创建Excel文件worksheet = workbook.add_worksheet(sheet)# 创建sheet,名为dataformat1 = workbook.add_format({'bold': True, 'font_color': 'black', 'font_size': 13, 'align': 'left', 'font_name': u'宋体'})# 表头格式format2 = workbook.add_format({'font_color': 'black', 'font_size': 11, 'align': 'left', 'font_name': u'宋体'}) # 表头外格式worksheet.set_column("A:A", 10)# A列列宽设置能更好的显示for i in range(0, len(table_head)):# 插入第一行表头标题field = table_head[i]worksheet.write(0, i, field, format1)# 从第二行开始插入数据for i in range(len(info)):item = info[i]# 行数据for j in range(len(table_head)):field = table_head[j]worksheet.write(i1, j, item[field], format2)workbook.close()alert_text = '导出成功,导出地址:{}!'.format(down_path_file)return down_path_fileexcept Exception as e:logger.exception("导出Excel异常: {}".format(e))return ''
  1. openpyxl库进行写Excel
def openpyxl_write_excel(file_name: str, info: List,table_head: Optional[List], sheet: str = "data",save_path: str = ""):""" 数据写入Excel,导出Excel文件, 支持xlsx类型使用openpyxl库raise ValueError("Cannot convert {0!r} to Excel".format(value)) 数据需要为字符串"""down_path = os.path.expanduser("~")# 用户目录file_name= '.xlsx'down_path_file = os.path.join(save_path, file_name) if save_path elseos.path.join(down_path,file_name)# excel文件地址wb = Workbook()# 创建一个新的Excel表, 只写模式write_only=True , 默认读写模式sheet1 = wb.active# 激活当前sheet页# sheet1 = wb.create_sheet(sheet)# sheet页面末尾新建一个sheet页create_sheet(sheet, index=0) 在第0位置新建sheet1.append(table_head)for t in info:if isinstance(t, list):t = [str(s) for s in t]sheet1.append(t)elif isinstance(t, dict):fields = [str(t[s]) for s in table_head]sheet1.append(fields)wb.save(down_path_file)return down_path_file
  1. openpyxl库进行读Excel
def openpyxl_read_excel(file_name: str, sheet_name: str = "", sheet_index: int = 0, data_type: int = 0) -> List:""" 读取Excel文件使用openpyxl库param data_type: 列表中数据类型 0:list, 1: dict"""workbook = load_workbook(file_name)# 加载Excel文件# booksheet = workbook.active# 获取当前活跃的sheet,默认是第一个sheet# 如果想获取别的sheet页采取下面这种方式,先获取所有sheet页名,在通过指定那一页 。sheets = workbook.sheetnames# workbook.get_sheet_names()# 从名称获取sheet# booksheet = workbook.get_sheet_by_name(sheet_name)try:booksheet = workbook[sheet_name] if sheet_name else workbook[sheets[sheet_index]]except Exception as e:booksheet = workbook[sheets[0]]rows = booksheet.rows# 获取sheet页的行数据columns = booksheet.columns# 获取sheet页的列数据li = []table_head = []for idx, row in enumerate(rows):# 迭代所有的行line = [col.value for col in row]if idx == 0:table_head.extend(line)# 列表数据追加到列表continueif data_type == 0:li.append(line)# 数据形式为列表else:di = dict(zip(table_head, line))# 数据转换为字典li.append(di)return li
  1. pandas操作Excel
def pd_excel_write_info(path, file, data, abs_path, **kwargs):info = {}for i in data:info = iinfo['path'] = abs_pathf = os.path.join(path, file)df = pd.DataFrame(info)df.to_excel(f)def pd_excel_info(file, sheetname=0, **kwargs):converters = kwargs.get('converters', {})# 列转码try:data = pd.read_excel(file, sheet_name=sheetname, converters=converters)li = []for ix, row in data.iterrows():try:li.append(dict(row))# 按行添加数据 以dict/list形式添加except Exception as e:logger.error(e)return liexcept Exception as e:return None
根据以上库的使用,可以看出openpyxl和pandas的功能更加健全和强大,尤其是pandas , 对于数据操作那简直方便的飞起!
以下是各个库是性能对比,该图是从知乎网上得来的,如有侵权边删!
python从入门到实践,文件读写与Excel操作



【python从入门到实践,文件读写与Excel操作】#头条创作挑战赛##python#

相关经验推荐