感谢您的功能。我添加了一个ws参数,因为ws不是全局变量,get\u row\u值需要his。
def iter_table_rows(ws:Worksheet, tb:Table) -> Generator[TableRow, None, None]:
"""Iterate over rows from a table with headers (row as dictionary)"""
def get_row_values(ws:Worksheet,row_cell_ref:tuple):
return [ws[c].value for c in row_cell_ref]
iter_rows = rows_from_range(tb.ref)
headers = get_row_values(ws,next(iter_rows))
for row_cells in iter_rows:
yield {h:v for h,v in zip(headers, get_row_values(ws,row_cells))}
目前我还没有找到其他方法。
和
没有“父”属性可以知道在哪个工作表中
是桌子。
最后我得出了一个不同的解决方案。
下面是从json文件中的xlsx表提取数据的函数代码。
我猜这更接近openpyxl的理念。数据存储在工作表中,表似乎是一种指向范围的指针。读取工作表中数据的最有效方法是使用ws。iter\U行指定基于min\U行、max\U行、min\U列、max\U列的范围。
import json
import logging
from argparse import Namespace
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
def xlsx_Json(args: Namespace):
"""Extract table values from an excel workbook in a json file
Args:
args (Namespace): parsed arguments
args.workbook : filename of the excel document
args.worksheet : name of the sheet to read
args.table : name of a table in worksheet
args.output : name of a output file (json format)
Note:
It's assumed that table exists in worksheet, and worksheet exists in workbook, and file exists
"""
#wb = load_workbook(filename=args.workbook,read_only=True) !!! read only worksheet doesn't have table ?
wb = load_workbook(filename=args.workbook)
ws = wb[args.worksheet]
table = ws.tables[args.table]
# table.ref needs to be converted in a CellRange, and use the range to read data in the worksheet
table_range = CellRange(table.ref)
tableJson={ args.table : [] }
header=table.column_names
for nb_rows,row in enumerate(ws.iter_rows(min_row=table_range.min_row+1,
min_col=table_range.min_col,
max_row=table_range.max_row,
max_col=table_range.max_col,
values_only=True )):
# min_row=range.min_row+1 skip the header row
# need to convert row element to string to make a serializablejson object
# value such datetime are not directly serializable
if nb_rows == 0:
for h,x in zip(header,row):
logging.info(f"{h}:type-{type(x)}:value:{x}")
if nb_rows % 100 == 0:
# print row number every 100 lines
logging.info(f"rows:{nb_rows}")
row_Json=dict(zip(header,[str(x) for x in row]))
tableJson[args.table].append(row_Json)
with open(args.json_output,"w") as file_out:
json.dump(tableJson,file_out,indent=4)
logging.info(f"{nb_rows} elements written in {args.json_output}")