import json
import re
# Export every table found in the parsed response in two forms:
#   * tables.md   - the raw Markdown table text pulled out of result["markdown"]
#   * tables.json - the structured blocks whose "type" is "table", collected
#                   from every page in result["pages"]
# NOTE(review): assumes json_response is a dict-like decoded API response
# defined earlier in the file - confirm against the caller.
result = json_response.get("result") or {}

if "markdown" in result:
    markdown_content = result["markdown"]
    # Extract all tables: each match is a run of consecutive lines that begin
    # at a "|" character. The trailing (?:\n|\Z) lets the final row match even
    # when the document does not end with a newline; requiring "\n" alone
    # would silently drop that last row.
    tables = re.findall(r'(?:\|.*(?:\n|\Z))+', markdown_content)
    tables_md = '\n'.join(tables)
    # Save as a Markdown file.
    with open("tables.md", "w", encoding="utf-8") as f:
        f.write(tables_md)

# Collect the structured table blocks. Missing or null "pages"/"structured"
# entries are treated as empty so an incomplete response yields an empty list
# instead of raising KeyError/TypeError.
tables_json = []
for page in result.get("pages") or []:
    for block in page.get("structured") or []:
        if block.get("type") == "table":
            tables_json.append(block)

# Save as a JSON file (an empty list when no table blocks were found).
with open("tables.json", "w", encoding="utf-8") as f:
    json.dump(tables_json, f, ensure_ascii=False, indent=2)