from PIL import Image, ImageDraw, ImageFont
def _load_label_font(size=12):
    """Return Arial at *size*, falling back to Pillow's built-in default font."""
    try:
        return ImageFont.truetype("arial.ttf", size)
    except (OSError, ImportError):
        # OSError: the font file could not be found or read.
        # ImportError: Pillow was presumably built without FreeType support.
        return ImageFont.load_default()


def _to_pixel_points(coords, width, height):
    """Scale 8 normalized values (x0, y0, ..., x3, y3) to four pixel (x, y) points."""
    return [
        (coords[i] * width, coords[i + 1] * height)
        for i in range(0, 8, 2)
    ]


def visualize_elements_on_page(page_image_path: str, elements, output_path: str) -> None:
    """
    Draw the bounding box and type label of each element onto a page image.

    Args:
        page_image_path: Path of the original page image.
        elements: Iterable of element dicts belonging to the same page. Each
            element may carry ``metadata["coordinates"]`` as 8 normalized
            values ordered top-left, top-right, bottom-right, bottom-left
            (x, y pairs), and a ``"type"`` string used for colour and label.
            Elements without exactly 8 coordinates are skipped.
        output_path: Path where the annotated image is written.

    Normalized coordinates are scaled by the size of the opened image, because
    that is the canvas being drawn on. ``page_width`` / ``page_height`` in the
    element metadata are not used: if the image was rendered at a different
    resolution than those values, scaling by them would misplace the boxes.
    """
    # Colour per element type; unknown types fall back to gray.
    color_map = {
        "Title": (255, 0, 0),            # red
        "NarrativeText": (0, 255, 0),    # green
        "Table": (0, 0, 255),            # blue
        "Image": (255, 165, 0),          # orange
        "ListItem": (128, 0, 128),       # purple
    }
    default_color = (128, 128, 128)

    # The font does not depend on the element, so load it once.
    font = _load_label_font()

    # The context manager closes the underlying file handle when done.
    with Image.open(page_image_path) as image:
        image_width, image_height = image.size
        draw = ImageDraw.Draw(image)

        for element in elements:
            # Tolerate both a missing "metadata" key and an explicit None.
            metadata = element.get("metadata") or {}
            coords = metadata.get("coordinates")
            if not coords or len(coords) != 8:
                continue

            elem_type = element.get("type", "Unknown")
            color = color_map.get(elem_type, default_color)

            # Convert normalized coordinates to pixel positions on this image.
            points = _to_pixel_points(coords, image_width, image_height)

            # Draw the four edges, closing the outline back to the first corner.
            for start_point, end_point in zip(points, points[1:] + points[:1]):
                draw.line([start_point, end_point], fill=color, width=2)

            # Put the type label just above the top-left corner, clamped so it
            # stays visible when the box touches the top edge of the image.
            label = f"{elem_type}"
            label_x, label_y = points[0]
            draw.text((label_x, max(label_y - 15, 0)), label, fill=color, font=font)

        # Save the annotated result.
        image.save(output_path)

    print(f"可视化结果已保存到: {output_path}")
# Usage example: two sample elements from page 1, each with normalized
# corner coordinates (top-left, top-right, bottom-right, bottom-left).
elements = [
    dict(
        type="Title",
        text="第一章 简介",
        metadata=dict(
            coordinates=[
                0.1008, 0.1069,
                0.8228, 0.1069,
                0.8228, 0.1425,
                0.1008, 0.1425,
            ],
            page_number=1,
            page_width=1191,
            page_height=1684,
        ),
    ),
    dict(
        type="NarrativeText",
        text="这是正文内容...",
        metadata=dict(
            coordinates=[
                0.1822, 0.2316,
                0.6717, 0.2316,
                0.6717, 0.2732,
                0.1822, 0.2732,
            ],
            page_number=1,
            page_width=1191,
            page_height=1684,
        ),
    ),
]

# Annotate page_1.png with the sample elements and write the result.
visualize_elements_on_page("page_1.png", elements, "annotated_page_1.png")