|
|
@@ -128,11 +128,32 @@ class SlidingWindowStitchingNode(BaseNode):
|
|
|
|
|
|
logger.info(f"开始滑动窗口拼接,共 {len(split_pages)} 页,窗口大小: {self.window_size}")
|
|
|
|
|
|
- # 按页码排序
|
|
|
- sorted_pages = sorted(split_pages, key=lambda x: x.get('page_number', 0))
|
|
|
+ # 按页码排序(安全处理)
|
|
|
+ sorted_pages = []
|
|
|
+ for page in split_pages:
|
|
|
+ if isinstance(page, dict) and 'page_number' in page:
|
|
|
+ sorted_pages.append(page)
|
|
|
+ else:
|
|
|
+ logger.warning(f"跳过无效的页面数据: {type(page)}")
|
|
|
+
|
|
|
+ sorted_pages.sort(key=lambda x: x.get('page_number', 0))
|
|
|
+
|
|
|
+ if not sorted_pages:
|
|
|
+ raise ValueError("没有有效的页面数据可以处理")
|
|
|
+
|
|
|
+ # 创建页码到图片的映射(安全处理)
|
|
|
+ page_map = {}
|
|
|
+ for page in sorted_pages:
|
|
|
+ page_num = page.get('page_number')
|
|
|
+ page_img = page.get('image')
|
|
|
+ if page_num is not None and page_img is not None:
|
|
|
+ page_map[page_num] = page_img
|
|
|
+ else:
|
|
|
+ logger.warning(f"页面数据缺少必要字段: page_number={page_num}, has_image={page_img is not None}")
|
|
|
+
|
|
|
+ if not page_map:
|
|
|
+ raise ValueError("没有有效的页码-图片映射")
|
|
|
|
|
|
- # 创建页码到图片的映射
|
|
|
- page_map = {page.get('page_number'): page.get('image') for page in sorted_pages}
|
|
|
page_numbers = sorted(page_map.keys())
|
|
|
|
|
|
windowed_pages = []
|