|
|
@@ -5,6 +5,7 @@ PDF拆分节点
|
|
|
"""
|
|
|
|
|
|
from typing import Dict, Any
|
|
|
+import os
|
|
|
from src.datasets.parser.core.base import BaseNode, BaseState
|
|
|
from src.datasets.parser.core.registry import register_node
|
|
|
from src.common.logging_config import get_logger
|
|
|
@@ -54,8 +55,12 @@ class PDFSplitNode(BaseNode):
|
|
|
else:
|
|
|
logger.info(f"开始拆分PDF: {pdf_path}")
|
|
|
|
|
|
- # 根据pdf_path获取原始文件名(去除后缀)
|
|
|
- original_filename = pdf_path.split('/')[-1].split('.')[0] if pdf_path else None
|
|
|
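+ # Derive the original file name (extension stripped) from pdf_path. os.path is used instead of splitting on '/' so the host platform's path separator (Windows or Linux) is honored.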
|
|
|
+ if pdf_path:
|
|
|
+ base_name = os.path.basename(pdf_path)
|
|
|
+ original_filename = os.path.splitext(base_name)[0]
|
|
|
+ else:
|
|
|
+ original_filename = None
|
|
|
|
|
|
# 拆分PDF
|
|
|
splitter = PDFSplitter()
|