4 месяцев назад · a3a41451f0
--- a/services/pdf_parser/__pycache__/workflow.cpython-312.pyc
+++ b/services/pdf_parser/__pycache__/workflow.cpython-312.pyc
--- a/services/pdf_parser/workflow.py
+++ b/services/pdf_parser/workflow.py
@@ -111,7 +111,7 @@ class PDFParsingWorkflow:
 
				             
			
 
				             # 检查响应
			
 
				             if document_info_list and len(document_info_list) > 0:
			
 
				-                document_id = document_info_list[0].id
			
 
				+                document_id = document_info_list[0]["id"]
			
 
				                 print(f"文档上传成功，文档ID: {document_id}")
			
 
				                 return {
			
 
				                     "document_id": document_id
			
@@ -193,7 +193,7 @@ class PDFParsingWorkflow:
 
				         parsed_results = []
			
 
				         
			
 
				         # 使用ThreadPoolExecutor实现并行处理
			
 
				-        with ThreadPoolExecutor(max_workers=10) as executor:
			
 
				+        with ThreadPoolExecutor(max_workers=4) as executor:
			
 
				             # 提交所有页面解析任务
			
 
				             future_to_page = {
			
 
				                 executor.submit(self._parse_single_page, page, self.model_name): page
			
--- a/services/ragflow/__pycache__/document_service.cpython-312.pyc
+++ b/services/ragflow/__pycache__/document_service.cpython-312.pyc
--- a/services/ragflow/document_service.py
+++ b/services/ragflow/document_service.py
@@ -5,13 +5,14 @@ class DocumentService:
 
				         self.http_client = http_client
			
 
				     
			
 
				     def upload_document(self, dataset_id: str, file_path: str) -> List[Dict[str, Any]]:
			
 
				+        import os
			
 
				         endpoint = f"/api/v1/datasets/{dataset_id}/documents"
			
 
				         
			
 
				         with open(file_path, 'rb') as f:
			
 
				-            files = {'file': (file_path.split('/')[-1], f)}
			
 
				-            headers = {'Content-Type': 'multipart/form-data'}
			
 
				-            
			
 
				-            response = self.http_client.post(endpoint, files=files, headers=headers)
			
 
				+            # 使用os.path.basename获取文件名，兼容Windows和Linux
			
 
				+            files = {'file': (os.path.basename(file_path), f)}
			
 
				+            # 不设置Content-Type头，让requests库自动生成正确的multipart/form-data头
			
 
				+            response = self.http_client.post(endpoint, files=files)
			
 
				         
			
 
				         if response.get("code") == 0 and response.get("data"):
			
 
				             return response["data"]
			
--- a/test_upload_document.py
+++ b/test_upload_document.py
@@ -0,0 +1,39 @@
 
				+import sys
			
 
				+import os
			
 
				+
			
 
				+# Add this file's directory (the project root) to the Python import path
			
 
				+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				+
			
 
				+from services.utils.http_client import HTTPClient
			
 
				+from services.ragflow.document_service import DocumentService
			
 
				+
			
 
				+# Configuration
			
 
				+API_URL = "http://localhost:8000"  # Replace with the actual RAGFlow API URL
			
 
				+API_KEY = "your_api_key"  # Replace with the actual API key
			
 
				+DATASET_ID = "your_dataset_id"  # Replace with the actual dataset ID
			
 
				+PDF_PATH = r"D:\project\work\ragflow_plugs\book\不一样的卡梅拉1-我想去看海.pdf"  # Uses an existing test PDF file
			
 
				+
			
 
				+def test_upload_document():
			
 
				+    """Upload PDF_PATH via DocumentService; return True on success, False on any exception."""
			
 
				+    try:
			
 
				+        # Create the HTTP client instance
			
 
				+        http_client = HTTPClient(base_url=API_URL, api_key=API_KEY)
			
 
				+        
			
 
				+        # Create the document service instance
			
 
				+        document_service = DocumentService(http_client)
			
 
				+        
			
 
				+        # Call the document upload method
			
 
				+        print(f"开始上传文档: {PDF_PATH}")
			
 
				+        result = document_service.upload_document(DATASET_ID, PDF_PATH)
			
 
				+        
			
 
				+        # Print the result
			
 
				+        print(f"文档上传成功: {result}")
			
 
				+        return True
			
 
				+    except Exception as e:  # NOTE(review): broad catch; the error is printed, not re-raised
			
 
				+        print(f"文档上传失败: {str(e)}")
			
 
				+        import traceback  # local import, only needed on the failure path
			
 
				+        traceback.print_exc()
			
 
				+        return False
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    test_upload_document()  # the True/False return value is not used here