| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
- from typing import Dict, Any, List, Optional
class DocumentService:
    """Thin wrapper around the document endpoints of a dataset REST API.

    Each method builds an ``/api/v1/datasets/...`` path, sends it through the
    injected ``http_client`` and interprets the reply. The reply is read with
    ``.get("code")``, ``.get("data")`` and ``.get("message")``; ``code == 0``
    is treated as success and anything else raises ``Exception`` carrying the
    server-supplied message.
    """

    def __init__(self, http_client):
        """Keep a reference to the HTTP client used for every request.

        Args:
            http_client: Object providing ``get(endpoint, params=...)`` and
                ``post(endpoint, json=...)`` / ``post(endpoint, files=...)``.
                Their return values must support ``.get(key, default)``.
        """
        self.http_client = http_client

    @staticmethod
    def _unwrap_data(response, failure_prefix: str):
        """Return ``response["data"]`` on success, otherwise raise.

        Success means ``code == 0`` and a ``data`` value that is present
        (not ``None``). An empty list or dict is a valid payload, e.g. a
        listing with no matches, and is returned as-is instead of being
        reported as an unknown error.

        Raises:
            Exception: ``"<failure_prefix>: <message>"`` when the call failed
                or no ``data`` was returned.
        """
        if response.get("code") == 0:
            payload = response.get("data")
            if payload is not None:
                return payload
        raise Exception(f"{failure_prefix}: {response.get('message', '未知错误')}")

    @staticmethod
    def _ensure_ok(response, failure_prefix: str) -> bool:
        """Return ``True`` when ``code == 0``, otherwise raise.

        Raises:
            Exception: ``"<failure_prefix>: <message>"`` on a non-zero code.
        """
        if response.get("code") == 0:
            return True
        raise Exception(f"{failure_prefix}: {response.get('message', '未知错误')}")

    def upload_document(self, dataset_id: str, file_path: str) -> List[Dict[str, Any]]:
        """Upload the file at ``file_path`` to the given dataset.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            file_path: Local path of the file; it is opened in binary mode.

        Returns:
            The ``data`` value of the server reply.

        Raises:
            OSError: If the file cannot be opened.
            Exception: If the reply does not indicate success.
        """
        import os

        endpoint = f"/api/v1/datasets/{dataset_id}/documents"

        with open(file_path, 'rb') as f:
            # os.path.basename extracts the file name on both Windows and Linux.
            files = {'file': (os.path.basename(file_path), f)}
            # No Content-Type header is set here on purpose: per the original
            # author's note the HTTP library (requests) then generates the
            # correct multipart/form-data header itself.
            # The request must be sent while the file handle is still open.
            response = self.http_client.post(endpoint, files=files)

        return self._unwrap_data(response, "上传文档失败")

    def update_document(self, dataset_id: str, document_id: str,
                        name: Optional[str] = None,
                        meta_fields: Optional[Dict[str, Any]] = None,
                        chunk_method: Optional[str] = None,
                        parser_config: Optional[Dict[str, Any]] = None,
                        enabled: Optional[int] = None) -> Dict[str, Any]:
        """Send the non-``None`` fields as an update for one document.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            document_id: Identifier interpolated into the endpoint path.
            name, meta_fields, chunk_method, parser_config, enabled:
                Optional fields; only those that are not ``None`` are sent.

        Returns:
            The ``data`` value of the server reply.

        Raises:
            Exception: If the reply does not indicate success or has no data.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"

        candidates = {
            "name": name,
            "meta_fields": meta_fields,
            "chunk_method": chunk_method,
            "parser_config": parser_config,
            "enabled": enabled,
        }
        data = {key: value for key, value in candidates.items() if value is not None}

        # NOTE(review): with no fields given this request is identical to the
        # one delete_document sends (POST, same path, empty JSON body).
        # Presumably the server expects PUT here - confirm against the API
        # and the http_client interface.
        response = self.http_client.post(endpoint, json=data)

        return self._unwrap_data(response, "更新文档失败")

    def delete_document(self, dataset_id: str, document_id: str) -> bool:
        """Request deletion of a single document.

        Returns:
            ``True`` when the reply code is 0.

        Raises:
            Exception: If the reply code is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"

        # NOTE(review): sent as POST with an empty JSON body, which cannot be
        # told apart from an empty update_document call - presumably a DELETE
        # request is intended; confirm.
        response = self.http_client.post(endpoint, json={})

        return self._ensure_ok(response, "删除文档失败")

    def delete_documents(self, dataset_id: str, document_ids: List[str]) -> bool:
        """Request deletion of several documents at once.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            document_ids: Sent in the JSON body under ``"document_ids"``.

        Returns:
            ``True`` when the reply code is 0.

        Raises:
            Exception: If the reply code is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents"

        # NOTE(review): same verb and path as upload_document; presumably a
        # DELETE request is intended - confirm.
        response = self.http_client.post(endpoint, json={"document_ids": document_ids})

        return self._ensure_ok(response, "批量删除文档失败")

    def get_document(self, dataset_id: str, document_id: str) -> Dict[str, Any]:
        """Fetch one document via GET and return the reply's ``data`` value.

        Raises:
            Exception: If the reply does not indicate success or has no data.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"

        response = self.http_client.get(endpoint)

        return self._unwrap_data(response, "获取文档失败")

    def list_documents(self, dataset_id: str, page: int = 1, size: int = 20,
                       keywords: Optional[str] = None,
                       document_id: Optional[str] = None,
                       document_name: Optional[str] = None,
                       suffix: Optional[str] = None,
                       run: Optional[str] = None) -> List[Dict[str, Any]]:
        """List the documents of a dataset, with optional filters.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            page: Sent as query parameter ``page``.
            size: Sent as query parameter ``page_size``.
            keywords: Sent as ``keywords`` when not ``None``.
            document_id: Sent as ``id`` when not ``None``.
            document_name: Sent as ``name`` when not ``None``.
            suffix: Sent as ``suffix`` when not ``None``.
            run: Sent as ``run`` when not ``None``.

        Returns:
            The ``data`` value of the server reply; an empty result is
            returned as-is rather than treated as a failure.

        Raises:
            Exception: If the reply does not indicate success or has no data.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents"

        filters = {
            "keywords": keywords,
            "id": document_id,
            "name": document_name,
            "suffix": suffix,
            "run": run,
        }
        params = {"page": page, "page_size": size}
        params.update({key: value for key, value in filters.items() if value is not None})

        response = self.http_client.get(endpoint, params=params)

        return self._unwrap_data(response, "列出文档失败")

    def get_document_chunks(self, dataset_id: str, document_id: str,
                            keywords: Optional[str] = None, page: int = 1,
                            size: int = 20,
                            chunk_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """Fetch the chunks of one document, with optional filters.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            document_id: Identifier interpolated into the endpoint path.
            keywords: Sent as ``keywords`` when not ``None``.
            page: Sent as query parameter ``page``.
            size: Sent as query parameter ``page_size``.
            chunk_id: Sent as ``id`` when not ``None``.

        Returns:
            The ``data`` value of the server reply; an empty result is
            returned as-is rather than treated as a failure.

        Raises:
            Exception: If the reply does not indicate success or has no data.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"

        filters = {"keywords": keywords, "id": chunk_id}
        params = {"page": page, "page_size": size}
        params.update({key: value for key, value in filters.items() if value is not None})

        response = self.http_client.get(endpoint, params=params)

        return self._unwrap_data(response, "获取文档切片失败")

    def parse_document(self, dataset_id: str, document_ids: List[str]) -> bool:
        """Ask the server to parse the given documents.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            document_ids: Sent in the JSON body under ``"document_ids"``.

        Returns:
            ``True`` when the reply code is 0.

        Raises:
            Exception: If the reply code is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/chunks"

        response = self.http_client.post(endpoint, json={"document_ids": document_ids})

        return self._ensure_ok(response, "解析文档失败")
|