import os
from typing import Any, Dict, List, Optional


class DocumentService:
    """Wrapper around the document endpoints of a dataset HTTP API.

    Every method delegates to the injected ``http_client`` and reads the
    value returned by its ``get``/``post`` calls as a mapping with ``code``,
    ``message`` and ``data`` entries.  ``code == 0`` is treated as success;
    any other outcome is raised as an ``Exception`` whose text combines a
    fixed per-operation prefix with the reply's ``message``.
    """

    def __init__(self, http_client):
        """Store the client used for all requests.

        Args:
            http_client: Object exposing ``get(endpoint, params=...)`` and
                ``post(endpoint, json=... | files=..., headers=...)``.
        """
        self.http_client = http_client

    @staticmethod
    def _failure(response: Dict[str, Any], prefix: str) -> Exception:
        """Build the exception reported for an unsuccessful reply."""
        return Exception(f"{prefix}: {response.get('message', '未知错误')}")

    @classmethod
    def _payload(cls, response: Dict[str, Any], prefix: str, allow_empty: bool = False) -> Any:
        """Return ``response["data"]`` for a successful reply, else raise.

        With ``allow_empty`` an empty list/dict counts as a valid payload
        (an empty result page is not an error); a missing ``data`` entry is
        always treated as a failure.
        """
        data = response.get("data")
        if response.get("code") == 0:
            if data or (allow_empty and isinstance(data, (list, dict))):
                return data
        raise cls._failure(response, prefix)

    @classmethod
    def _confirm(cls, response: Dict[str, Any], prefix: str) -> bool:
        """Return ``True`` for a successful reply, else raise."""
        if response.get("code") == 0:
            return True
        raise cls._failure(response, prefix)

    @staticmethod
    def _drop_none(fields: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``fields`` without the entries whose value is ``None``."""
        return {key: value for key, value in fields.items() if value is not None}

    def upload_document(self, dataset_id: str, file_path: str) -> List[Dict[str, Any]]:
        """Upload the file at ``file_path`` to a dataset.

        Args:
            dataset_id: Identifier interpolated into the endpoint path.
            file_path: Local path of the file; opened in binary mode.

        Returns:
            The ``data`` entry of the reply.

        Raises:
            OSError: If the file cannot be opened.
            Exception: If ``code`` is not 0 or ``data`` is empty/missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents"
        with open(file_path, "rb") as stream:
            # basename honours the platform's separators, unlike splitting on "/".
            files = {"file": (os.path.basename(file_path), stream)}
            # NOTE(review): with requests-style clients a hand-written multipart
            # Content-Type lacks the boundary parameter that `files=` would
            # generate -- confirm http_client rewrites or ignores this header.
            headers = {"Content-Type": "multipart/form-data"}
            response = self.http_client.post(endpoint, files=files, headers=headers)
        return self._payload(response, "上传文档失败")

    def update_document(
        self,
        dataset_id: str,
        document_id: str,
        name: Optional[str] = None,
        meta_fields: Optional[Dict[str, Any]] = None,
        chunk_method: Optional[str] = None,
        parser_config: Optional[Dict[str, Any]] = None,
        enabled: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Update selected attributes of a document.

        Only arguments that are not ``None`` are included in the JSON body.

        Returns:
            The ``data`` entry of the reply.

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is empty/missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
        body = self._drop_none(
            {
                "name": name,
                "meta_fields": meta_fields,
                "chunk_method": chunk_method,
                "parser_config": parser_config,
                "enabled": enabled,
            }
        )
        # NOTE(review): when no field is given this sends the same request as
        # delete_document (POST with an empty JSON body to the same URL) --
        # confirm how http_client / the server tells the two apart.
        response = self.http_client.post(endpoint, json=body)
        # NOTE(review): a success reply without a `data` payload is reported as
        # a failure here -- confirm the server always echoes the document.
        return self._payload(response, "更新文档失败")

    def delete_document(self, dataset_id: str, document_id: str) -> bool:
        """Delete one document by POSTing an empty JSON body to its URL.

        Returns:
            ``True`` when the reply's ``code`` is 0.

        Raises:
            Exception: If ``code`` is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
        response = self.http_client.post(endpoint, json={})
        return self._confirm(response, "删除文档失败")

    def delete_documents(self, dataset_id: str, document_ids: List[str]) -> bool:
        """Delete several documents, sent as ``{"document_ids": [...]}``.

        Returns:
            ``True`` when the reply's ``code`` is 0.

        Raises:
            Exception: If ``code`` is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents"
        response = self.http_client.post(endpoint, json={"document_ids": document_ids})
        return self._confirm(response, "批量删除文档失败")

    def get_document(self, dataset_id: str, document_id: str) -> Dict[str, Any]:
        """Fetch a single document.

        Returns:
            The ``data`` entry of the reply.

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is empty/missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
        response = self.http_client.get(endpoint)
        return self._payload(response, "获取文档失败")

    def list_documents(
        self,
        dataset_id: str,
        page: int = 1,
        size: int = 20,
        keywords: Optional[str] = None,
        document_id: Optional[str] = None,
        document_name: Optional[str] = None,
        suffix: Optional[str] = None,
        run: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """List the documents of a dataset, one page at a time.

        ``size`` is sent as ``page_size``, ``document_id`` as ``id`` and
        ``document_name`` as ``name``; filters left as ``None`` are omitted
        from the query string.

        Returns:
            The ``data`` entry of the reply; an empty container is a valid
            result (no matching documents).

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents"
        params = {"page": page, "page_size": size}
        params.update(
            self._drop_none(
                {
                    "keywords": keywords,
                    "id": document_id,
                    "name": document_name,
                    "suffix": suffix,
                    "run": run,
                }
            )
        )
        response = self.http_client.get(endpoint, params=params)
        return self._payload(response, "列出文档失败", allow_empty=True)

    def get_document_chunks(
        self,
        dataset_id: str,
        document_id: str,
        keywords: Optional[str] = None,
        page: int = 1,
        size: int = 20,
        chunk_id: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """List the chunks of a document, one page at a time.

        ``size`` is sent as ``page_size`` and ``chunk_id`` as ``id``;
        filters left as ``None`` are omitted from the query string.

        Returns:
            The ``data`` entry of the reply; an empty container is a valid
            result (no matching chunks).

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
        params = {"page": page, "page_size": size}
        params.update(self._drop_none({"keywords": keywords, "id": chunk_id}))
        response = self.http_client.get(endpoint, params=params)
        return self._payload(response, "获取文档切片失败", allow_empty=True)

    def parse_document(self, dataset_id: str, document_ids: List[str]) -> bool:
        """Submit documents to the dataset's ``chunks`` endpoint for parsing.

        Returns:
            ``True`` when the reply's ``code`` is 0.

        Raises:
            Exception: If ``code`` is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/chunks"
        response = self.http_client.post(endpoint, json={"document_ids": document_ids})
        return self._confirm(response, "解析文档失败")