from typing import Dict, Any, List, Optional


class ChunkService:
    """Client-side wrapper for the chunk and retrieval endpoints under ``/api/v1``.

    Every call is issued through ``http_client.post(endpoint, json=...)``. The
    response is read with ``.get("code")``, ``.get("data")`` and
    ``.get("message")``; a ``code`` of ``0`` is treated as success.
    """

    def __init__(self, http_client):
        """Store the HTTP client used for all requests.

        Args:
            http_client: Object exposing ``post(endpoint, json=...)`` whose
                return value supports ``.get(...)`` and item access.
        """
        self.http_client = http_client

    @staticmethod
    def _failure(response, label: str) -> Exception:
        """Build the exception for a failed call, using the server message if any."""
        return Exception(f"{label}: {response.get('message', '未知错误')}")

    def _post_for_data(self, endpoint: str, payload: Dict[str, Any], label: str):
        """POST ``payload`` and return the non-empty ``data`` field, else raise."""
        response = self.http_client.post(endpoint, json=payload)
        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure(response, label)

    def _post_for_ok(self, endpoint: str, payload: Dict[str, Any], label: str) -> bool:
        """POST ``payload`` and return ``True`` when ``code`` is 0, else raise."""
        response = self.http_client.post(endpoint, json=payload)
        if response.get("code") == 0:
            return True
        raise self._failure(response, label)

    def create_chunk(
        self,
        dataset_id: str,
        document_id: str,
        content: str,
        meta_fields: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """Create a chunk inside a document.

        Args:
            dataset_id: Identifier placed in the dataset segment of the URL.
            document_id: Identifier placed in the document segment of the URL.
            content: Chunk text, sent as ``content``.
            meta_fields: Optional metadata, sent as ``meta_fields`` only when
                it is not ``None``.

        Returns:
            The ``data`` field of the response.

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is missing/empty.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
        payload: Dict[str, Any] = {"content": content}
        if meta_fields is not None:
            payload["meta_fields"] = meta_fields
        return self._post_for_data(endpoint, payload, "创建切片失败")

    def update_chunk(
        self,
        dataset_id: str,
        chunk_id: str,
        content: Optional[str] = None,
        meta_fields: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """Update the content and/or metadata of a chunk.

        Only the arguments that are not ``None`` are included in the request
        body.

        Args:
            dataset_id: Identifier placed in the dataset segment of the URL.
            chunk_id: Identifier placed in the chunk segment of the URL.
            content: New chunk text, or ``None`` to leave it out.
            meta_fields: New metadata, or ``None`` to leave it out.

        Returns:
            The ``data`` field of the response.

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is missing/empty.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/chunks/{chunk_id}"
        payload: Dict[str, Any] = {}
        if content is not None:
            payload["content"] = content
        if meta_fields is not None:
            payload["meta_fields"] = meta_fields
        # NOTE(review): with no fields given this sends the same request
        # (POST, empty body, same URL) as delete_chunk -- confirm how the
        # backend tells the two apart.
        return self._post_for_data(endpoint, payload, "更新切片失败")

    def delete_chunk(self, dataset_id: str, chunk_id: str) -> bool:
        """Delete a single chunk.

        Args:
            dataset_id: Identifier placed in the dataset segment of the URL.
            chunk_id: Identifier placed in the chunk segment of the URL.

        Returns:
            ``True`` when the response ``code`` is 0.

        Raises:
            Exception: If the response ``code`` is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/chunks/{chunk_id}"
        # NOTE(review): issued via POST like every other call in this class;
        # confirm the backend does not expect a different HTTP verb here.
        return self._post_for_ok(endpoint, {}, "删除切片失败")

    def delete_chunks(self, dataset_id: str, document_id: str, chunk_ids: List[str]) -> bool:
        """Delete several chunks of one document in a single request.

        Args:
            dataset_id: Identifier placed in the dataset segment of the URL.
            document_id: Identifier placed in the document segment of the URL.
            chunk_ids: Chunk identifiers, sent as ``chunk_ids``.

        Returns:
            ``True`` when the response ``code`` is 0.

        Raises:
            Exception: If the response ``code`` is not 0.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
        return self._post_for_ok(endpoint, {"chunk_ids": chunk_ids}, "批量删除切片失败")

    def retrieval(
        self,
        dataset_ids: List[str],
        query: str,
        top_k: int = 5,
        similarity_threshold: float = 0.1,
        vector_similarity_weight: float = 0.3,
        refine: bool = False,
    ) -> List[Dict[str, Any]]:
        """Run a retrieval query against one or more datasets.

        All arguments are forwarded unchanged in the request body under keys
        of the same name.

        Returns:
            The ``data`` field of the response. A successful response whose
            ``data`` is present but empty (no hits) is returned as-is rather
            than being reported as a failure.

        Raises:
            Exception: If ``code`` is not 0 or ``data`` is missing/``None``.
        """
        endpoint = "/api/v1/retrieval"
        payload = {
            "dataset_ids": dataset_ids,
            "query": query,
            "top_k": top_k,
            "similarity_threshold": similarity_threshold,
            "vector_similarity_weight": vector_similarity_weight,
            "refine": refine,
        }
        response = self.http_client.post(endpoint, json=payload)
        hits = response.get("data")
        # Compare against None instead of truthiness: an empty result set is a
        # valid answer to a query, not an error.
        if response.get("code") == 0 and hits is not None:
            return hits
        raise self._failure(response, "检索失败")