| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- from typing import Dict, Any, List, Optional
class ChunkService:
    """Client-side wrapper around the chunk and retrieval HTTP endpoints.

    Every request is sent with ``http_client.post(endpoint, json=...)``. The
    value returned by that call is read with ``.get("code")``,
    ``.get("message")`` and ``.get("data")``; a ``code`` equal to ``0`` is
    treated as success.

    NOTE(review): update and delete operations are also issued via ``post``.
    Confirm the backend really expects POST for these routes (rather than
    PUT/DELETE) before changing it.
    """

    def __init__(self, http_client):
        """Keep a reference to the HTTP client used for all requests."""
        self.http_client = http_client

    def _post(self, endpoint: str, payload: Dict[str, Any], failure_label: str,
              *, require_data: bool = False) -> Dict[str, Any]:
        """POST ``payload`` to ``endpoint`` and return the response on success.

        Args:
            endpoint: Request path passed to the HTTP client.
            payload: JSON body of the request.
            failure_label: Prefix of the error message raised on failure.
            require_data: When true, a success code with an empty/missing
                ``data`` field is also treated as a failure.

        Raises:
            Exception: If ``code`` is not ``0``, or ``require_data`` is set and
                the response carries no ``data``.
        """
        response = self.http_client.post(endpoint, json=payload)

        succeeded = response.get("code") == 0
        if succeeded and require_data and not response.get("data"):
            succeeded = False

        if not succeeded:
            raise Exception(f"{failure_label}: {response.get('message', '未知错误')}")
        return response

    def create_chunk(self, dataset_id: str, document_id: str, content: str,
                     meta_fields: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Create a chunk in a document and return the response's ``data``.

        ``meta_fields`` is only included in the request body when it is not
        ``None``.

        Raises:
            Exception: If the response code is not ``0`` or ``data`` is empty.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"

        data: Dict[str, Any] = {"content": content}
        if meta_fields is not None:
            data["meta_fields"] = meta_fields

        response = self._post(endpoint, data, "创建切片失败", require_data=True)
        return response["data"]

    def update_chunk(self, dataset_id: str, chunk_id: str, content: Optional[str] = None,
                     meta_fields: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Update a chunk and return the response's ``data``.

        Only the arguments that are not ``None`` are sent, so omitted fields
        are absent from the request body.

        Raises:
            Exception: If the response code is not ``0`` or ``data`` is empty.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/chunks/{chunk_id}"

        data: Dict[str, Any] = {}
        if content is not None:
            data["content"] = content
        if meta_fields is not None:
            data["meta_fields"] = meta_fields

        response = self._post(endpoint, data, "更新切片失败", require_data=True)
        return response["data"]

    def delete_chunk(self, dataset_id: str, chunk_id: str) -> bool:
        """Delete a single chunk; return ``True`` on success.

        Raises:
            Exception: If the response code is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/chunks/{chunk_id}"
        self._post(endpoint, {}, "删除切片失败")
        return True

    def delete_chunks(self, dataset_id: str, document_id: str, chunk_ids: List[str]) -> bool:
        """Delete several chunks of a document; return ``True`` on success.

        Raises:
            Exception: If the response code is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
        self._post(endpoint, {"chunk_ids": chunk_ids}, "批量删除切片失败")
        return True

    def retrieval(self, dataset_ids: List[str], query: str, top_k: int = 5,
                  similarity_threshold: float = 0.1, vector_similarity_weight: float = 0.3,
                  refine: bool = False) -> List[Dict[str, Any]]:
        """Run a retrieval query over the given datasets.

        Returns:
            The response's ``data``. A successful response whose ``data`` is
            empty is returned as-is, and a missing ``data`` yields ``[]``:
            finding no matches is a valid outcome, not an error.

        Raises:
            Exception: If the response code is not ``0``.
        """
        endpoint = "/api/v1/retrieval"

        data = {
            "dataset_ids": dataset_ids,
            "query": query,
            "top_k": top_k,
            "similarity_threshold": similarity_threshold,
            "vector_similarity_weight": vector_similarity_weight,
            "refine": refine,
        }

        response = self._post(endpoint, data, "检索失败")
        # Success is decided by the code alone, so an empty hit list is
        # returned instead of being reported as a failure.
        hits = response.get("data")
        return [] if hits is None else hits
|