from typing import Dict, Any, List, Optional


class DatasetService:
    """Thin wrapper around the ``/api/v1/datasets`` HTTP endpoints.

    Every method sends one request through ``http_client`` and inspects the
    returned mapping: ``code == 0`` means success, anything else raises an
    ``Exception`` whose text embeds the response's ``message`` field.

    The client is expected to expose ``get(endpoint, params=...)``,
    ``post(endpoint, json_data=...)`` and ``delete(endpoint, json_data=...)``,
    each returning a dict-like object.
    """

    def __init__(self, http_client):
        """Store the HTTP client used for all requests."""
        self.http_client = http_client

    @staticmethod
    def _without_none(**fields: Any) -> Dict[str, Any]:
        """Return ``fields`` minus entries whose value is ``None``."""
        return {key: value for key, value in fields.items() if value is not None}

    @staticmethod
    def _failure(action: str, response: Dict[str, Any]) -> Exception:
        """Build the exception raised when ``action`` did not succeed."""
        return Exception(f"{action}失败: {response.get('message', '未知错误')}")

    def create_dataset(
        self,
        name: str,
        description: Optional[str] = None,
        embedding_model: Optional[str] = None,
        permission: Optional[str] = None,
        chunk_method: Optional[str] = None,
        parser_config: Optional[dict] = None,
    ) -> Dict[str, Any]:
        """Create a dataset and return the ``data`` payload of the response.

        ``name`` is always sent; the other fields are sent only when they are
        not ``None``.

        Raises:
            Exception: if the response code is non-zero or ``data`` is empty.
        """
        data = {
            "name": name,
            **self._without_none(
                description=description,
                embedding_model=embedding_model,
                permission=permission,
                chunk_method=chunk_method,
                parser_config=parser_config,
            ),
        }
        response = self.http_client.post("/api/v1/datasets", json_data=data)
        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure("创建数据集", response)

    def delete_datasets(self, dataset_ids: List[str]) -> bool:
        """Delete the datasets with the given ids; return ``True`` on success.

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.delete(
            "/api/v1/datasets", json_data={"ids": dataset_ids}
        )
        if response.get("code") == 0:
            return True
        raise self._failure("删除数据集", response)

    def update_dataset(
        self,
        dataset_id: str,
        name: Optional[str] = None,
        description: Optional[str] = None,
        embedding_model: Optional[str] = None,
        permission: Optional[str] = None,
        chunk_method: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Update a dataset and return the ``data`` payload of the response.

        Only fields that are not ``None`` are sent.

        Raises:
            Exception: if the response code is non-zero or ``data`` is empty.
        """
        data = self._without_none(
            name=name,
            description=description,
            embedding_model=embedding_model,
            permission=permission,
            chunk_method=chunk_method,
        )
        # NOTE(review): the RAGFlow HTTP API documents PUT for this route;
        # POST is kept because this file never shows http_client.put - confirm.
        # The payload keyword is ``json_data`` to match every other call here.
        response = self.http_client.post(
            f"/api/v1/datasets/{dataset_id}", json_data=data
        )
        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure("更新数据集", response)

    def list_datasets(
        self,
        page: int = 1,
        size: int = 20,
        orderby: str = "create_time",
        desc: bool = True,
        name: Optional[str] = None,
        dataset_id: Optional[str] = None,
    ) -> Optional[List[Dict[str, Any]]]:
        """List datasets, optionally filtered by ``name`` and/or ``dataset_id``.

        Returns:
            The ``data`` payload on success (``[]`` when it is empty), or
            ``None`` when the server answers with code 108.

        Raises:
            Exception: for any other response code.
        """
        params = {
            "page": page,
            "page_size": size,
            "orderby": orderby,
            "desc": int(desc),
            **self._without_none(name=name, id=dataset_id),
        }
        response = self.http_client.get("/api/v1/datasets", params=params)
        code = response.get("code")
        if code == 0:
            # An empty page is a successful result, not an error.
            return response.get("data") or []
        if code == 108:
            # Kept as None so callers that test for it keep working.
            return None
        raise self._failure("列出数据集", response)

    def get_dataset(self, dataset_id: str) -> Dict[str, Any]:
        """Fetch one dataset and return the ``data`` payload of the response.

        Raises:
            Exception: if the response code is non-zero or ``data`` is empty.
        """
        response = self.http_client.get(f"/api/v1/datasets/{dataset_id}")
        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure("获取数据集", response)

    def get_knowledge_graph(self, dataset_id: str) -> Dict[str, Any]:
        """Return the knowledge-graph payload (``{}`` if ``data`` is absent).

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.get(
            f"/api/v1/datasets/{dataset_id}/knowledge_graph"
        )
        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取知识图谱", response)

    def delete_knowledge_graph(self, dataset_id: str) -> bool:
        """Delete the dataset's knowledge graph; return ``True`` on success.

        Raises:
            Exception: if the response code is non-zero.
        """
        # Same URL as get_knowledge_graph, so the HTTP verb has to be DELETE
        # (as in the RAGFlow HTTP API); the empty body is kept from the
        # original call.
        response = self.http_client.delete(
            f"/api/v1/datasets/{dataset_id}/knowledge_graph", json_data={}
        )
        if response.get("code") == 0:
            return True
        raise self._failure("删除知识图谱", response)

    def trace_graphrag(self, dataset_id: str) -> Dict[str, Any]:
        """Return the GraphRAG trace payload (``{}`` if ``data`` is absent).

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.get(
            f"/api/v1/datasets/{dataset_id}/trace_graphrag"
        )
        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取GraphRAG追踪", response)

    def trace_raptor(self, dataset_id: str) -> Dict[str, Any]:
        """Return the RAPTOR trace payload (``{}`` if ``data`` is absent).

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.get(
            f"/api/v1/datasets/{dataset_id}/trace_raptor"
        )
        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取RAPTOR追踪", response)

    def get_metadata_summary(self, dataset_id: str) -> Dict[str, Any]:
        """Return the metadata summary payload (``{}`` if ``data`` is absent).

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.get(
            f"/api/v1/datasets/{dataset_id}/metadata/summary"
        )
        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取元数据摘要", response)

    def update_metadata(
        self,
        dataset_id: str,
        metadata: Optional[Dict] = None,
        document_ids: Optional[List[str]] = None,
        metadata_condition: Optional[Dict] = None,
    ) -> bool:
        """Update document metadata in a dataset; return ``True`` on success.

        Only arguments that are not ``None`` are sent.

        Raises:
            Exception: if the response code is non-zero.
        """
        data = self._without_none(
            metadata=metadata,
            document_ids=document_ids,
            metadata_condition=metadata_condition,
        )
        # ``json_data`` is the payload keyword used by every other call here.
        response = self.http_client.post(
            f"/api/v1/datasets/{dataset_id}/metadata/update", json_data=data
        )
        if response.get("code") == 0:
            return True
        raise self._failure("更新元数据", response)

    def run_graphrag(self, dataset_id: str, mode: str = "light") -> Dict[str, Any]:
        """Start a GraphRAG run in ``mode``; return ``data`` (``{}`` if absent).

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.post(
            f"/api/v1/datasets/{dataset_id}/run_graphrag", json_data={"mode": mode}
        )
        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("运行GraphRAG", response)

    def run_raptor(self, dataset_id: str) -> Dict[str, Any]:
        """Start a RAPTOR run; return ``data`` (``{}`` if absent).

        Raises:
            Exception: if the response code is non-zero.
        """
        response = self.http_client.post(f"/api/v1/datasets/{dataset_id}/run_raptor")
        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("运行RAPTOR", response)