| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
from typing import Any, Dict, List, Optional
class DatasetService:
    """Thin wrapper around the ``/api/v1/datasets`` HTTP endpoints.

    Each method sends one request through the injected ``http_client`` and
    inspects the returned mapping. A ``code`` of ``0`` is treated as success;
    unless a method documents otherwise, anything else raises ``Exception``
    whose text ends with the response ``message`` (or ``未知错误`` when the
    response carries none).
    """

    def __init__(self, http_client):
        """Store the client used for every request.

        Args:
            http_client: Object exposing ``get``, ``post`` and ``delete``
                methods that accept an endpoint path and return a dict-like
                response (the only methods this class calls).
        """
        self.http_client = http_client

    @staticmethod
    def _drop_unset(**fields: Any) -> Dict[str, Any]:
        """Return ``fields`` without the entries whose value is ``None``."""
        return {key: value for key, value in fields.items() if value is not None}

    @staticmethod
    def _failure(action: str, response: Dict[str, Any]) -> Exception:
        """Build the exception describing a rejected ``action``."""
        return Exception(f"{action}: {response.get('message', '未知错误')}")

    def create_dataset(self, name: str, description: Optional[str] = None,
                       embedding_model: Optional[str] = None,
                       permission: Optional[str] = None,
                       chunk_method: Optional[str] = None,
                       parser_config: Optional[dict] = None) -> Dict[str, Any]:
        """Create a dataset via ``POST /api/v1/datasets``.

        ``name`` is always sent; every other argument is included in the
        request body only when it is not ``None``.

        Returns:
            The ``data`` member of the response.

        Raises:
            Exception: If ``code`` is not ``0`` or ``data`` is empty/missing.
        """
        endpoint = "/api/v1/datasets"
        payload = {
            "name": name,
            **self._drop_unset(
                description=description,
                embedding_model=embedding_model,
                permission=permission,
                chunk_method=chunk_method,
                parser_config=parser_config,
            ),
        }

        response = self.http_client.post(endpoint, json_data=payload)

        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure("创建数据集失败", response)

    def delete_datasets(self, dataset_ids: List[str]) -> bool:
        """Delete datasets via ``DELETE /api/v1/datasets``.

        Args:
            dataset_ids: Sent to the server as the ``ids`` field of the body.

        Returns:
            ``True`` when the response ``code`` is ``0``.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = "/api/v1/datasets"

        response = self.http_client.delete(endpoint, json_data={"ids": dataset_ids})

        if response.get("code") == 0:
            return True
        raise self._failure("删除数据集失败", response)

    def update_dataset(self, dataset_id: str, name: Optional[str] = None,
                       description: Optional[str] = None,
                       embedding_model: Optional[str] = None,
                       permission: Optional[str] = None,
                       chunk_method: Optional[str] = None) -> Dict[str, Any]:
        """Update a dataset by posting to ``/api/v1/datasets/{dataset_id}``.

        Only the arguments that are not ``None`` are sent.

        NOTE(review): the RAGFlow HTTP API reference documents this operation
        as PUT; no ``put`` call appears anywhere in this class, so POST is
        kept here -- confirm against the client and server in use.

        Returns:
            The ``data`` member of the response.

        Raises:
            Exception: If ``code`` is not ``0`` or ``data`` is empty/missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}"
        payload = self._drop_unset(
            name=name,
            description=description,
            embedding_model=embedding_model,
            permission=permission,
            chunk_method=chunk_method,
        )

        # Pass the body as ``json_data`` like every other call in this class;
        # the previous ``json=`` keyword did not match that calling convention.
        response = self.http_client.post(endpoint, json_data=payload)

        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure("更新数据集失败", response)

    def list_datasets(self, page: int = 1, size: int = 20,
                      orderby: str = "create_time", desc: bool = True,
                      name: Optional[str] = None,
                      dataset_id: Optional[str] = None
                      ) -> Optional[List[Dict[str, Any]]]:
        """List datasets via ``GET /api/v1/datasets``.

        Args:
            page: Sent as the ``page`` query parameter.
            size: Sent as the ``page_size`` query parameter.
            orderby: Sent as the ``orderby`` query parameter.
            desc: Sent as ``desc`` after conversion to ``1``/``0``.
            name: Optional ``name`` filter; omitted when ``None``.
            dataset_id: Optional ``id`` filter; omitted when ``None``.

        Returns:
            The ``data`` member of a successful response, ``[]`` when the
            call succeeded but returned no data, or ``None`` when the server
            answers with code ``108``.

        Raises:
            Exception: For any other response code.
        """
        endpoint = "/api/v1/datasets"
        params = {
            "page": page,
            "page_size": size,
            "orderby": orderby,
            "desc": int(desc),
            **self._drop_unset(name=name, id=dataset_id),
        }

        response = self.http_client.get(endpoint, params=params)

        code = response.get("code")
        if code == 0:
            # A successful call with nothing to list is not an error.
            return response.get("data") or []
        if code == 108:
            return None
        raise self._failure("列出数据集失败", response)

    def get_dataset(self, dataset_id: str) -> Dict[str, Any]:
        """Fetch one dataset via ``GET /api/v1/datasets/{dataset_id}``.

        Returns:
            The ``data`` member of the response.

        Raises:
            Exception: If ``code`` is not ``0`` or ``data`` is empty/missing.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}"

        response = self.http_client.get(endpoint)

        if response.get("code") == 0 and response.get("data"):
            return response["data"]
        raise self._failure("获取数据集失败", response)

    def get_knowledge_graph(self, dataset_id: str) -> Dict[str, Any]:
        """Fetch ``/api/v1/datasets/{dataset_id}/knowledge_graph`` with GET.

        Returns:
            The response ``data``, or ``{}`` when the key is absent.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/knowledge_graph"

        response = self.http_client.get(endpoint)

        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取知识图谱失败", response)

    def delete_knowledge_graph(self, dataset_id: str) -> bool:
        """Delete ``/api/v1/datasets/{dataset_id}/knowledge_graph``.

        Returns:
            ``True`` when the response ``code`` is ``0``.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/knowledge_graph"

        # Use the DELETE verb: a POST to this URL does not express a removal,
        # and the RAGFlow HTTP API documents this operation as DELETE. The
        # empty body mirrors the call form used by ``delete_datasets``.
        response = self.http_client.delete(endpoint, json_data={})

        if response.get("code") == 0:
            return True
        raise self._failure("删除知识图谱失败", response)

    def trace_graphrag(self, dataset_id: str) -> Dict[str, Any]:
        """Fetch ``/api/v1/datasets/{dataset_id}/trace_graphrag`` with GET.

        Returns:
            The response ``data``, or ``{}`` when the key is absent.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/trace_graphrag"

        response = self.http_client.get(endpoint)

        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取GraphRAG追踪失败", response)

    def trace_raptor(self, dataset_id: str) -> Dict[str, Any]:
        """Fetch ``/api/v1/datasets/{dataset_id}/trace_raptor`` with GET.

        Returns:
            The response ``data``, or ``{}`` when the key is absent.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/trace_raptor"

        response = self.http_client.get(endpoint)

        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取RAPTOR追踪失败", response)

    def get_metadata_summary(self, dataset_id: str) -> Dict[str, Any]:
        """Fetch ``/api/v1/datasets/{dataset_id}/metadata/summary`` with GET.

        Returns:
            The response ``data``, or ``{}`` when the key is absent.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/metadata/summary"

        response = self.http_client.get(endpoint)

        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("获取元数据摘要失败", response)

    def update_metadata(self, dataset_id: str, metadata: Optional[Dict] = None,
                        document_ids: Optional[List[str]] = None,
                        metadata_condition: Optional[Dict] = None) -> bool:
        """Post to ``/api/v1/datasets/{dataset_id}/metadata/update``.

        Only the arguments that are not ``None`` are sent, under the keys
        ``metadata``, ``document_ids`` and ``metadata_condition``.

        Returns:
            ``True`` when the response ``code`` is ``0``.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/metadata/update"
        payload = self._drop_unset(
            metadata=metadata,
            document_ids=document_ids,
            metadata_condition=metadata_condition,
        )

        # ``json_data`` is the keyword the rest of this class uses for bodies.
        response = self.http_client.post(endpoint, json_data=payload)

        if response.get("code") == 0:
            return True
        raise self._failure("更新元数据失败", response)

    def run_graphrag(self, dataset_id: str, mode: str = "light") -> Dict[str, Any]:
        """Post ``{"mode": mode}`` to ``.../{dataset_id}/run_graphrag``.

        Returns:
            The response ``data``, or ``{}`` when the key is absent.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/run_graphrag"

        response = self.http_client.post(endpoint, json_data={"mode": mode})

        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("运行GraphRAG失败", response)

    def run_raptor(self, dataset_id: str) -> Dict[str, Any]:
        """Post (without a body) to ``.../{dataset_id}/run_raptor``.

        Returns:
            The response ``data``, or ``{}`` when the key is absent.

        Raises:
            Exception: If ``code`` is not ``0``.
        """
        endpoint = f"/api/v1/datasets/{dataset_id}/run_raptor"

        response = self.http_client.post(endpoint)

        if response.get("code") == 0:
            return response.get("data", {})
        raise self._failure("运行RAPTOR失败", response)
|