# dataset_service.py 7.3 KB

from typing import Any, Dict, List, Optional
  2. class DatasetService:
  3. def __init__(self, http_client):
  4. self.http_client = http_client
  5. def create_dataset(self, name: str, description: str = None,
  6. embedding_model: str = None, permission: str = None,
  7. chunk_method: str = None, parser_config: dict = None) -> Dict[str, Any]:
  8. endpoint = "/api/v1/datasets"
  9. data = {"name": name}
  10. if description is not None:
  11. data["description"] = description
  12. if embedding_model is not None:
  13. data["embedding_model"] = embedding_model
  14. if permission is not None:
  15. data["permission"] = permission
  16. if chunk_method is not None:
  17. data["chunk_method"] = chunk_method
  18. if parser_config is not None:
  19. data["parser_config"] = parser_config
  20. response = self.http_client.post(endpoint, json_data=data)
  21. if response.get("code") == 0 and response.get("data"):
  22. return response["data"]
  23. else:
  24. raise Exception(f"创建数据集失败: {response.get('message', '未知错误')}")
  25. def delete_datasets(self, dataset_ids: List[str]) -> bool:
  26. endpoint = "/api/v1/datasets"
  27. response = self.http_client.delete(endpoint, json_data={"ids": dataset_ids})
  28. if response.get("code") == 0:
  29. return True
  30. else:
  31. raise Exception(f"删除数据集失败: {response.get('message', '未知错误')}")
  32. def update_dataset(self, dataset_id: str, name: str = None,
  33. description: str = None, embedding_model: str = None,
  34. permission: str = None, chunk_method: str = None) -> Dict[str, Any]:
  35. endpoint = f"/api/v1/datasets/{dataset_id}"
  36. data = {}
  37. if name is not None:
  38. data["name"] = name
  39. if description is not None:
  40. data["description"] = description
  41. if embedding_model is not None:
  42. data["embedding_model"] = embedding_model
  43. if permission is not None:
  44. data["permission"] = permission
  45. if chunk_method is not None:
  46. data["chunk_method"] = chunk_method
  47. response = self.http_client.post(endpoint, json=data)
  48. if response.get("code") == 0 and response.get("data"):
  49. return response["data"]
  50. else:
  51. raise Exception(f"更新数据集失败: {response.get('message', '未知错误')}")
  52. def list_datasets(self, page: int = 1, size: int = 20, orderby: str = "create_time",
  53. desc: bool = True, name: str = None, dataset_id: str = None) -> List[Dict[str, Any]]:
  54. endpoint = "/api/v1/datasets"
  55. params = {"page": page, "page_size": size, "orderby": orderby, "desc": int(desc)}
  56. if name is not None:
  57. params["name"] = name
  58. if dataset_id is not None:
  59. params["id"] = dataset_id
  60. response = self.http_client.get(endpoint, params=params)
  61. if response.get("code") == 0 and response.get("data"):
  62. return response["data"]
  63. elif response.get("code") == 108:
  64. return None
  65. else:
  66. raise Exception(f"列出数据集失败: {response.get('message', '未知错误')}")
  67. def get_dataset(self, dataset_id: str) -> Dict[str, Any]:
  68. endpoint = f"/api/v1/datasets/{dataset_id}"
  69. response = self.http_client.get(endpoint)
  70. if response.get("code") == 0 and response.get("data"):
  71. return response["data"]
  72. else:
  73. raise Exception(f"获取数据集失败: {response.get('message', '未知错误')}")
  74. def get_knowledge_graph(self, dataset_id: str) -> Dict[str, Any]:
  75. endpoint = f"/api/v1/datasets/{dataset_id}/knowledge_graph"
  76. response = self.http_client.get(endpoint)
  77. if response.get("code") == 0:
  78. return response.get("data", {})
  79. else:
  80. raise Exception(f"获取知识图谱失败: {response.get('message', '未知错误')}")
  81. def delete_knowledge_graph(self, dataset_id: str) -> bool:
  82. endpoint = f"/api/v1/datasets/{dataset_id}/knowledge_graph"
  83. response = self.http_client.post(endpoint, json_data={})
  84. if response.get("code") == 0:
  85. return True
  86. else:
  87. raise Exception(f"删除知识图谱失败: {response.get('message', '未知错误')}")
  88. def trace_graphrag(self, dataset_id: str) -> Dict[str, Any]:
  89. endpoint = f"/api/v1/datasets/{dataset_id}/trace_graphrag"
  90. response = self.http_client.get(endpoint)
  91. if response.get("code") == 0:
  92. return response.get("data", {})
  93. else:
  94. raise Exception(f"获取GraphRAG追踪失败: {response.get('message', '未知错误')}")
  95. def trace_raptor(self, dataset_id: str) -> Dict[str, Any]:
  96. endpoint = f"/api/v1/datasets/{dataset_id}/trace_raptor"
  97. response = self.http_client.get(endpoint)
  98. if response.get("code") == 0:
  99. return response.get("data", {})
  100. else:
  101. raise Exception(f"获取RAPTOR追踪失败: {response.get('message', '未知错误')}")
  102. def get_metadata_summary(self, dataset_id: str) -> Dict[str, Any]:
  103. endpoint = f"/api/v1/datasets/{dataset_id}/metadata/summary"
  104. response = self.http_client.get(endpoint)
  105. if response.get("code") == 0:
  106. return response.get("data", {})
  107. else:
  108. raise Exception(f"获取元数据摘要失败: {response.get('message', '未知错误')}")
  109. def update_metadata(self, dataset_id: str, metadata: Dict = None,
  110. document_ids: List[str] = None, metadata_condition: Dict = None) -> bool:
  111. endpoint = f"/api/v1/datasets/{dataset_id}/metadata/update"
  112. data = {}
  113. if metadata is not None:
  114. data["metadata"] = metadata
  115. if document_ids is not None:
  116. data["document_ids"] = document_ids
  117. if metadata_condition is not None:
  118. data["metadata_condition"] = metadata_condition
  119. response = self.http_client.post(endpoint, json=data)
  120. if response.get("code") == 0:
  121. return True
  122. else:
  123. raise Exception(f"更新元数据失败: {response.get('message', '未知错误')}")
  124. def run_graphrag(self, dataset_id: str, mode: str = "light") -> Dict[str, Any]:
  125. endpoint = f"/api/v1/datasets/{dataset_id}/run_graphrag"
  126. response = self.http_client.post(endpoint, json_data={"mode": mode})
  127. if response.get("code") == 0:
  128. return response.get("data", {})
  129. else:
  130. raise Exception(f"运行GraphRAG失败: {response.get('message', '未知错误')}")
  131. def run_raptor(self, dataset_id: str) -> Dict[str, Any]:
  132. endpoint = f"/api/v1/datasets/{dataset_id}/run_raptor"
  133. response = self.http_client.post(endpoint)
  134. if response.get("code") == 0:
  135. return response.get("data", {})
  136. else:
  137. raise Exception(f"运行RAPTOR失败: {response.get('message', '未知错误')}")