# dataset_service.py
from typing import Any, Dict, List, Optional
  2. class DatasetService:
  3. def __init__(self, http_client):
  4. self.http_client = http_client
  5. def create_dataset(self, name: str, description: str = None,
  6. embedding_model: str = None, permission: str = None,
  7. chunk_method: str = None) -> Dict[str, Any]:
  8. endpoint = "/api/v1/datasets"
  9. data = {"name": name}
  10. if description is not None:
  11. data["description"] = description
  12. if embedding_model is not None:
  13. data["embedding_model"] = embedding_model
  14. if permission is not None:
  15. data["permission"] = permission
  16. if chunk_method is not None:
  17. data["chunk_method"] = chunk_method
  18. response = self.http_client.post(endpoint, json=data)
  19. if response.get("code") == 0 and response.get("data"):
  20. return response["data"]
  21. else:
  22. raise Exception(f"创建数据集失败: {response.get('message', '未知错误')}")
  23. def delete_datasets(self, dataset_ids: List[str]) -> bool:
  24. endpoint = "/api/v1/datasets"
  25. response = self.http_client.post(endpoint, json={"dataset_ids": dataset_ids})
  26. if response.get("code") == 0:
  27. return True
  28. else:
  29. raise Exception(f"删除数据集失败: {response.get('message', '未知错误')}")
  30. def update_dataset(self, dataset_id: str, name: str = None,
  31. description: str = None, embedding_model: str = None,
  32. permission: str = None, chunk_method: str = None) -> Dict[str, Any]:
  33. endpoint = f"/api/v1/datasets/{dataset_id}"
  34. data = {}
  35. if name is not None:
  36. data["name"] = name
  37. if description is not None:
  38. data["description"] = description
  39. if embedding_model is not None:
  40. data["embedding_model"] = embedding_model
  41. if permission is not None:
  42. data["permission"] = permission
  43. if chunk_method is not None:
  44. data["chunk_method"] = chunk_method
  45. response = self.http_client.post(endpoint, json=data)
  46. if response.get("code") == 0 and response.get("data"):
  47. return response["data"]
  48. else:
  49. raise Exception(f"更新数据集失败: {response.get('message', '未知错误')}")
  50. def list_datasets(self, page: int = 1, size: int = 20, orderby: str = "create_time",
  51. desc: bool = True, name: str = None, dataset_id: str = None) -> List[Dict[str, Any]]:
  52. endpoint = "/api/v1/datasets"
  53. params = {"page": page, "page_size": size, "orderby": orderby, "desc": int(desc)}
  54. if name is not None:
  55. params["name"] = name
  56. if dataset_id is not None:
  57. params["id"] = dataset_id
  58. response = self.http_client.get(endpoint, params=params)
  59. if response.get("code") == 0 and response.get("data"):
  60. return response["data"]
  61. else:
  62. raise Exception(f"列出数据集失败: {response.get('message', '未知错误')}")
  63. def get_dataset(self, dataset_id: str) -> Dict[str, Any]:
  64. endpoint = f"/api/v1/datasets/{dataset_id}"
  65. response = self.http_client.get(endpoint)
  66. if response.get("code") == 0 and response.get("data"):
  67. return response["data"]
  68. else:
  69. raise Exception(f"获取数据集失败: {response.get('message', '未知错误')}")
  70. def get_knowledge_graph(self, dataset_id: str) -> Dict[str, Any]:
  71. endpoint = f"/api/v1/datasets/{dataset_id}/knowledge_graph"
  72. response = self.http_client.get(endpoint)
  73. if response.get("code") == 0:
  74. return response.get("data", {})
  75. else:
  76. raise Exception(f"获取知识图谱失败: {response.get('message', '未知错误')}")
  77. def delete_knowledge_graph(self, dataset_id: str) -> bool:
  78. endpoint = f"/api/v1/datasets/{dataset_id}/knowledge_graph"
  79. response = self.http_client.post(endpoint, json={})
  80. if response.get("code") == 0:
  81. return True
  82. else:
  83. raise Exception(f"删除知识图谱失败: {response.get('message', '未知错误')}")
  84. def trace_graphrag(self, dataset_id: str) -> Dict[str, Any]:
  85. endpoint = f"/api/v1/datasets/{dataset_id}/trace_graphrag"
  86. response = self.http_client.get(endpoint)
  87. if response.get("code") == 0:
  88. return response.get("data", {})
  89. else:
  90. raise Exception(f"获取GraphRAG追踪失败: {response.get('message', '未知错误')}")
  91. def trace_raptor(self, dataset_id: str) -> Dict[str, Any]:
  92. endpoint = f"/api/v1/datasets/{dataset_id}/trace_raptor"
  93. response = self.http_client.get(endpoint)
  94. if response.get("code") == 0:
  95. return response.get("data", {})
  96. else:
  97. raise Exception(f"获取RAPTOR追踪失败: {response.get('message', '未知错误')}")
  98. def get_metadata_summary(self, dataset_id: str) -> Dict[str, Any]:
  99. endpoint = f"/api/v1/datasets/{dataset_id}/metadata/summary"
  100. response = self.http_client.get(endpoint)
  101. if response.get("code") == 0:
  102. return response.get("data", {})
  103. else:
  104. raise Exception(f"获取元数据摘要失败: {response.get('message', '未知错误')}")
  105. def update_metadata(self, dataset_id: str, metadata: Dict = None,
  106. document_ids: List[str] = None, metadata_condition: Dict = None) -> bool:
  107. endpoint = f"/api/v1/datasets/{dataset_id}/metadata/update"
  108. data = {}
  109. if metadata is not None:
  110. data["metadata"] = metadata
  111. if document_ids is not None:
  112. data["document_ids"] = document_ids
  113. if metadata_condition is not None:
  114. data["metadata_condition"] = metadata_condition
  115. response = self.http_client.post(endpoint, json=data)
  116. if response.get("code") == 0:
  117. return True
  118. else:
  119. raise Exception(f"更新元数据失败: {response.get('message', '未知错误')}")
  120. def run_graphrag(self, dataset_id: str, mode: str = "light") -> Dict[str, Any]:
  121. endpoint = f"/api/v1/datasets/{dataset_id}/run_graphrag"
  122. response = self.http_client.post(endpoint, json={"mode": mode})
  123. if response.get("code") == 0:
  124. return response.get("data", {})
  125. else:
  126. raise Exception(f"运行GraphRAG失败: {response.get('message', '未知错误')}")
  127. def run_raptor(self, dataset_id: str) -> Dict[str, Any]:
  128. endpoint = f"/api/v1/datasets/{dataset_id}/run_raptor"
  129. response = self.http_client.post(endpoint)
  130. if response.get("code") == 0:
  131. return response.get("data", {})
  132. else:
  133. raise Exception(f"运行RAPTOR失败: {response.get('message', '未知错误')}")