# document_service.py
import os
from typing import Any, Dict, List, Optional
  2. class DocumentService:
  3. def __init__(self, http_client):
  4. self.http_client = http_client
  5. def upload_document(self, dataset_id: str, file_path: str) -> List[Dict[str, Any]]:
  6. endpoint = f"/api/v1/datasets/{dataset_id}/documents"
  7. with open(file_path, 'rb') as f:
  8. files = {'file': (file_path.split('/')[-1], f)}
  9. headers = {'Content-Type': 'multipart/form-data'}
  10. response = self.http_client.post(endpoint, files=files, headers=headers)
  11. if response.get("code") == 0 and response.get("data"):
  12. return response["data"]
  13. else:
  14. raise Exception(f"上传文档失败: {response.get('message', '未知错误')}")
  15. def update_document(self, dataset_id: str, document_id: str,
  16. name: str = None, meta_fields: Dict = None,
  17. chunk_method: str = None, parser_config: Dict = None,
  18. enabled: int = None) -> Dict[str, Any]:
  19. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
  20. data = {}
  21. if name is not None:
  22. data["name"] = name
  23. if meta_fields is not None:
  24. data["meta_fields"] = meta_fields
  25. if chunk_method is not None:
  26. data["chunk_method"] = chunk_method
  27. if parser_config is not None:
  28. data["parser_config"] = parser_config
  29. if enabled is not None:
  30. data["enabled"] = enabled
  31. response = self.http_client.post(endpoint, json=data)
  32. if response.get("code") == 0 and response.get("data"):
  33. return response["data"]
  34. else:
  35. raise Exception(f"更新文档失败: {response.get('message', '未知错误')}")
  36. def delete_document(self, dataset_id: str, document_id: str) -> bool:
  37. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
  38. response = self.http_client.post(endpoint, json={})
  39. if response.get("code") == 0:
  40. return True
  41. else:
  42. raise Exception(f"删除文档失败: {response.get('message', '未知错误')}")
  43. def delete_documents(self, dataset_id: str, document_ids: List[str]) -> bool:
  44. endpoint = f"/api/v1/datasets/{dataset_id}/documents"
  45. response = self.http_client.post(endpoint, json={"document_ids": document_ids})
  46. if response.get("code") == 0:
  47. return True
  48. else:
  49. raise Exception(f"批量删除文档失败: {response.get('message', '未知错误')}")
  50. def get_document(self, dataset_id: str, document_id: str) -> Dict[str, Any]:
  51. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
  52. response = self.http_client.get(endpoint)
  53. if response.get("code") == 0 and response.get("data"):
  54. return response["data"]
  55. else:
  56. raise Exception(f"获取文档失败: {response.get('message', '未知错误')}")
  57. def list_documents(self, dataset_id: str, page: int = 1, size: int = 20,
  58. keywords: str = None, document_id: str = None, document_name: str = None,
  59. suffix: str = None, run: str = None) -> List[Dict[str, Any]]:
  60. endpoint = f"/api/v1/datasets/{dataset_id}/documents"
  61. params = {"page": page, "page_size": size}
  62. if keywords is not None:
  63. params["keywords"] = keywords
  64. if document_id is not None:
  65. params["id"] = document_id
  66. if document_name is not None:
  67. params["name"] = document_name
  68. if suffix is not None:
  69. params["suffix"] = suffix
  70. if run is not None:
  71. params["run"] = run
  72. response = self.http_client.get(endpoint, params=params)
  73. if response.get("code") == 0 and response.get("data"):
  74. return response["data"]
  75. else:
  76. raise Exception(f"列出文档失败: {response.get('message', '未知错误')}")
  77. def get_document_chunks(self, dataset_id: str, document_id: str,
  78. keywords: str = None, page: int = 1, size: int = 20,
  79. chunk_id: str = None) -> List[Dict[str, Any]]:
  80. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
  81. params = {"page": page, "page_size": size}
  82. if keywords is not None:
  83. params["keywords"] = keywords
  84. if chunk_id is not None:
  85. params["id"] = chunk_id
  86. response = self.http_client.get(endpoint, params=params)
  87. if response.get("code") == 0 and response.get("data"):
  88. return response["data"]
  89. else:
  90. raise Exception(f"获取文档切片失败: {response.get('message', '未知错误')}")
  91. def parse_document(self, dataset_id: str, document_ids: List[str]) -> bool:
  92. endpoint = f"/api/v1/datasets/{dataset_id}/chunks"
  93. response = self.http_client.post(endpoint, json={"document_ids": document_ids})
  94. if response.get("code") == 0:
  95. return True
  96. else:
  97. raise Exception(f"解析文档失败: {response.get('message', '未知错误')}")