# document_service.py (5.4 KB)
import os
from typing import Dict, Any, List, Optional
  2. class DocumentService:
  3. def __init__(self, http_client):
  4. self.http_client = http_client
  5. def upload_document(self, dataset_id: str, file_path: str) -> List[Dict[str, Any]]:
  6. import os
  7. endpoint = f"/api/v1/datasets/{dataset_id}/documents"
  8. with open(file_path, 'rb') as f:
  9. # 使用os.path.basename获取文件名,兼容Windows和Linux
  10. files = {'file': (os.path.basename(file_path), f)}
  11. # 不设置Content-Type头,让requests库自动生成正确的multipart/form-data头
  12. response = self.http_client.post(endpoint, files=files)
  13. if response.get("code") == 0 and response.get("data"):
  14. return response["data"]
  15. else:
  16. raise Exception(f"上传文档失败: {response.get('message', '未知错误')}")
  17. def update_document(self, dataset_id: str, document_id: str,
  18. name: str = None, meta_fields: Dict = None,
  19. chunk_method: str = None, parser_config: Dict = None,
  20. enabled: int = None) -> Dict[str, Any]:
  21. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
  22. data = {}
  23. if name is not None:
  24. data["name"] = name
  25. if meta_fields is not None:
  26. data["meta_fields"] = meta_fields
  27. if chunk_method is not None:
  28. data["chunk_method"] = chunk_method
  29. if parser_config is not None:
  30. data["parser_config"] = parser_config
  31. if enabled is not None:
  32. data["enabled"] = enabled
  33. response = self.http_client.post(endpoint, json_data=data)
  34. if response.get("code") == 0 and response.get("data"):
  35. return response["data"]
  36. else:
  37. raise Exception(f"更新文档失败: {response.get('message', '未知错误')}")
  38. def delete_document(self, dataset_id: str, document_id: str) -> bool:
  39. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
  40. response = self.http_client.post(endpoint, json_data={})
  41. if response.get("code") == 0:
  42. return True
  43. else:
  44. raise Exception(f"删除文档失败: {response.get('message', '未知错误')}")
  45. def delete_documents(self, dataset_id: str, document_ids: List[str]) -> bool:
  46. endpoint = f"/api/v1/datasets/{dataset_id}/documents"
  47. response = self.http_client.post(endpoint, json_data={"document_ids": document_ids})
  48. if response.get("code") == 0:
  49. return True
  50. else:
  51. raise Exception(f"批量删除文档失败: {response.get('message', '未知错误')}")
  52. def get_document(self, dataset_id: str, document_id: str) -> Dict[str, Any]:
  53. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}"
  54. response = self.http_client.get(endpoint)
  55. if response.get("code") == 0 and response.get("data"):
  56. return response["data"]
  57. else:
  58. raise Exception(f"获取文档失败: {response.get('message', '未知错误')}")
  59. def list_documents(self, dataset_id: str, page: int = 1, size: int = 20,
  60. keywords: str = None, document_id: str = None, document_name: str = None,
  61. suffix: str = None, run: str = None) -> List[Dict[str, Any]]:
  62. endpoint = f"/api/v1/datasets/{dataset_id}/documents"
  63. params = {"page": page, "page_size": size}
  64. if keywords is not None:
  65. params["keywords"] = keywords
  66. if document_id is not None:
  67. params["id"] = document_id
  68. if document_name is not None:
  69. params["name"] = document_name
  70. if suffix is not None:
  71. params["suffix"] = suffix
  72. if run is not None:
  73. params["run"] = run
  74. response = self.http_client.get(endpoint, params=params)
  75. if response.get("code") == 0 and response.get("data"):
  76. return response["data"]
  77. else:
  78. raise Exception(f"列出文档失败: {response.get('message', '未知错误')}")
  79. def get_document_chunks(self, dataset_id: str, document_id: str,
  80. keywords: str = None, page: int = 1, size: int = 20,
  81. chunk_id: str = None) -> List[Dict[str, Any]]:
  82. endpoint = f"/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
  83. params = {"page": page, "page_size": size}
  84. if keywords is not None:
  85. params["keywords"] = keywords
  86. if chunk_id is not None:
  87. params["id"] = chunk_id
  88. response = self.http_client.get(endpoint, params=params)
  89. if response.get("code") == 0 and response.get("data"):
  90. return response["data"]
  91. else:
  92. raise Exception(f"获取文档切片失败: {response.get('message', '未知错误')}")
  93. def parse_document(self, dataset_id: str, document_ids: List[str]) -> bool:
  94. endpoint = f"/api/v1/datasets/{dataset_id}/chunks"
  95. response = self.http_client.post(endpoint, json_data={"document_ids": document_ids})
  96. if response.get("code") == 0:
  97. return True
  98. else:
  99. raise Exception(f"解析文档失败: {response.get('message', '未知错误')}")