document_util.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. from token import OP
  2. from typing import Optional
  3. from ragflow_sdk import DataSet, Document
  4. from conf.settings import ragflow_settings
  5. from .base_util import RAGFlowBaseUtil
  6. from common.models.pagination import Pagination
  7. class DocumentUtil(RAGFlowBaseUtil):
  8. def __init__(self):
  9. super().__init__()
  10. def upload_documents(self, dataset_name: Optional[str] = None, document_list: list[dict] = None):
  11. """
  12. 上传文档到数据集
  13. """
  14. try:
  15. if not document_list:
  16. raise ValueError("文档列表不能为空")
  17. # 上传文档到数据集
  18. doc_list = self.ragflow_client.get_dataset(name=dataset_name).upload_documents(document_list)
  19. return doc_list
  20. except Exception as e:
  21. print(f"上传文档到数据集失败: {e}")
  22. raise e
  23. def list_documents(self, dataset_name: Optional[str] = None, pagination: Pagination = Pagination(), id: str = None, keywords: str = None) -> list[Document]:
  24. """
  25. 列出数据集的文档
  26. """
  27. try:
  28. doc_list = self.ragflow_client.get_dataset(name=dataset_name).list_documents(id=id, keywords=keywords, **pagination.to_dict())
  29. return doc_list
  30. except Exception as e:
  31. print(f"列出数据集文档失败: {e}")
  32. raise e
  33. def get_document(self, dataset_name: Optional[str] = None, document_id: Optional[str] = None) -> Document:
  34. """
  35. 获取文档
  36. """
  37. _list = self.list_documents(dataset_name=dataset_name, id=document_id)
  38. if len(_list) > 0:
  39. return _list[0]
  40. raise Exception("Document %s not found" % document_id)
  41. # Document.update(update_message:dict)
  42. def update_document(self, document: Document, update_message: dict):
  43. """
  44. 更新文档
  45. """
  46. try:
  47. document.update(update_message)
  48. except Exception as e:
  49. print(f"更新文档失败: {e}")
  50. raise e
  51. def delete_document(self, dataset_name: Optional[str] = None, ids: list[str] = None):
  52. """
  53. 删除文档
  54. """
  55. try:
  56. if not ids:
  57. raise ValueError("文档ID列表不能为空")
  58. self.ragflow_client.get_dataset(name=dataset_name).delete_documents(ids=ids)
  59. except Exception as e:
  60. print(f"删除文档失败: {e}")
  61. raise e
  62. def async_parse_documents(self, dataset_name: Optional[str] = None,document_ids: list[str] = None):
  63. """
  64. 异步解析文档
  65. """
  66. try:
  67. self.ragflow_client.get_dataset(name=dataset_name).async_parse_documents(document_ids=document_ids)
  68. except Exception as e:
  69. print(f"异步解析文档失败: {e}")
  70. raise e
  71. def parse_documents(self, dataset_name: Optional[str] = None,document_ids: list[str] = None) -> list[tuple[str, str, int, int]]:
  72. """
  73. 解析文档
  74. """
  75. try:
  76. doc_list = self.ragflow_client.get_dataset(name=dataset_name).parse_documents(document_ids=document_ids)
  77. return doc_list
  78. except Exception as e:
  79. print(f"解析文档失败: {e}")
  80. raise e