| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- from token import OP
- from typing import Optional
- from ragflow_sdk import DataSet, Document
- from conf.settings import ragflow_settings
- from .base_util import RAGFlowBaseUtil
- from common.models.pagination import Pagination
class DocumentUtil(RAGFlowBaseUtil):
    """Helper for document operations on a RAGFlow dataset.

    Each method looks the dataset up by name through
    ``self.ragflow_client.get_dataset(name=...)`` and delegates to the
    returned dataset object. ``self.ragflow_client`` is not defined in this
    class; it is presumably provided by ``RAGFlowBaseUtil`` (confirm there).

    Error handling convention: a failure is printed with a short
    operation-specific message and the original exception is re-raised
    unchanged, so callers still see the underlying error.
    """

    def __init__(self):
        """Initialise the helper by delegating to the base class."""
        super().__init__()

    def upload_documents(
        self,
        dataset_name: Optional[str] = None,
        document_list: Optional[list[dict]] = None,
    ):
        """Upload documents to the named dataset.

        Args:
            dataset_name: Name used to look up the target dataset.
            document_list: Document descriptors, passed through unchanged to
                the dataset's ``upload_documents``.

        Returns:
            Whatever the dataset's ``upload_documents`` call returns.

        Raises:
            ValueError: If ``document_list`` is ``None`` or empty.
            Exception: Any error raised by the client is re-raised.
        """
        try:
            if not document_list:
                raise ValueError("文档列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            return dataset.upload_documents(document_list)
        except Exception as e:
            print(f"上传文档到数据集失败: {e}")
            raise

    def list_documents(
        self,
        dataset_name: Optional[str] = None,
        pagination: Optional[Pagination] = None,
        id: Optional[str] = None,
        keywords: Optional[str] = None,
    ) -> list[Document]:
        """List documents of the named dataset.

        Args:
            dataset_name: Name used to look up the dataset.
            pagination: Paging options; its ``to_dict()`` result is expanded
                into keyword arguments of the dataset's ``list_documents``.
                When omitted, a fresh ``Pagination()`` is used.
            id: Optional document id forwarded as the ``id`` filter.
            keywords: Optional keywords forwarded as the ``keywords`` filter.

        Returns:
            The documents returned by the dataset's ``list_documents``.

        Raises:
            Exception: Any error raised by the client is re-raised.
        """
        try:
            # Build the default per call: a default instance created in the
            # signature would be shared (and mutable) across all calls.
            page = Pagination() if pagination is None else pagination
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            return dataset.list_documents(id=id, keywords=keywords, **page.to_dict())
        except Exception as e:
            print(f"列出数据集文档失败: {e}")
            raise

    def get_document(
        self,
        dataset_name: Optional[str] = None,
        document_id: Optional[str] = None,
    ) -> Document:
        """Fetch a single document by id.

        Args:
            dataset_name: Name used to look up the dataset.
            document_id: Id of the document to fetch.

        Returns:
            The first document returned by ``list_documents`` for that id.

        Raises:
            ValueError: If ``document_id`` is ``None`` or empty.
            LookupError: If no document with that id is returned.
            Exception: Any error raised while listing is re-raised.
        """
        # Without an id the listing is not filtered by id, so "the first
        # result" would be an arbitrary document rather than the one asked for.
        if not document_id:
            raise ValueError("document_id must not be empty")
        matches = self.list_documents(dataset_name=dataset_name, id=document_id)
        if not matches:
            raise LookupError(f"Document {document_id} not found")
        return matches[0]

    def update_document(self, document: Document, update_message: dict):
        """Update a document in place.

        Delegates to ``document.update(update_message)``.

        Args:
            document: The document object to update.
            update_message: Update payload passed through unchanged.

        Raises:
            Exception: Any error raised by ``document.update`` is re-raised.
        """
        try:
            document.update(update_message)
        except Exception as e:
            print(f"更新文档失败: {e}")
            raise

    def delete_document(
        self,
        dataset_name: Optional[str] = None,
        ids: Optional[list[str]] = None,
    ):
        """Delete documents from the named dataset.

        Args:
            dataset_name: Name used to look up the dataset.
            ids: Ids of the documents to delete.

        Raises:
            ValueError: If ``ids`` is ``None`` or empty.
            Exception: Any error raised by the client is re-raised.
        """
        try:
            if not ids:
                raise ValueError("文档ID列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            dataset.delete_documents(ids=ids)
        except Exception as e:
            print(f"删除文档失败: {e}")
            raise

    def async_parse_documents(
        self,
        dataset_name: Optional[str] = None,
        document_ids: Optional[list[str]] = None,
    ):
        """Start asynchronous parsing of documents in the named dataset.

        Args:
            dataset_name: Name used to look up the dataset.
            document_ids: Ids of the documents to parse.

        Raises:
            ValueError: If ``document_ids`` is ``None`` or empty.
            Exception: Any error raised by the client is re-raised.
        """
        try:
            # Same emptiness check as upload/delete, so an empty request
            # fails locally with a clear message.
            if not document_ids:
                raise ValueError("文档ID列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            dataset.async_parse_documents(document_ids=document_ids)
        except Exception as e:
            print(f"异步解析文档失败: {e}")
            raise

    def parse_documents(
        self,
        dataset_name: Optional[str] = None,
        document_ids: Optional[list[str]] = None,
    ) -> list[tuple[str, str, int, int]]:
        """Parse documents in the named dataset and return the result.

        Args:
            dataset_name: Name used to look up the dataset.
            document_ids: Ids of the documents to parse.

        Returns:
            The value returned by the dataset's ``parse_documents``.
            NOTE(review): the tuple fields are not visible from this file;
            confirm their meaning against the SDK.

        Raises:
            ValueError: If ``document_ids`` is ``None`` or empty.
            Exception: Any error raised by the client is re-raised.
        """
        try:
            if not document_ids:
                raise ValueError("文档ID列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            return dataset.parse_documents(document_ids=document_ids)
        except Exception as e:
            print(f"解析文档失败: {e}")
            raise
|