from token import OP  # NOTE(review): appears unused in this module — confirm before removing
from typing import Optional

from ragflow_sdk import DataSet, Document

from conf.settings import ragflow_settings
from .base_util import RAGFlowBaseUtil
from common.models.pagination import Pagination


class DocumentUtil(RAGFlowBaseUtil):
    """Document-level helpers for a RAGFlow dataset.

    Methods resolve the dataset by name through ``self.ragflow_client``
    (presumably provided by ``RAGFlowBaseUtil`` — confirm) and forward to the
    matching dataset/document call. On failure the error is printed and the
    original exception is re-raised.
    """

    def __init__(self):
        super().__init__()

    def upload_documents(
        self,
        dataset_name: Optional[str] = None,
        document_list: Optional[list[dict]] = None,
    ):
        """Upload documents to the named dataset.

        Args:
            dataset_name: Name used to look the dataset up.
            document_list: Document descriptors, passed unchanged to the
                dataset's ``upload_documents`` call.

        Returns:
            Whatever the dataset's ``upload_documents`` call returns.

        Raises:
            ValueError: If ``document_list`` is ``None`` or empty.
            Exception: Any error from the lookup or the upload, re-raised.
        """
        try:
            if not document_list:
                raise ValueError("文档列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            return dataset.upload_documents(document_list)
        except Exception as e:
            print(f"上传文档到数据集失败: {e}")
            raise

    def list_documents(
        self,
        dataset_name: Optional[str] = None,
        pagination: Optional[Pagination] = None,
        id: Optional[str] = None,
        keywords: Optional[str] = None,
    ) -> list[Document]:
        """List documents of the named dataset.

        Args:
            dataset_name: Name used to look the dataset up.
            pagination: Paging options; its ``to_dict()`` result is expanded
                into keyword arguments. A fresh ``Pagination()`` is used when
                omitted.
            id: Forwarded as the ``id`` argument of the listing call.
            keywords: Forwarded as the ``keywords`` argument of the listing
                call.

        Returns:
            The result of the dataset's ``list_documents`` call.

        Raises:
            Exception: Any error from the lookup or the listing, re-raised.
        """
        # Build the default per call: a ``Pagination()`` default in the
        # signature is created once at definition time and shared by every
        # caller.
        if pagination is None:
            pagination = Pagination()
        try:
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            return dataset.list_documents(
                id=id, keywords=keywords, **pagination.to_dict()
            )
        except Exception as e:
            print(f"列出数据集文档失败: {e}")
            raise

    def get_document(
        self,
        dataset_name: Optional[str] = None,
        document_id: Optional[str] = None,
    ) -> Document:
        """Fetch a single document by id.

        Args:
            dataset_name: Name used to look the dataset up.
            document_id: Id of the document to fetch; required.

        Returns:
            The first document returned by ``list_documents`` for this id.

        Raises:
            ValueError: If ``document_id`` is ``None`` or empty.
            Exception: If the listing comes back empty, or the listing fails.
        """
        # Without an id the listing is not narrowed to one document, and its
        # first row would be returned as if it were the requested document.
        if not document_id:
            raise ValueError("document_id must not be empty")
        matches = self.list_documents(dataset_name=dataset_name, id=document_id)
        if not matches:
            raise Exception(f"Document {document_id} not found")
        return matches[0]

    def update_document(self, document: Document, update_message: dict):
        """Update a document.

        Args:
            document: Document object whose ``update`` method is called.
            update_message: Dict passed unchanged to ``document.update``.

        Raises:
            Exception: Any error from ``document.update``, re-raised.
        """
        try:
            document.update(update_message)
        except Exception as e:
            print(f"更新文档失败: {e}")
            raise

    def delete_document(
        self,
        dataset_name: Optional[str] = None,
        ids: Optional[list[str]] = None,
    ):
        """Delete documents from the named dataset.

        Args:
            dataset_name: Name used to look the dataset up.
            ids: Ids of the documents to delete; required and non-empty.

        Raises:
            ValueError: If ``ids`` is ``None`` or empty.
            Exception: Any error from the lookup or the deletion, re-raised.
        """
        try:
            if not ids:
                raise ValueError("文档ID列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            dataset.delete_documents(ids=ids)
        except Exception as e:
            print(f"删除文档失败: {e}")
            raise

    def async_parse_documents(
        self,
        dataset_name: Optional[str] = None,
        document_ids: Optional[list[str]] = None,
    ):
        """Start asynchronous parsing of documents in the named dataset.

        Args:
            dataset_name: Name used to look the dataset up.
            document_ids: Ids of the documents to parse; required and
                non-empty.

        Raises:
            ValueError: If ``document_ids`` is ``None`` or empty.
            Exception: Any error from the lookup or the parse call, re-raised.
        """
        try:
            # Same guard as delete_document: reject an empty id list here
            # instead of forwarding it.
            if not document_ids:
                raise ValueError("文档ID列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            dataset.async_parse_documents(document_ids=document_ids)
        except Exception as e:
            print(f"异步解析文档失败: {e}")
            raise

    def parse_documents(
        self,
        dataset_name: Optional[str] = None,
        document_ids: Optional[list[str]] = None,
    ) -> list[tuple[str, str, int, int]]:
        """Parse documents in the named dataset and return the result.

        Args:
            dataset_name: Name used to look the dataset up.
            document_ids: Ids of the documents to parse; required and
                non-empty.

        Returns:
            The result of the dataset's ``parse_documents`` call.

        Raises:
            ValueError: If ``document_ids`` is ``None`` or empty.
            Exception: Any error from the lookup or the parse call, re-raised.
        """
        try:
            # Same guard as delete_document: reject an empty id list here
            # instead of forwarding it.
            if not document_ids:
                raise ValueError("文档ID列表不能为空")
            dataset = self.ragflow_client.get_dataset(name=dataset_name)
            return dataset.parse_documents(document_ids=document_ids)
        except Exception as e:
            print(f"解析文档失败: {e}")
            raise