@@ -0,0 +1,302 @@
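+"""Unified facade over the RAGFlow API.
+
+`RAGFlowService` builds a single `HTTPClient` from `ragflow_settings` and
+re-exposes the dataset, document, chunk, chat, agent, file and
+OpenAI-compatible sub-services through one object.
+"""
+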
+import sys
+import os
+from typing import Dict, Any, List, Optional
+from dataclasses import dataclass
+
+# Add the directory three levels above this file to sys.path so the `src.*`
+# imports below resolve when this module is executed directly.
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from src.utils.http_client import HTTPClient
+from src.conf.settings import ragflow_settings
+from src.utils.ragflow.dataset_service import DatasetService
+from src.utils.ragflow.document_service import DocumentService
+from src.utils.ragflow.chunk_service import ChunkService
+from src.utils.ragflow.chat_service import ChatService
+from src.utils.ragflow.agent_service import AgentService
+from src.utils.ragflow.file_service import FileService
+from src.utils.ragflow.openai_service import OpenAICompatibleService
+
+@dataclass
+class DocumentInfo:
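+    """Metadata for a single document stored in a RAGFlow dataset."""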
+    id: str
+    name: str
+    type: str
+    size: int
+    location: str
+    dataset_id: str
+    chunk_method: str
+    chunk_count: Optional[int] = None
+    token_count: Optional[int] = None
+    run: str = "UNSTART"
+    status: str = "1"
+
+@dataclass
+class ChunkInfo:
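+    """A stored or retrieved text chunk together with its similarity scores."""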
+    id: str
+    document_id: str
+    content: str
+    document_name: str
+    dataset_id: str
+    similarity: float = 0.0
+    vector_similarity: float = 0.0
+    term_similarity: float = 0.0
+
+@dataclass
+class DatasetInfo:
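+    """Metadata and counters for a RAGFlow dataset."""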
+    id: str
+    name: str
+    description: Optional[str] = None
+    embedding_model: Optional[str] = None
+    permission: Optional[str] = None
+    chunk_method: Optional[str] = None
+    chunk_count: int = 0
+    document_count: int = 0
+    token_count: int = 0
+    status: str = "1"
+
+@dataclass
+class ChatInfo:
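+    """Configuration of a chat assistant bound to one or more datasets."""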
+    id: str
+    name: str
+    dataset_ids: List[str]
+    llm: Dict[str, Any]
+    prompt: str
+
+@dataclass
+class AgentInfo:
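+    """Basic description of a RAGFlow agent and its LLM configuration."""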
+    id: str
+    name: str
+    llm: Dict[str, Any]
+    description: Optional[str] = None
+
+@dataclass
+class FileInfo:
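+    """A file or folder entry managed by the file service."""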
+    id: str
+    parent_id: str
+    name: str
+    type: str
+    size: int
+
+class RAGFlowService:
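+    """Single entry point that delegates to the RAGFlow sub-services.
+
+    Example (values are illustrative)::
+
+        service = RAGFlowService()
+        dataset = service.create_dataset("demo")
+        docs = service.upload_document(dataset.id, "/path/to/file.pdf")
+        service.parse_document(dataset.id, [doc.id for doc in docs])
+    """
+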
+    def __init__(self, base_url: str = None, api_key: str = None):
+        base_url = base_url or ragflow_settings.ragflow_api_url
+        api_key = api_key or ragflow_settings.ragflow_api_key
+        self.http_client = HTTPClient(base_url=base_url, api_key=api_key)
+
+        # A single HTTPClient instance is shared by every sub-service.
+        self.dataset_service = DatasetService(self.http_client)
+        self.document_service = DocumentService(self.http_client)
+        self.chunk_service = ChunkService(self.http_client)
+        self.chat_service = ChatService(self.http_client)
+        self.agent_service = AgentService(self.http_client)
+        self.file_service = FileService(self.http_client)
+        self.openai_service = OpenAICompatibleService(self.http_client)
+
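+    # ---- Dataset operations ----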
+    def create_dataset(self, name: str, description: str = None,
+                       embedding_model: str = None, permission: str = None,
+                       chunk_method: str = None, parser_config: dict = None) -> DatasetInfo:
+        return self.dataset_service.create_dataset(name, description, embedding_model, permission, chunk_method, parser_config)
+
+    def delete_datasets(self, dataset_ids: List[str]) -> bool:
+        return self.dataset_service.delete_datasets(dataset_ids)
+
+    def update_dataset(self, dataset_id: str, name: str = None,
+                       description: str = None, embedding_model: str = None,
+                       permission: str = None, chunk_method: str = None) -> DatasetInfo:
+        return self.dataset_service.update_dataset(dataset_id, name, description, embedding_model, permission, chunk_method)
+
+    def list_datasets(self, page: int = 1, size: int = 20, orderby: str = "create_time",
+                      desc: bool = True, name: str = None, dataset_id: str = None) -> List[DatasetInfo]:
+        return self.dataset_service.list_datasets(page, size, orderby, desc, name, dataset_id)
+
+    def get_dataset(self, name: Optional[str] = None, dataset_id: Optional[str] = None) -> Optional[DatasetInfo]:
+        # Return the first matching dataset, or None when nothing matches.
+        datasets = self.list_datasets(name=name, dataset_id=dataset_id)
+        if datasets:
+            return datasets[0]
+        return None
+
+    def get_knowledge_graph(self, dataset_id: str) -> Dict[str, Any]:
+        return self.dataset_service.get_knowledge_graph(dataset_id)
+
+    def delete_knowledge_graph(self, dataset_id: str) -> bool:
+        return self.dataset_service.delete_knowledge_graph(dataset_id)
+
+    def trace_graphrag(self, dataset_id: str) -> Dict[str, Any]:
+        return self.dataset_service.trace_graphrag(dataset_id)
+
+    def trace_raptor(self, dataset_id: str) -> Dict[str, Any]:
+        return self.dataset_service.trace_raptor(dataset_id)
+
+    def get_metadata_summary(self, dataset_id: str) -> Dict[str, Any]:
+        return self.dataset_service.get_metadata_summary(dataset_id)
+
+    def update_metadata(self, dataset_id: str, metadata: Dict = None,
+                        document_ids: List[str] = None, metadata_condition: Dict = None) -> bool:
+        return self.dataset_service.update_metadata(dataset_id, metadata, document_ids, metadata_condition)
+
+    def run_graphrag(self, dataset_id: str, mode: str = "light") -> Dict[str, Any]:
+        return self.dataset_service.run_graphrag(dataset_id, mode)
+
+    def run_raptor(self, dataset_id: str) -> Dict[str, Any]:
+        return self.dataset_service.run_raptor(dataset_id)
+
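+    # ---- Document operations ----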
+    def upload_document(self, dataset_id: str, file_path: str) -> List[DocumentInfo]:
+        return self.document_service.upload_document(dataset_id, file_path)
+
+    def update_document(self, dataset_id: str, document_id: str,
+                        name: str = None, meta_fields: Dict = None,
+                        chunk_method: str = None, parser_config: Dict = None,
+                        enabled: int = None) -> DocumentInfo:
+        return self.document_service.update_document(dataset_id, document_id, name, meta_fields, chunk_method, parser_config, enabled)
+
+    def delete_document(self, dataset_id: str, document_id: str) -> bool:
+        return self.document_service.delete_document(dataset_id, document_id)
+
+    def delete_documents(self, dataset_id: str, document_ids: List[str]) -> bool:
+        return self.document_service.delete_documents(dataset_id, document_ids)
+
+    def get_document(self, dataset_id: str, document_id: str) -> DocumentInfo:
+        return self.document_service.get_document(dataset_id, document_id)
+
+    def list_documents(self, dataset_id: str, page: int = 1, size: int = 20,
+                       keywords: str = None, document_id: str = None, document_name: str = None,
+                       suffix: str = None, run: str = None) -> List[DocumentInfo]:
+        return self.document_service.list_documents(dataset_id, page, size, keywords, document_id, document_name, suffix, run)
+
+    def get_document_chunks(self, dataset_id: str, document_id: str,
+                            keywords: str = None, page: int = 1, size: int = 20,
+                            chunk_id: str = None) -> List[ChunkInfo]:
+        return self.document_service.get_document_chunks(dataset_id, document_id, keywords, page, size, chunk_id)
+
+    def parse_document(self, dataset_id: str, document_ids: List[str]) -> bool:
+        return self.document_service.parse_document(dataset_id, document_ids)
+
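+    # ---- Chunk operations and retrieval ----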
+    def create_chunk(self, dataset_id: str, document_id: str, content: str,
+                     important_keywords: List[str] = None) -> ChunkInfo:
+        return self.chunk_service.create_chunk(dataset_id, document_id, content, important_keywords)
+
+    def update_chunk(self, dataset_id: str, chunk_id: str, content: str = None,
+                     important_keywords: List[str] = None) -> ChunkInfo:
+        return self.chunk_service.update_chunk(dataset_id, chunk_id, content, important_keywords)
+
+    def delete_chunk(self, dataset_id: str, chunk_id: str) -> bool:
+        return self.chunk_service.delete_chunk(dataset_id, chunk_id)
+
+    def delete_chunks(self, dataset_id: str, document_id: str, chunk_ids: List[str]) -> bool:
+        return self.chunk_service.delete_chunks(dataset_id, document_id, chunk_ids)
+
+    def retrieval(self, dataset_ids: List[str], query: str, top_k: int = 5,
+                  similarity_threshold: float = 0.1, vector_similarity_weight: float = 0.3,
+                  refine: bool = False) -> List[ChunkInfo]:
+        return self.chunk_service.retrieval(dataset_ids, query, top_k, similarity_threshold, vector_similarity_weight, refine)
+
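+    # ---- Chat assistants and chat sessions ----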
+    def create_chat(self, name: str, dataset_ids: List[str], llm: Dict[str, Any],
+                    prompt: str = None) -> ChatInfo:
+        return self.chat_service.create_chat(name, dataset_ids, llm, prompt)
+
+    def update_chat(self, chat_id: str, name: str = None, dataset_ids: List[str] = None,
+                    llm: Dict[str, Any] = None, prompt: str = None) -> ChatInfo:
+        return self.chat_service.update_chat(chat_id, name, dataset_ids, llm, prompt)
+
+    def delete_chats(self, chat_ids: List[str]) -> bool:
+        return self.chat_service.delete_chats(chat_ids)
+
+    def list_chats(self, page: int = 1, size: int = 20, orderby: str = "create_time",
+                   desc: bool = True, name: str = None, chat_id: str = None) -> List[ChatInfo]:
+        return self.chat_service.list_chats(page, size, orderby, desc, name, chat_id)
+
+    def create_chat_session(self, chat_id: str, name: str = None) -> Dict[str, Any]:
+        return self.chat_service.create_chat_session(chat_id, name)
+
+    def update_chat_session(self, chat_id: str, session_id: str,
+                            name: str = None, message: List[Dict] = None) -> Dict[str, Any]:
+        return self.chat_service.update_chat_session(chat_id, session_id, name, message)
+
+    def list_chat_sessions(self, chat_id: str, page: int = 1, size: int = 20,
+                           orderby: str = "create_time", desc: bool = True,
+                           session_id: str = None, session_name: str = None) -> List[Dict[str, Any]]:
+        return self.chat_service.list_chat_sessions(chat_id, page, size, orderby, desc, session_id, session_name)
+
+    def delete_chat_session(self, chat_id: str, session_id: str) -> bool:
+        return self.chat_service.delete_chat_session(chat_id, session_id)
+
+    def chat_completion(self, chat_id: str, query: str, stream: bool = False,
+                        session_id: str = None) -> Dict[str, Any]:
+        return self.chat_service.chat_completion(chat_id, query, stream, session_id)
+
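+    # ---- Agents and agent sessions ----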
+    def create_agent(self, name: str, llm: Dict[str, Any], description: str = None) -> AgentInfo:
+        return self.agent_service.create_agent(name, llm, description)
+
+    def update_agent(self, agent_id: str, name: str = None, llm: Dict[str, Any] = None,
+                     description: str = None) -> AgentInfo:
+        return self.agent_service.update_agent(agent_id, name, llm, description)
+
+    def delete_agent(self, agent_id: str) -> bool:
+        return self.agent_service.delete_agent(agent_id)
+
+    def list_agents(self, page: int = 1, size: int = 20, orderby: str = "create_time",
+                    desc: bool = True, name: str = None, agent_id: str = None) -> List[AgentInfo]:
+        return self.agent_service.list_agents(page, size, orderby, desc, name, agent_id)
+
+    def create_agent_session(self, agent_id: str, name: str = None) -> Dict[str, Any]:
+        return self.agent_service.create_agent_session(agent_id, name)
+
+    def list_agent_sessions(self, agent_id: str, page: int = 1, size: int = 20,
+                            orderby: str = "create_time", desc: bool = True,
+                            session_id: str = None, user_id: str = None,
+                            dsl: str = None) -> List[Dict[str, Any]]:
+        return self.agent_service.list_agent_sessions(agent_id, page, size, orderby, desc, session_id, user_id, dsl)
+
+    def delete_agent_session(self, agent_id: str, session_id: str) -> bool:
+        return self.agent_service.delete_agent_session(agent_id, session_id)
+
+    def agent_completion(self, agent_id: str, query: str, stream: bool = False,
+                         session_id: str = None) -> Dict[str, Any]:
+        return self.agent_service.agent_completion(agent_id, query, stream, session_id)
+
+    def get_related_questions(self, dataset_id: str, question: str, top: int = 10) -> List[str]:
+        return self.agent_service.get_related_questions(dataset_id, question, top)
+
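+    # ---- File management ----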
+    def list_files(self, parent_id: str = None, keywords: str = None,
+                   page: int = 1, size: int = 20, orderby: str = "create_time",
+                   desc: bool = True) -> List[FileInfo]:
+        return self.file_service.list_files(parent_id, keywords, page, size, orderby, desc)
+
+    def get_root_folder(self) -> Dict[str, Any]:
+        return self.file_service.get_root_folder()
+
+    def get_parent_folder(self, file_id: str) -> Dict[str, Any]:
+        return self.file_service.get_parent_folder(file_id)
+
+    def get_all_parent_folders(self, file_id: str) -> List[Dict[str, Any]]:
+        return self.file_service.get_all_parent_folders(file_id)
+
+    def get_file(self, file_id: str) -> Dict[str, Any]:
+        return self.file_service.get_file(file_id)
+
+    def upload_file(self, file_path: str) -> Dict[str, Any]:
+        return self.file_service.upload_file(file_path)
+
+    def create_file(self, file_id: str, tenant_id: str = None) -> Dict[str, Any]:
+        return self.file_service.create_file(file_id, tenant_id)
+
+    def delete_file(self, file_id: str) -> bool:
+        return self.file_service.delete_file(file_id)
+
+    def rename_file(self, file_id: str, new_name: str) -> Dict[str, Any]:
+        return self.file_service.rename_file(file_id, new_name)
+
+    def move_file(self, file_id: str, parent_id: str) -> Dict[str, Any]:
+        return self.file_service.move_file(file_id, parent_id)
+
+    def convert_file(self, file_id: str) -> Dict[str, Any]:
+        return self.file_service.convert_file(file_id)
+
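+    # ---- OpenAI-compatible completions ----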
+    def openai_chat_completion(self, chat_id: str, messages: List[Dict[str, Any]],
+                               stream: bool = False, model: str = "model",
+                               extra_body: Dict = None) -> Dict[str, Any]:
+        return self.openai_service.chat_completion(chat_id, messages, stream, model, extra_body)
+
+    def openai_agent_completion(self, agent_id: str, messages: List[Dict[str, Any]],
+                                stream: bool = False, model: str = "model",
+                                session_id: str = None) -> Dict[str, Any]:
+        return self.openai_service.agent_completion(agent_id, messages, stream, model, session_id)