# ragflow_service.py
  1. import sys
  2. import os
  3. from typing import Dict, Any, List, Optional
  4. from dataclasses import dataclass
  5. sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
  6. from utils.http_client import HTTPClient
  7. from conf.settings import ragflow_settings
  8. from utils.ragflow.dataset_service import DatasetService
  9. from utils.ragflow.document_service import DocumentService
  10. from utils.ragflow.chunk_service import ChunkService
  11. from utils.ragflow.chat_service import ChatService
  12. from utils.ragflow.agent_service import AgentService
  13. from utils.ragflow.file_service import FileService
  14. from utils.ragflow.openai_service import OpenAICompatibleService
@dataclass
class DocumentInfo:
    """Metadata for a single document stored in a RAGFlow dataset."""

    id: str                 # RAGFlow document id
    name: str               # document (file) name
    type: str               # document type as reported by the API
    size: int               # document size — presumably bytes; TODO confirm against API
    location: str           # storage location of the document
    dataset_id: str         # id of the dataset that owns this document
    chunk_method: str       # chunking strategy used when parsing
    chunk_count: Optional[int] = None   # number of chunks; None until known
    token_count: Optional[int] = None   # token total; None until known
    run: str = "UNSTART"    # parse/run state; "UNSTART" before parsing starts
    status: str = "1"       # presumably "1" = enabled — TODO confirm against API
@dataclass
class ChunkInfo:
    """A text chunk cut from a document, optionally carrying retrieval scores."""

    id: str                 # chunk id
    document_id: str        # id of the source document
    content: str            # chunk text content
    document_name: str      # name of the source document
    dataset_id: str         # id of the dataset containing the document
    # Similarity fields default to 0.0 — presumably populated only by
    # retrieval results; TODO confirm against the RAGFlow API.
    similarity: float = 0.0         # combined similarity score
    vector_similarity: float = 0.0  # embedding-based similarity component
    term_similarity: float = 0.0    # term/keyword similarity component
@dataclass
class DatasetInfo:
    """Summary information about a RAGFlow dataset."""

    id: str                 # dataset id
    name: str               # dataset name
    description: Optional[str] = None       # free-text description, if any
    embedding_model: Optional[str] = None   # embedding model name, if configured
    permission: Optional[str] = None        # access permission setting, if configured
    chunk_method: Optional[str] = None      # default chunking strategy, if configured
    chunk_count: int = 0        # total chunks across the dataset
    document_count: int = 0     # number of documents in the dataset
    token_count: int = 0        # total tokens across the dataset
    status: str = "1"           # presumably "1" = active — TODO confirm against API
@dataclass
class ChatInfo:
    """A chat assistant configuration bound to one or more datasets."""

    id: str                  # chat assistant id
    name: str                # chat assistant name
    dataset_ids: List[str]   # datasets the chat retrieves from
    llm: Dict[str, Any]      # LLM configuration mapping — keys defined by the RAGFlow API
    prompt: str              # prompt/template text for the assistant
@dataclass
class AgentInfo:
    """Summary information about a RAGFlow agent."""

    id: str                  # agent id
    name: str                # agent name
    llm: Dict[str, Any]      # LLM configuration mapping — keys defined by the RAGFlow API
    description: Optional[str] = None   # free-text description, if any
@dataclass
class FileInfo:
    """Metadata for a file or folder in RAGFlow's file manager."""

    id: str         # file id
    parent_id: str  # id of the containing folder
    name: str       # file name
    type: str       # entry type as reported by the API
    size: int       # file size — presumably bytes; TODO confirm against API
  70. class RAGFlowService:
  71. def __init__(self, base_url: str = None, api_key: str = None):
  72. base_url = base_url or ragflow_settings.ragflow_api_url
  73. api_key = api_key or ragflow_settings.ragflow_api_key
  74. self.http_client = HTTPClient(base_url=base_url, api_key=api_key)
  75. self.dataset_service = DatasetService(self.http_client)
  76. self.document_service = DocumentService(self.http_client)
  77. self.chunk_service = ChunkService(self.http_client)
  78. self.chat_service = ChatService(self.http_client)
  79. self.agent_service = AgentService(self.http_client)
  80. self.file_service = FileService(self.http_client)
  81. self.openai_service = OpenAICompatibleService(self.http_client)
  82. def create_dataset(self, name: str, description: str = None,
  83. embedding_model: str = None, permission: str = None,
  84. chunk_method: str = None, parser_config: dict = None) -> DatasetInfo:
  85. return self.dataset_service.create_dataset(name, description, embedding_model, permission, chunk_method, parser_config)
  86. def delete_datasets(self, dataset_ids: List[str]) -> bool:
  87. return self.dataset_service.delete_datasets(dataset_ids)
  88. def update_dataset(self, dataset_id: str, name: str = None,
  89. description: str = None, embedding_model: str = None,
  90. permission: str = None, chunk_method: str = None) -> DatasetInfo:
  91. return self.dataset_service.update_dataset(dataset_id, name, description, embedding_model, permission, chunk_method)
  92. def list_datasets(self, page: int = 1, size: int = 20, orderby: str = "create_time",
  93. desc: bool = True, name: str = None, dataset_id: str = None) -> List[DatasetInfo]:
  94. return self.dataset_service.list_datasets(page, size, orderby, desc, name, dataset_id)
  95. def get_dataset(self, name: Optional[str] = None, dataset_id: Optional[str] = None) -> DatasetInfo:
  96. _list = self.list_datasets(name=name, dataset_id=dataset_id)
  97. if _list is None:
  98. return None
  99. elif len(_list) > 0:
  100. return _list[0]
  101. def get_knowledge_graph(self, dataset_id: str) -> Dict[str, Any]:
  102. return self.dataset_service.get_knowledge_graph(dataset_id)
  103. def delete_knowledge_graph(self, dataset_id: str) -> bool:
  104. return self.dataset_service.delete_knowledge_graph(dataset_id)
  105. def trace_graphrag(self, dataset_id: str) -> Dict[str, Any]:
  106. return self.dataset_service.trace_graphrag(dataset_id)
  107. def trace_raptor(self, dataset_id: str) -> Dict[str, Any]:
  108. return self.dataset_service.trace_raptor(dataset_id)
  109. def get_metadata_summary(self, dataset_id: str) -> Dict[str, Any]:
  110. return self.dataset_service.get_metadata_summary(dataset_id)
  111. def update_metadata(self, dataset_id: str, metadata: Dict = None,
  112. document_ids: List[str] = None, metadata_condition: Dict = None) -> bool:
  113. return self.dataset_service.update_metadata(dataset_id, metadata, document_ids, metadata_condition)
  114. def run_graphrag(self, dataset_id: str, mode: str = "light") -> Dict[str, Any]:
  115. return self.dataset_service.run_graphrag(dataset_id, mode)
  116. def run_raptor(self, dataset_id: str) -> Dict[str, Any]:
  117. return self.dataset_service.run_raptor(dataset_id)
  118. def upload_document(self, dataset_id: str, file_path: str) -> List[DocumentInfo]:
  119. return self.document_service.upload_document(dataset_id, file_path)
  120. def update_document(self, dataset_id: str, document_id: str,
  121. name: str = None, meta_fields: Dict = None,
  122. chunk_method: str = None, parser_config: Dict = None,
  123. enabled: int = None) -> DocumentInfo:
  124. return self.document_service.update_document(dataset_id, document_id, name, meta_fields, chunk_method, parser_config, enabled)
  125. def delete_document(self, dataset_id: str, document_id: str) -> bool:
  126. return self.document_service.delete_document(dataset_id, document_id)
  127. def delete_documents(self, dataset_id: str, document_ids: List[str]) -> bool:
  128. return self.document_service.delete_documents(dataset_id, document_ids)
  129. def get_document(self, dataset_id: str, document_id: str) -> DocumentInfo:
  130. return self.document_service.get_document(dataset_id, document_id)
  131. def list_documents(self, dataset_id: str, page: int = 1, size: int = 20,
  132. keywords: str = None, document_id: str = None, document_name: str = None,
  133. suffix: str = None, run: str = None) -> List[DocumentInfo]:
  134. return self.document_service.list_documents(dataset_id, page, size, keywords, document_id, document_name, suffix, run)
  135. def get_document_chunks(self, dataset_id: str, document_id: str,
  136. keywords: str = None, page: int = 1, size: int = 20,
  137. chunk_id: str = None) -> List[ChunkInfo]:
  138. return self.document_service.get_document_chunks(dataset_id, document_id, keywords, page, size, chunk_id)
  139. def parse_document(self, dataset_id: str, document_ids: List[str]) -> bool:
  140. return self.document_service.parse_document(dataset_id, document_ids)
  141. def create_chunk(self, dataset_id: str, document_id: str, content: str,
  142. important_keywords: List[str] = None) -> ChunkInfo:
  143. return self.chunk_service.create_chunk(dataset_id, document_id, content, important_keywords)
  144. def update_chunk(self, dataset_id: str, chunk_id: str, content: str = None,
  145. important_keywords: List[str] = None) -> ChunkInfo:
  146. return self.chunk_service.update_chunk(dataset_id, chunk_id, content, important_keywords)
  147. def delete_chunk(self, dataset_id: str, chunk_id: str) -> bool:
  148. return self.chunk_service.delete_chunk(dataset_id, chunk_id)
  149. def delete_chunks(self, dataset_id: str, document_id: str, chunk_ids: List[str]) -> bool:
  150. return self.chunk_service.delete_chunks(dataset_id, document_id, chunk_ids)
  151. def retrieval(self, dataset_ids: List[str], query: str, top_k: int = 5,
  152. similarity_threshold: float = 0.1, vector_similarity_weight: float = 0.3,
  153. refine: bool = False) -> List[ChunkInfo]:
  154. return self.chunk_service.retrieval(dataset_ids, query, top_k, similarity_threshold, vector_similarity_weight, refine)
  155. def create_chat(self, name: str, dataset_ids: List[str], llm: Dict[str, Any],
  156. prompt: str = None) -> ChatInfo:
  157. return self.chat_service.create_chat(name, dataset_ids, llm, prompt)
  158. def update_chat(self, chat_id: str, name: str = None, dataset_ids: List[str] = None,
  159. llm: Dict[str, Any] = None, prompt: str = None) -> ChatInfo:
  160. return self.chat_service.update_chat(chat_id, name, dataset_ids, llm, prompt)
  161. def delete_chats(self, chat_ids: List[str]) -> bool:
  162. return self.chat_service.delete_chats(chat_ids)
  163. def list_chats(self, page: int = 1, size: int = 20, orderby: str = "create_time",
  164. desc: bool = True, name: str = None, chat_id: str = None) -> List[ChatInfo]:
  165. return self.chat_service.list_chats(page, size, orderby, desc, name, chat_id)
  166. def create_chat_session(self, chat_id: str, name: str = None) -> Dict[str, Any]:
  167. return self.chat_service.create_chat_session(chat_id, name)
  168. def update_chat_session(self, chat_id: str, session_id: str,
  169. name: str = None, message: List[Dict] = None) -> Dict[str, Any]:
  170. return self.chat_service.update_chat_session(chat_id, session_id, name, message)
  171. def list_chat_sessions(self, chat_id: str, page: int = 1, size: int = 20,
  172. orderby: str = "create_time", desc: bool = True,
  173. session_id: str = None, session_name: str = None) -> List[Dict[str, Any]]:
  174. return self.chat_service.list_chat_sessions(chat_id, page, size, orderby, desc, session_id, session_name)
  175. def delete_chat_session(self, chat_id: str, session_id: str) -> bool:
  176. return self.chat_service.delete_chat_session(chat_id, session_id)
  177. def chat_completion(self, chat_id: str, query: str, stream: bool = False,
  178. session_id: str = None) -> Dict[str, Any]:
  179. return self.chat_service.chat_completion(chat_id, query, stream, session_id)
  180. def create_agent(self, name: str, llm: Dict[str, Any], description: str = None) -> AgentInfo:
  181. return self.agent_service.create_agent(name, llm, description)
  182. def update_agent(self, agent_id: str, name: str = None, llm: Dict[str, Any] = None,
  183. description: str = None) -> AgentInfo:
  184. return self.agent_service.update_agent(agent_id, name, llm, description)
  185. def delete_agent(self, agent_id: str) -> bool:
  186. return self.agent_service.delete_agent(agent_id)
  187. def list_agents(self, page: int = 1, size: int = 20, orderby: str = "create_time",
  188. desc: bool = True, name: str = None, agent_id: str = None) -> List[AgentInfo]:
  189. return self.agent_service.list_agents(page, size, orderby, desc, name, agent_id)
  190. def create_agent_session(self, agent_id: str, name: str = None) -> Dict[str, Any]:
  191. return self.agent_service.create_agent_session(agent_id, name)
  192. def list_agent_sessions(self, agent_id: str, page: int = 1, size: int = 20,
  193. orderby: str = "create_time", desc: bool = True,
  194. session_id: str = None, user_id: str = None,
  195. dsl: str = None) -> List[Dict[str, Any]]:
  196. return self.agent_service.list_agent_sessions(agent_id, page, size, orderby, desc, session_id, user_id, dsl)
  197. def delete_agent_session(self, agent_id: str, session_id: str) -> bool:
  198. return self.agent_service.delete_agent_session(agent_id, session_id)
  199. def agent_completion(self, agent_id: str, query: str, stream: bool = False,
  200. session_id: str = None) -> Dict[str, Any]:
  201. return self.agent_service.agent_completion(agent_id, query, stream, session_id)
  202. def get_related_questions(self, dataset_id: str, question: str, top: int = 10) -> List[str]:
  203. return self.agent_service.get_related_questions(dataset_id, question, top)
  204. def list_files(self, parent_id: str = None, keywords: str = None,
  205. page: int = 1, size: int = 20, orderby: str = "create_time",
  206. desc: bool = True) -> List[FileInfo]:
  207. return self.file_service.list_files(parent_id, keywords, page, size, orderby, desc)
  208. def get_root_folder(self) -> Dict[str, Any]:
  209. return self.file_service.get_root_folder()
  210. def get_parent_folder(self, file_id: str) -> Dict[str, Any]:
  211. return self.file_service.get_parent_folder(file_id)
  212. def get_all_parent_folders(self, file_id: str) -> List[Dict[str, Any]]:
  213. return self.file_service.get_all_parent_folders(file_id)
  214. def get_file(self, file_id: str) -> Dict[str, Any]:
  215. return self.file_service.get_file(file_id)
  216. def upload_file(self, file_path: str) -> Dict[str, Any]:
  217. return self.file_service.upload_file(file_path)
  218. def create_file(self, file_id: str, tenant_id: str = None) -> Dict[str, Any]:
  219. return self.file_service.create_file(file_id, tenant_id)
  220. def delete_file(self, file_id: str) -> bool:
  221. return self.file_service.delete_file(file_id)
  222. def rename_file(self, file_id: str, new_name: str) -> Dict[str, Any]:
  223. return self.file_service.rename_file(file_id, new_name)
  224. def move_file(self, file_id: str, parent_id: str) -> Dict[str, Any]:
  225. return self.file_service.move_file(file_id, parent_id)
  226. def convert_file(self, file_id: str) -> Dict[str, Any]:
  227. return self.file_service.convert_file(file_id)
  228. def openai_chat_completion(self, chat_id: str, messages: List[Dict[str, Any]],
  229. stream: bool = False, model: str = "model",
  230. extra_body: Dict = None) -> Dict[str, Any]:
  231. return self.openai_service.chat_completion(chat_id, messages, stream, model, extra_body)
  232. def openai_agent_completion(self, agent_id: str, messages: List[Dict[str, Any]],
  233. stream: bool = False, model: str = "model",
  234. session_id: str = None) -> Dict[str, Any]:
  235. return self.openai_service.agent_completion(agent_id, messages, stream, model, session_id)