dataset_util.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. from typing import Optional
  2. from ragflow_sdk import DataSet
  3. from conf.settings import ragflow_settings, model_settings
  4. from .conf.rag_parser_config import RagParserDefaults
  5. from .base_util import RAGFlowBaseUtil
  6. class DataSetUtil(RAGFlowBaseUtil):
  7. def __init__(self):
  8. super().__init__()
  9. def create_dataset(self, chunk_method: str = RagParserDefaults.DATASET_CHUNK_METHOD,
  10. dataset_name: Optional[str] = None, dataset_desc: str = "",
  11. embedding_model: str = f"{model_settings.embedding_model_name}@SILICONFLOW",
  12. parser_config: dict = RagParserDefaults.DATASET_CONFIG_DICT):
  13. """
  14. 创建数据集
  15. """
  16. # 封装数据集参数
  17. parser_obj = DataSet.ParserConfig(self.ragflow_client, parser_config)
  18. # 创建数据集
  19. dataset = self.ragflow_client.create_dataset(
  20. name=dataset_name,
  21. description=dataset_desc,
  22. embedding_model=embedding_model,
  23. permission=RagParserDefaults.DATASET_PERMISSION,
  24. chunk_method= chunk_method,
  25. parser_config=parser_obj
  26. )
  27. return dataset
  28. def delete_dataset(self, ids: list[str] | None = None):
  29. """
  30. 删除数据集
  31. """
  32. try:
  33. if ids is not None:
  34. self.ragflow_client.delete_datasets(ids)
  35. except Exception as e:
  36. print(f"删除数据集失败: {e}")
  37. def get_dataset(self, name: Optional[str] = None):
  38. """
  39. 获取数据集
  40. """
  41. try:
  42. dataset = self.ragflow_client.get_dataset(name=name)
  43. except Exception as e:
  44. print(f"获取数据集失败: {e}")
  45. return None
  46. return dataset
  47. def list_datasets(self, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True, id: str | None = None, name: str | None = None) -> list[DataSet]:
  48. """
  49. 获取数据集列表
  50. """
  51. try:
  52. dataset_list = self.ragflow_client.list_datasets(
  53. page=page,
  54. page_size=page_size,
  55. orderby=orderby,
  56. desc=desc,
  57. id=id,
  58. name=name,
  59. )
  60. except Exception as e:
  61. print(f"获取数据集列表失败: {e}")
  62. return None
  63. return dataset_list