# test_ragflow_http_api.py
import os
from urllib.parse import urlparse

from utils.infinity import InfinityClient
from utils.ragflow.ragflow_service import RAGFlowService
  3. DATASET_CONFIG_DICT = {
  4. "chunk_token_num": 256,
  5. "delimiter": "\n!?;。;!?",
  6. "html4excel": False,
  7. "layout_recognize": "Pro/Qwen/Qwen2.5-VL-7B-Instruct@SILICONFLOW",
  8. "auto_keywords": 5,
  9. "tag_kb_ids": [],
  10. "topn_tags": 3,
  11. "task_page_size": 4,
  12. "raptor": {
  13. "max_cluster": 64,
  14. "max_token": 256,
  15. "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
  16. "random_seed": 0,
  17. "threshold": 0.1,
  18. "use_raptor": True
  19. },
  20. "graphrag": {
  21. "resolution": True,
  22. "use_graphrag": True,
  23. "method": "general",
  24. "entity_types": [
  25. "event",
  26. "Book",
  27. "Author",
  28. "Illustrator",
  29. "Series",
  30. "Theme",
  31. "Genre",
  32. "Character",
  33. "Setting",
  34. "AgeGroup",
  35. "Competency",
  36. "ArtStyle",
  37. "Award",
  38. "Publisher"
  39. ]
  40. }
  41. }
  42. ragflow_service = RAGFlowService()
  43. dataset_ids = [
  44. "c2be78a4f10711f095230242c0a85002"
  45. ]
  46. def create_dataset():
  47. dataset = ragflow_service.create_dataset(name="test_http_dataset1", description="测试HTTP数据集1",
  48. permission="team",
  49. chunk_method="naive",
  50. parser_config=DATASET_CONFIG_DICT)
  51. print(dataset)
  52. def delete_dataset(dataset_id: str):
  53. flg = ragflow_service.delete_datasets(dataset_ids=[dataset_id])
  54. print(flg)
  55. def delete_datasets(dataset_ids: list[str]):
  56. flg = ragflow_service.delete_datasets(dataset_ids=dataset_ids)
  57. print(flg)
  58. def list_datasets(name: str = None):
  59. datasets = ragflow_service.list_datasets(name=name)
  60. print(datasets)
  61. def get_dataset(name: str = None, dataset_id: str = None):
  62. dataset = ragflow_service.get_dataset(name=name, dataset_id=dataset_id)
  63. print(dataset)
  64. def add_chunk(dataset_id: str, document_id: str, content: str, important_keywords: list[int] = None):
  65. chunk = ragflow_service.create_chunk(dataset_id=dataset_id, document_id=document_id, content=content, important_keywords=important_keywords)
  66. print(chunk)
  67. return chunk
  68. import os
  69. def test_image():
  70. url = "http://192.168.16.134:9000/bookpage/daa1861c-2096-42c0-b8e3-a163f96f0f66.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074307Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=6150ffc414cccbedc255bc0a72d85fd4e693a59b112789af61e8a0e93d00e5dc"
  71. # 截取url中的daa1861c-2096-42c0-b8e3-a163f96f0f66.png部分
  72. img_id = os.path.basename(url).split("?")[0]
  73. print(img_id)
  74. if __name__ == "__main__":
  75. test_image()
  76. # dataset_id = "18caf531f04d11f095670242c0a85002"
  77. # document_id = "3dda0a90f1e211f0a3b80242c0a85002"
  78. # tag = "社会L3_人际交往L3_同理心L3"
  79. # content="能感知他人情绪,对同伴的困难产生理解并尝试回应"
  80. # important_keywords = ["3", "4"]
  81. # chunk = add_chunk(dataset_id=dataset_id, document_id=document_id, content=content, important_keywords=important_keywords)
  82. # chunk_id = chunk["chunk"]["id"]
  83. # infinity_client = InfinityClient()
  84. # infinity_client.update(database_name="default_db", table_name="ragflow_92162247e93e11f084830242ac1d0002_18caf531f04d11f095670242c0a85002", cond=f"id = '{chunk_id}'", data={"tag_kwd": tag})
  85. # print(chunk_id)