# rag_parser_config.py (1.5 KB)

  1. class RagParserDefaults:
  2. DATASET_PERMISSION="team"
  3. DATASET_CHUNK_METHOD="naive"
  4. DATASET_CONFIG_DICT = {
  5. "chunk_token_num": 256,
  6. "delimiter": "\n!?;。;!?",
  7. "html4excel": False,
  8. "layout_recognize": "Pro/Qwen/Qwen2.5-VL-7B-Instruct@SILICONFLOW",
  9. "auto_keywords": 5,
  10. "tag_kb_ids": [],
  11. "topn_tags": 3,
  12. "task_page_size": 4,
  13. "raptor": {
  14. "max_cluster": 64,
  15. "max_token": 256,
  16. "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
  17. "random_seed": 0,
  18. "threshold": 0.1,
  19. "use_raptor": True
  20. },
  21. "graphrag": {
  22. "resolution": True,
  23. "use_graphrag": True,
  24. "method": "general",
  25. "entity_types": [
  26. "event",
  27. "Book",
  28. "Author",
  29. "Illustrator",
  30. "Series",
  31. "Theme",
  32. "Genre",
  33. "Character",
  34. "Setting",
  35. "AgeGroup",
  36. "Competency",
  37. "ArtStyle",
  38. "Award",
  39. "Publisher"
  40. ]
  41. }
  42. }