# test_qa_workflow.py
"""
Test the QA workflow.
"""
import os
import tempfile

from src.datasets.parser.workflows import QAParsingWorkflowV2
# Create a temporary PDF file for testing
  8. def create_test_pdf():
  9. with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f:
  10. # 写入简单的PDF内容
  11. f.write(b"%PDF-1.4\n1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\nxref\n0 4\n0000000000 65535 f \n0000000009 00000 n \n0000000052 00000 n \n0000000101 00000 n \ntrailer\n<< /Size 4 /Root 1 0 R >>\n%%EOF")
  12. return f.name
  13. def test_qa_workflow():
  14. print("开始测试QA工作流...")
  15. # 创建测试PDF文件
  16. pdf_path = create_test_pdf()
  17. print(f"创建测试PDF文件: {pdf_path}")
  18. try:
  19. # 初始化工作流
  20. workflow = QAParsingWorkflowV2()
  21. # 运行工作流
  22. result = workflow.run(
  23. pdf_path=pdf_path,
  24. dataset_id="test-dataset-123",
  25. qa_count_per_chunk=5,
  26. chunk_size=500,
  27. chunk_overlap=100
  28. )
  29. print(f"工作流执行结果: {result}")
  30. finally:
  31. # 清理测试文件
  32. if os.path.exists(pdf_path):
  33. os.unlink(pdf_path)
  34. print(f"清理测试PDF文件: {pdf_path}")
  35. if __name__ == "__main__":
  36. test_qa_workflow()