| 123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
"""
Smoke test for the QA parsing workflow.
"""
- import tempfile
- import os
- from src.datasets.parser.workflows import QAParsingWorkflowV2
def create_test_pdf():
    """Write a minimal one-page PDF to a temporary file and return its path.

    The document holds three objects (catalog, page tree, one empty
    612x792 page). The cross-reference table is generated from the actual
    byte offset of each object, and a ``startxref`` pointer is emitted, so
    the file is structurally consistent instead of relying on hand-typed
    offsets.

    Returns:
        str: Path of the created ``.pdf`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    object_bodies = [
        b"<< /Type /Catalog /Pages 2 0 R >>",
        b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>",
    ]

    pdf = bytearray(b"%PDF-1.4\n")
    offsets = []
    for number, body in enumerate(object_bodies, start=1):
        # Record where this object starts so the xref entry is exact.
        offsets.append(len(pdf))
        pdf += b"%d 0 obj\n" % number + body + b"\nendobj\n"

    entry_count = len(object_bodies) + 1  # +1 for the mandatory free entry 0
    xref_offset = len(pdf)
    pdf += b"xref\n0 %d\n" % entry_count
    # Each xref entry is exactly 20 bytes: 10-digit offset, 5-digit
    # generation, type flag, and a two-byte end-of-line (space + LF).
    pdf += b"0000000000 65535 f \n"
    for offset in offsets:
        pdf += b"%010d 00000 n \n" % offset
    pdf += b"trailer\n<< /Size %d /Root 1 0 R >>\n" % entry_count
    pdf += b"startxref\n%d\n%%%%EOF\n" % xref_offset

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f:
        f.write(bytes(pdf))
        return f.name
def test_qa_workflow():
    """Run QAParsingWorkflowV2 once against a throwaway PDF and print the result.

    A temporary PDF is created via ``create_test_pdf``, handed to
    ``QAParsingWorkflowV2.run`` with fixed chunking/QA parameters, and the
    returned value is printed. The temporary file is removed afterwards,
    whether or not the workflow raised.
    """
    print("开始测试QA工作流...")

    # Create the fixture file the workflow will parse.
    sample_pdf = create_test_pdf()
    print(f"创建测试PDF文件: {sample_pdf}")

    try:
        runner = QAParsingWorkflowV2()
        run_options = {
            "pdf_path": sample_pdf,
            "dataset_id": "test-dataset-123",
            "qa_count_per_chunk": 5,
            "chunk_size": 500,
            "chunk_overlap": 100,
        }
        outcome = runner.run(**run_options)
        print(f"工作流执行结果: {outcome}")
    finally:
        # Always remove the fixture, even when the workflow fails.
        if os.path.exists(sample_pdf):
            os.unlink(sample_pdf)
            print(f"清理测试PDF文件: {sample_pdf}")
# Allow running this smoke test directly as a script.
if __name__ == "__main__":
    test_qa_workflow()
|