""" 测试QA工作流 """ import tempfile import os from src.datasets.parser.workflows import QAParsingWorkflowV2 # 创建一个临时PDF文件用于测试 def create_test_pdf(): with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f: # 写入简单的PDF内容 f.write(b"%PDF-1.4\n1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\nxref\n0 4\n0000000000 65535 f \n0000000009 00000 n \n0000000052 00000 n \n0000000101 00000 n \ntrailer\n<< /Size 4 /Root 1 0 R >>\n%%EOF") return f.name def test_qa_workflow(): print("开始测试QA工作流...") # 创建测试PDF文件 pdf_path = create_test_pdf() print(f"创建测试PDF文件: {pdf_path}") try: # 初始化工作流 workflow = QAParsingWorkflowV2() # 运行工作流 result = workflow.run( pdf_path=pdf_path, dataset_id="test-dataset-123", qa_count_per_chunk=5, chunk_size=500, chunk_overlap=100 ) print(f"工作流执行结果: {result}") finally: # 清理测试文件 if os.path.exists(pdf_path): os.unlink(pdf_path) print(f"清理测试PDF文件: {pdf_path}") if __name__ == "__main__": test_qa_workflow()