|
@@ -1,30 +1,29 @@
|
|
|
import os
|
|
import os
|
|
|
import concurrent.futures
|
|
import concurrent.futures
|
|
|
|
|
+import time
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
from langgraph.graph import StateGraph, START, END
|
|
from langgraph.graph import StateGraph, START, END
|
|
|
from typing import List, Dict, Any
|
|
from typing import List, Dict, Any
|
|
|
from pydantic import BaseModel, Field, ConfigDict
|
|
from pydantic import BaseModel, Field, ConfigDict
|
|
|
from parser.pdf_parser.pdf_splitter import PDFSplitter
|
|
from parser.pdf_parser.pdf_splitter import PDFSplitter
|
|
|
from model.qwen_vl import QWenVLParser
|
|
from model.qwen_vl import QWenVLParser
|
|
|
-from utils.ragflow_sdk import DataSetUtil, DocumentUtil, ChunkUtil
|
|
|
|
|
from utils.ragflow.ragflow_service import RAGFlowService
|
|
from utils.ragflow.ragflow_service import RAGFlowService
|
|
|
from model.multimodal_embedding import Embedding
|
|
from model.multimodal_embedding import Embedding
|
|
|
from conf.settings import model_settings, vector_db_settings
|
|
from conf.settings import model_settings, vector_db_settings
|
|
|
from utils.infinity import get_client
|
|
from utils.infinity import get_client
|
|
|
|
|
+from langfuse.langchain import CallbackHandler
|
|
|
|
|
+from conf.rag_parser_config import RagParserDefaults
|
|
|
|
|
|
|
|
# 定义工作流状态类
|
|
# 定义工作流状态类
|
|
|
class PDFParsingState(BaseModel):
|
|
class PDFParsingState(BaseModel):
|
|
|
"""PDF解析工作流状态"""
|
|
"""PDF解析工作流状态"""
|
|
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
|
pdf_path: str = Field(..., description="PDF文件路径")
|
|
pdf_path: str = Field(..., description="PDF文件路径")
|
|
|
- dataset_id: str = Field(..., description="数据集ID")
|
|
|
|
|
- page_dataset_id: str = Field(..., description="页面数据集ID")
|
|
|
|
|
- ragflow_service: RAGFlowService = Field(default_factory=RAGFlowService, description="RAGFlow服务实例")
|
|
|
|
|
- dataset_util: DataSetUtil = Field(default_factory=DataSetUtil, description="数据集工具类实例")
|
|
|
|
|
- document_util: DocumentUtil = Field(default_factory=DocumentUtil, description="文档工具类实例")
|
|
|
|
|
- chunk_util: ChunkUtil = Field(default_factory=ChunkUtil, description="文档工具类实例")
|
|
|
|
|
- embedding_model: Embedding = Field(default_factory=Embedding, description="多模态嵌入模型实例")
|
|
|
|
|
|
|
+ dataset_name: str = Field(..., description="数据集名称")
|
|
|
|
|
+ dataset_id: str = Field(default="", description="RAGFLOW数据集ID")
|
|
|
document_id: str = Field(default="", description="上传后的文档ID")
|
|
document_id: str = Field(default="", description="上传后的文档ID")
|
|
|
|
|
+ page_dataset_id: str = Field(..., description="页面数据集ID")
|
|
|
|
|
+ page_document_id: str = Field(default="", description="上传后的页面文档ID")
|
|
|
split_pages: List[Dict[str, Any]] = Field(default_factory=list, description="拆分后的页面列表")
|
|
split_pages: List[Dict[str, Any]] = Field(default_factory=list, description="拆分后的页面列表")
|
|
|
current_page: Dict[str, Any] = Field(default_factory=dict, description="当前处理的页面")
|
|
current_page: Dict[str, Any] = Field(default_factory=dict, description="当前处理的页面")
|
|
|
parsed_results: List[Dict[str, Any]] = Field(default_factory=list, description="解析结果列表")
|
|
parsed_results: List[Dict[str, Any]] = Field(default_factory=list, description="解析结果列表")
|
|
@@ -46,6 +45,9 @@ class PDFParsingWorkflow:
|
|
|
"""
|
|
"""
|
|
|
self.model_name = model_name
|
|
self.model_name = model_name
|
|
|
self.workflow = self._build_workflow()
|
|
self.workflow = self._build_workflow()
|
|
|
|
|
+ self.ragflow_service = RAGFlowService()
|
|
|
|
|
+ self.langfuse_handler = CallbackHandler()
|
|
|
|
|
+ self.embedding_model = Embedding(model_name=model_settings.multimodal_embedding_model_name, api_key=model_settings.dashscope_api_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_workflow(self):
|
|
def _build_workflow(self):
|
|
@@ -53,8 +55,17 @@ class PDFParsingWorkflow:
|
|
|
# 创建状态图
|
|
# 创建状态图
|
|
|
graph = StateGraph(PDFParsingState)
|
|
graph = StateGraph(PDFParsingState)
|
|
|
|
|
|
|
|
|
|
+ # 添加查询知识库是否存在节点
|
|
|
|
|
+ graph.add_node("get_ragflow_dataset", self.get_ragflow_dataset)
|
|
|
|
|
+
|
|
|
|
|
+ # 添加创建知识库节点
|
|
|
|
|
+ graph.add_node("create_ragflow_dataset", self.create_ragflow_dataset)
|
|
|
|
|
+
|
|
|
# 添加上传文档节点
|
|
# 添加上传文档节点
|
|
|
graph.add_node("upload_document", self._upload_document_node)
|
|
graph.add_node("upload_document", self._upload_document_node)
|
|
|
|
|
+
|
|
|
|
|
+ # 添加上传图书页面文档节点
|
|
|
|
|
+ graph.add_node("upload_page_document", self._upload_page_document_node)
|
|
|
|
|
|
|
|
# 添加解析文档节点
|
|
# 添加解析文档节点
|
|
|
graph.add_node("parse_document", self._parse_document_node)
|
|
graph.add_node("parse_document", self._parse_document_node)
|
|
@@ -64,6 +75,9 @@ class PDFParsingWorkflow:
|
|
|
|
|
|
|
|
# 添加解析图像节点
|
|
# 添加解析图像节点
|
|
|
graph.add_node("parse_image", self._parse_image_node)
|
|
graph.add_node("parse_image", self._parse_image_node)
|
|
|
|
|
+
|
|
|
|
|
+ # Add the node that creates a RAGFlow chunk for each parsed page
|
|
|
|
|
+ graph.add_node("create_ragflow_chunk", self.create_ragflow_chunk)
|
|
|
|
|
|
|
|
# 添加向量化入库节点
|
|
# 添加向量化入库节点
|
|
|
graph.add_node("vectorize_store", self._vectorize_store_node)
|
|
graph.add_node("vectorize_store", self._vectorize_store_node)
|
|
@@ -72,10 +86,21 @@ class PDFParsingWorkflow:
|
|
|
graph.add_node("complete", self._complete_node)
|
|
graph.add_node("complete", self._complete_node)
|
|
|
|
|
|
|
|
# 定义边
|
|
# 定义边
|
|
|
- # 定义RagFLow解析文档
|
|
|
|
|
- graph.add_edge(START, "upload_document")
|
|
|
|
|
|
|
+ # 查询知识库是否存在
|
|
|
|
|
+ graph.add_edge(START, "get_ragflow_dataset")
|
|
|
|
|
+ # 添加条件边,判断知识库是否存在
|
|
|
|
|
+ graph.add_conditional_edges(
|
|
|
|
|
+ "get_ragflow_dataset",
|
|
|
|
|
+ self._check_dataset_exists,
|
|
|
|
|
+ {
|
|
|
|
|
+ "exists": "upload_document",
|
|
|
|
|
+ "not_exists": "create_ragflow_dataset"
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
# 添加解析文档边
|
|
# 添加解析文档边
|
|
|
|
|
+ graph.add_edge("create_ragflow_dataset", "upload_document")
|
|
|
graph.add_edge("upload_document", "parse_document")
|
|
graph.add_edge("upload_document", "parse_document")
|
|
|
|
|
+ graph.add_edge("upload_document", "upload_page_document")
|
|
|
graph.add_edge("parse_document", "split_pdf")
|
|
graph.add_edge("parse_document", "split_pdf")
|
|
|
# 定义图片解析边
|
|
# 定义图片解析边
|
|
|
graph.add_edge("split_pdf", "parse_image")
|
|
graph.add_edge("split_pdf", "parse_image")
|
|
@@ -86,45 +111,55 @@ class PDFParsingWorkflow:
|
|
|
self._should_continue_parsing,
|
|
self._should_continue_parsing,
|
|
|
{
|
|
{
|
|
|
"continue": "parse_image",
|
|
"continue": "parse_image",
|
|
|
- "complete": "vectorize_store"
|
|
|
|
|
|
|
+ "complete": "vectorize_store",
|
|
|
}
|
|
}
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- # 添加向量化入库边
|
|
|
|
|
- graph.add_edge("vectorize_store", "complete")
|
|
|
|
|
|
|
+ # 添加从vectorize_store到create_ragflow_chunk的边
|
|
|
|
|
+ graph.add_edge("vectorize_store", "create_ragflow_chunk")
|
|
|
|
|
+
|
|
|
|
|
+ graph.add_edge("create_ragflow_chunk", "complete")
|
|
|
|
|
|
|
|
graph.add_edge("complete", END)
|
|
graph.add_edge("complete", END)
|
|
|
|
|
|
|
|
# 编译工作流
|
|
# 编译工作流
|
|
|
return graph.compile()
|
|
return graph.compile()
|
|
|
|
|
|
|
|
def get_ragflow_dataset(self, state: "PDFParsingState") -> Dict[str, Any]:
    """Look up the RAGFlow dataset named ``state.dataset_name``.

    Used as a workflow node, so the result is a partial state update
    rather than a bare id.

    Args:
        state: Current workflow state; only ``dataset_name`` is read.

    Returns:
        ``{"dataset_id": <id>}``. The id is an empty string when the
        service returns a falsy result, which ``_check_dataset_exists``
        treats as "dataset does not exist yet".

    Raises:
        Exception: If the lookup (or reading the id from its result)
            fails. The original error is chained as ``__cause__`` so the
            root traceback is not lost.
    """
    try:
        dataset = self.ragflow_service.get_dataset(name=state.dataset_name)
        # A falsy lookup result means "not found"; signal it with "".
        dataset_id = dataset["id"] if dataset else ""
        print(f"数据集 {state.dataset_name} 的ID为: {dataset_id}")
        return {"dataset_id": dataset_id}
    except Exception as e:
        # Keep the same exception type and message, but chain the cause.
        raise Exception(f"获取数据集ID时出错: {str(e)}") from e
|
|
|
|
|
|
|
|
- def create_ragflow_dataset(self, state: PDFParsingState, dataset_name: str) -> str:
|
|
|
|
|
- """创建RAGFLOW数据集"""
|
|
|
|
|
- if state.dataset_id:
|
|
|
|
|
- print(f"数据集 {dataset_name} 已存在,数据集ID: {state.dataset_id}")
|
|
|
|
|
- return state.dataset_id
|
|
|
|
|
|
|
def _check_dataset_exists(self, state: "PDFParsingState") -> str:
    """Choose the branch to take after the dataset lookup.

    Args:
        state: Current workflow state; only ``dataset_id`` is read.

    Returns:
        ``"not_exists"`` when ``state.dataset_id`` is empty (``""`` or
        ``None``), otherwise ``"exists"``. These strings are the keys of
        the conditional-edge mapping registered in ``_build_workflow``.
    """
    # Truthiness also covers None, so a missing id is never routed to the
    # upload branch with an unusable dataset id.
    if not state.dataset_id:
        return "not_exists"
    return "exists"
|
|
|
|
|
|
|
|
- print(f"开始创建数据集: {dataset_name}")
|
|
|
|
|
|
|
def create_ragflow_dataset(self, state: "PDFParsingState") -> Dict[str, Any]:
    """Create the RAGFlow dataset named ``state.dataset_name``.

    Used as a workflow node (reached via the ``"not_exists"`` branch), so
    the result is a partial state update rather than a bare id.

    Args:
        state: Current workflow state; only ``dataset_name`` is read.

    Returns:
        ``{"dataset_id": <id>}`` with the id of the new dataset.

    Raises:
        Exception: If the service call fails or returns no dataset. The
            error is logged and re-raised unchanged.
    """
    print(f"开始创建数据集: {state.dataset_name}")

    try:
        # Permission, chunk method and parser config come from
        # RagParserDefaults rather than being hard-coded here.
        dataset = self.ragflow_service.create_dataset(
            name=state.dataset_name,
            description="",
            permission=RagParserDefaults.DATASET_PERMISSION,
            chunk_method=RagParserDefaults.DATASET_CHUNK_METHOD,
            parser_config=RagParserDefaults.DATASET_CONFIG_DICT,
        )
        # Fail with a clear message instead of an opaque TypeError when
        # the service returns nothing (same style as the upload nodes).
        if not dataset:
            raise Exception("数据集创建失败: 未返回有效的数据集信息")
        dataset_id = dataset["id"]
        print(f"数据集创建成功,数据集ID: {dataset_id}")
        return {"dataset_id": dataset_id}
    except Exception as e:
        print(f"创建数据集时出错: {str(e)}")
        raise
|
|
@@ -135,24 +170,42 @@ class PDFParsingWorkflow:
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
# 上传文档
|
|
# 上传文档
|
|
|
- document_info_list = state.ragflow_service.upload_document(
|
|
|
|
|
|
|
+ document_info_list = self.ragflow_service.upload_document(
|
|
|
dataset_id=state.dataset_id,
|
|
dataset_id=state.dataset_id,
|
|
|
file_path=state.pdf_path
|
|
file_path=state.pdf_path
|
|
|
)
|
|
)
|
|
|
- # 上传文档
|
|
|
|
|
- document_info_list2 = state.ragflow_service.upload_document(
|
|
|
|
|
- dataset_id=state.page_dataset_id,
|
|
|
|
|
- file_path=state.pdf_path
|
|
|
|
|
- )
|
|
|
|
|
|
|
|
|
|
# 检查响应
|
|
# 检查响应
|
|
|
if document_info_list and len(document_info_list) > 0:
|
|
if document_info_list and len(document_info_list) > 0:
|
|
|
document_id = document_info_list[0]["id"]
|
|
document_id = document_info_list[0]["id"]
|
|
|
- page_document_id = document_info_list2[0]["id"]
|
|
|
|
|
print(f"文档上传成功,文档ID: {document_id}")
|
|
print(f"文档上传成功,文档ID: {document_id}")
|
|
|
return {
|
|
return {
|
|
|
"document_id": document_id,
|
|
"document_id": document_id,
|
|
|
- "page_document_id": page_document_id
|
|
|
|
|
|
|
+ }
|
|
|
|
|
+ else:
|
|
|
|
|
+ print("文档上传失败: 未返回有效的文档信息")
|
|
|
|
|
+ raise Exception("文档上传失败: 未返回有效的文档信息")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"上传文档时出错: {str(e)}")
|
|
|
|
|
+ raise
|
|
|
|
|
+
|
|
|
|
|
+ def _upload_page_document_node(self, state: PDFParsingState) -> Dict[str, Any]:
|
|
|
|
|
+ """RAGFLOW上传页面文档节点"""
|
|
|
|
|
+ print(f"开始上传页面文档到数据集 {state.dataset_id}: {state.pdf_path}")
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ # 上传文档
|
|
|
|
|
+ document_info_list = self.ragflow_service.upload_document(
|
|
|
|
|
+ dataset_id=state.page_dataset_id,
|
|
|
|
|
+ file_path=state.pdf_path
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 检查响应
|
|
|
|
|
+ if document_info_list and len(document_info_list) > 0:
|
|
|
|
|
+ page_document_id = document_info_list[0]["id"]
|
|
|
|
|
+ print(f"文档上传成功,文档ID: {page_document_id}")
|
|
|
|
|
+ return {
|
|
|
|
|
+ "page_document_id": page_document_id,
|
|
|
}
|
|
}
|
|
|
else:
|
|
else:
|
|
|
print("文档上传失败: 未返回有效的文档信息")
|
|
print("文档上传失败: 未返回有效的文档信息")
|
|
@@ -167,7 +220,7 @@ class PDFParsingWorkflow:
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
# 解析文档
|
|
# 解析文档
|
|
|
- parse_success = state.ragflow_service.parse_document(
|
|
|
|
|
|
|
+ parse_success = self.ragflow_service.parse_document(
|
|
|
dataset_id=state.dataset_id,
|
|
dataset_id=state.dataset_id,
|
|
|
document_ids=[state.document_id]
|
|
document_ids=[state.document_id]
|
|
|
)
|
|
)
|
|
@@ -322,26 +375,22 @@ class PDFParsingWorkflow:
|
|
|
print(f"开始单页上传,共 {len(state.parsed_results)} 页")
|
|
print(f"开始单页上传,共 {len(state.parsed_results)} 页")
|
|
|
|
|
|
|
|
# 遍历所有解析结果,上传单页
|
|
# 遍历所有解析结果,上传单页
|
|
|
- for parsed_result in state.parsed_results:
|
|
|
|
|
|
|
+ # Iterate over every parsed result and upload it as a RAGFlow chunk
|
|
|
|
|
+ for i, parsed_result in enumerate(state.parsed_results):
|
|
|
page_number = parsed_result.get("page_number")
|
|
page_number = parsed_result.get("page_number")
|
|
|
text = parsed_result.get("content", "")
|
|
text = parsed_result.get("content", "")
|
|
|
- image = state.split_pages[page_number - 1].get("image")
|
|
|
|
|
|
|
+ image_path = state.split_pages[i].get("image_path")
|
|
|
|
|
|
|
|
# 上传单页到RagFlow Chunk
|
|
# 上传单页到RagFlow Chunk
|
|
|
- chunk = state.chunk_util.add_chunk(
|
|
|
|
|
- dataset_name=state.dataset_name,
|
|
|
|
|
- document_id=state.page_document_id,
|
|
|
|
|
- content=text,
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- infinity_client = get_client()
|
|
|
|
|
- infinity_client.update(database_name=state.dataset_name, table_name="", cond=f"id = {chunk_id}", data={"tag_kwd": tag_name})
|
|
|
|
|
-
|
|
|
|
|
- # 检查响应
|
|
|
|
|
- if document_info and document_info.get("id"):
|
|
|
|
|
- print(f"第 {page_number} 页上传成功,文档ID: {document_info['id']}")
|
|
|
|
|
- else:
|
|
|
|
|
- print(f"第 {page_number} 页上传失败")
|
|
|
|
|
|
|
+ chunk = self.ragflow_service.create_chunk(dataset_id=state.page_dataset_id,
|
|
|
|
|
+ document_id=state.page_document_id,
|
|
|
|
|
+ content=text)
|
|
|
|
|
+ chunk_id = chunk["chunk"]["id"]
|
|
|
|
|
+ print(f"上传第 {page_number} 页,Chunk ID: {chunk_id}")
|
|
|
|
|
+ # # 睡眠50ms,避免上传过快
|
|
|
|
|
+ # time.sleep(0.05)
|
|
|
|
|
+ # result = get_client().update(database_name=state.dataset_name, table_name="", cond=f"id = '{chunk_id}'", data={"img_id": img_id})
|
|
|
|
|
+ # print(f"更新第 {page_number} 页,Chunk ID: {chunk_id},结果: {result}")
|
|
|
|
|
|
|
|
def _vectorize_store_node(self, state: PDFParsingState) -> Dict[str, Any]:
|
|
def _vectorize_store_node(self, state: PDFParsingState) -> Dict[str, Any]:
|
|
|
"""向量化入库节点"""
|
|
"""向量化入库节点"""
|
|
@@ -349,7 +398,7 @@ class PDFParsingWorkflow:
|
|
|
|
|
|
|
|
# 创建索引(如果不存在)
|
|
# 创建索引(如果不存在)
|
|
|
index_name = f"{vector_db_settings.infinity_table_name}"
|
|
index_name = f"{vector_db_settings.infinity_table_name}"
|
|
|
- state.vector_db.create_index(index_name)
|
|
|
|
|
|
|
+ # get_client().create_index()
|
|
|
|
|
|
|
|
# 准备要入库的文档列表
|
|
# 准备要入库的文档列表
|
|
|
documents_to_store = []
|
|
documents_to_store = []
|
|
@@ -369,7 +418,7 @@ class PDFParsingWorkflow:
|
|
|
|
|
|
|
|
# 获取多模态嵌入向量
|
|
# 获取多模态嵌入向量
|
|
|
print(f"正在生成第 {page_number} 页的多模态嵌入...")
|
|
print(f"正在生成第 {page_number} 页的多模态嵌入...")
|
|
|
- embedding = state.embedding_model.get_multimodal_embedding(text, image)
|
|
|
|
|
|
|
+ embedding = self.embedding_model.get_multimodal_embedding(text, image)
|
|
|
|
|
|
|
|
# 生成1024维稠密向量(如果嵌入向量维度不是1024,这里需要处理)
|
|
# 生成1024维稠密向量(如果嵌入向量维度不是1024,这里需要处理)
|
|
|
dense_vector_1024 = embedding[:1024] # 取前1024维
|
|
dense_vector_1024 = embedding[:1024] # 取前1024维
|
|
@@ -395,8 +444,11 @@ class PDFParsingWorkflow:
|
|
|
# 批量入库
|
|
# 批量入库
|
|
|
if documents_to_store:
|
|
if documents_to_store:
|
|
|
print(f"开始入库,共 {len(documents_to_store)} 个文档")
|
|
print(f"开始入库,共 {len(documents_to_store)} 个文档")
|
|
|
- infinity_client = get_client()
|
|
|
|
|
- result = infinity_client.insert(index_name, documents_to_store)
|
|
|
|
|
|
|
+ result = get_client().insert(
|
|
|
|
|
+ table_name=vector_db_settings.infinity_table_name,
|
|
|
|
|
+ documents=documents_to_store,
|
|
|
|
|
+ database_name=vector_db_settings.infinity_database
|
|
|
|
|
+ )
|
|
|
print(f"入库结果: {result}")
|
|
print(f"入库结果: {result}")
|
|
|
|
|
|
|
|
return {
|
|
return {
|
|
@@ -414,7 +466,7 @@ class PDFParsingWorkflow:
|
|
|
"is_complete": True
|
|
"is_complete": True
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- def run(self, pdf_path: str, page_dataset_id: str, ragflow_api_url: str, rag_flow_api_key: str) -> Dict[str, Any]:
|
|
|
|
|
|
|
+ def run(self, pdf_path: str, page_dataset_id: str, dataset_name: str) -> Dict[str, Any]:
|
|
|
"""
|
|
"""
|
|
|
运行PDF解析工作流
|
|
运行PDF解析工作流
|
|
|
|
|
|
|
@@ -430,13 +482,9 @@ class PDFParsingWorkflow:
|
|
|
initial_state = PDFParsingState(
|
|
initial_state = PDFParsingState(
|
|
|
pdf_path=pdf_path,
|
|
pdf_path=pdf_path,
|
|
|
page_dataset_id=page_dataset_id,
|
|
page_dataset_id=page_dataset_id,
|
|
|
- embedding_model=Embedding(model_name=model_settings.multimodal_embedding_model_name, api_key=model_settings.dashscope_api_key),
|
|
|
|
|
- dataset_util=DataSetUtil(),
|
|
|
|
|
- document_util=DocumentUtil(),
|
|
|
|
|
- chunk_util=ChunkUtil(),
|
|
|
|
|
- ragflow_service=RAGFlowService(api_url=ragflow_api_url, api_key=rag_flow_api_key)
|
|
|
|
|
|
|
+ dataset_name=dataset_name
|
|
|
)
|
|
)
|
|
|
- result = self.workflow.invoke(initial_state)
|
|
|
|
|
|
|
+ result = self.workflow.invoke(initial_state, config={"callbacks": [self.langfuse_handler]})
|
|
|
|
|
|
|
|
# 检查结果类型,如果是字典直接返回,否则调用dict()方法
|
|
# 检查结果类型,如果是字典直接返回,否则调用dict()方法
|
|
|
if isinstance(result, dict):
|
|
if isinstance(result, dict):
|