| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- """
- 总结节点
- 对图像解析结果进行总结提要,并保存到文件。
- """
- import os
- from typing import Dict, Any, List, Optional
- from src.datasets.parser.core.base import BaseNode, BaseState
- from src.datasets.parser.core.registry import register_node
- from src.conf.settings import model_settings
- from src.model.qwen_vl import QWenVLParser
- from src.common.logging_config import get_logger
# Module-level logger obtained from the project's logging helper, keyed by
# this module's import name.
logger = get_logger(__name__)
@register_node()
class SummaryNode(BaseNode):
    """Summarize parsed results and save the summary to a Markdown file.

    The node reads ``parsed_results`` and ``original_filename`` from the
    incoming state, merges the ``content`` of every parsed result, asks the
    chat model for a summary and writes that summary to
    ``<OUTPUT_DIR>/<original_filename>_<dimension_id>.md``.
    """

    # Directory (relative to the current working directory) that receives
    # the generated summary files.
    OUTPUT_DIR = "temp"

    def __init__(self, dimension_id: int = 0, model_name: Optional[str] = None):
        """Initialise the summary node.

        Args:
            dimension_id: Dimension id; used as a suffix of the output file
                name.
            model_name: Name of the chat model. Falls back to
                ``model_settings.chat_model_name`` when omitted or empty.
        """
        self.dimension_id = dimension_id
        self.model_name = model_name or model_settings.chat_model_name
        # Prompt template; the literal ``{content}`` marker is substituted
        # with the text to summarise in ``_summarize_content``.
        self.summary_prompt = (
            "\n"
            "你是一位专业的内容总结专家,擅长从长篇文本中提取核心内容并生成简洁的总结。\n"
            "\n"
            "请对以下内容进行总结,要求:\n"
            "1. 提炼核心观点和关键信息\n"
            "2. 保持内容的完整性和准确性\n"
            "3. 使用清晰、连贯的语言\n"
            "4. 总结长度适中,不要过于冗长\n"
            "\n"
            "内容:\n"
            "{content}\n"
            "\n"
            "总结:\n"
        )

    @property
    def name(self) -> str:
        """Return the fixed identifier of this node."""
        return "summary_node"

    def _summarize_content(self, content: str) -> str:
        """Ask the chat model to summarise ``content``.

        Args:
            content: Text to summarise.

        Returns:
            The model response, or a ``"总结失败: <error>"`` message when
            building the prompt or calling the model raises.
        """
        try:
            # ``str.replace`` (not ``str.format``) so that braces inside
            # ``content`` are never interpreted as format fields. It is a
            # no-op when the template has no ``{content}`` marker.
            prompt = self.summary_prompt.replace("{content}", content)
            chat_model = QWenVLParser(self.model_name)
            return chat_model.chat(prompt=prompt)
        except Exception as e:
            # Best-effort: the failure is reported through the return value
            # instead of propagating to the caller.
            logger.error(f"总结内容时出错: {e}")
            return f"总结失败: {e}"

    @staticmethod
    def _empty_result() -> Dict[str, Any]:
        """Return the result used when there is nothing to summarise."""
        return {
            "summary": "",
            "saved_path": "",
            "is_complete": True,
        }

    @staticmethod
    def _collect_content(parsed_results) -> List[str]:
        """Extract the non-empty ``content`` of every dict in ``parsed_results``.

        Dict contents are rendered as pretty-printed JSON; any other value is
        converted with ``str``. Entries that are not dicts, or whose
        ``content`` is missing or falsy, are skipped.
        """
        import json

        parts: List[str] = []
        for result in parsed_results:
            if not isinstance(result, dict):
                continue
            content = result.get("content", "")
            if not content:
                continue
            if isinstance(content, dict):
                parts.append(json.dumps(content, ensure_ascii=False, indent=2))
            else:
                parts.append(str(content))
        return parts

    def _save_summary(self, summary: str, original_filename: str) -> str:
        """Write ``summary`` into ``OUTPUT_DIR`` and return the file path.

        Returns an empty string when the directory cannot be created or the
        file cannot be written; the error is logged rather than raised.
        """
        # Keep only the final path component: a name carrying directories
        # would target a sub-directory that is never created, and an absolute
        # or ``..`` name would make ``os.path.join`` leave ``OUTPUT_DIR``.
        safe_name = os.path.basename(str(original_filename))
        file_path = os.path.join(
            self.OUTPUT_DIR, f"{safe_name}_{self.dimension_id}.md"
        )
        try:
            os.makedirs(self.OUTPUT_DIR, exist_ok=True)
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(summary)
        except Exception as e:
            logger.error(f"保存总结文件时出错: {e}")
            return ""
        logger.info(f"总结已保存到: {file_path}")
        return file_path

    def execute(self, state: BaseState) -> Dict[str, Any]:
        """Summarise the parsed results held by ``state`` and save the summary.

        Args:
            state: State object; its ``parsed_results`` and
                ``original_filename`` attributes are read, defaulting to
                ``[]`` and ``""`` when absent.

        Returns:
            A dict with the keys ``summary`` (summary text), ``saved_path``
            (path of the written file, ``""`` if nothing was saved) and
            ``is_complete`` (always ``True``).
        """
        parsed_results = getattr(state, "parsed_results", [])
        original_filename = getattr(state, "original_filename", "")
        if not parsed_results:
            logger.warning("没有解析结果可总结")
            return self._empty_result()

        content_parts = self._collect_content(parsed_results)
        if not content_parts:
            logger.warning("解析结果中没有内容可总结")
            return self._empty_result()

        combined_content = "\n".join(content_parts)
        logger.info(f"开始总结内容,长度: {len(combined_content)} 字符")

        summary = self._summarize_content(combined_content)
        logger.info("内容总结完成")

        saved_path = self._save_summary(summary, original_filename)
        return {
            "summary": summary,
            "saved_path": saved_path,
            "is_complete": True,
        }
|