3 kuukautta sitten · 3a375c66d2
--- a/.env.example
+++ b/.env.example
@@ -1,8 +1,39 @@
 
				 # 模型配置
			
 
				 MODEL_PROVIDER=openai
			
 
				-MODEL_NAME=qwen3-vl
			
 
				-BASE_URL=https://api.openai.com/v1
			
 
				-API_KEY=your-api-key-here
			
 
				+MODEL_NAME=Qwen/Qwen3-VL-8B-Instruct
			
 
				+BASE_URL=https://api.siliconflow.cn/v1
			
 
				+API_KEY=your-api-key-here
			
 
				+DASHSCOPE=your-dashscope-api-key-here
			
 
				+# embedding模型配置
			
 
				+EMBEDDING_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B
			
 
				+MULTIMODAL_EMBEDDING_MODEL_NAME=qwen2.5-vl-embedding
			
 
				+# RAGFLOW配置
			
 
				+RAGFLOW_API_URL=http://192.168.16.134/
			
 
				+RAGFLOW_API_KEY=your-ragflow-api-key-here
			
 
				+DATASET_ID=a0f1aa03ed2c11f08b8f0242c0a85002
			
 
				 
			
 
				 # 应用配置
			
 
				 LOG_LEVEL=INFO
			
 
				+
			
 
				+# 向量数据库配置
			
 
				+# 可选值: es, infinity
			
 
				+VECTOR_DB_TYPE=infinity
			
 
				+
			
 
				+# Infinity向量数据库配置
			
 
				+INFINITY_HOST=192.168.16.134
			
 
				+INFINITY_PORT=23820
			
 
				+INFINITY_SDK_PORT=23817
			
 
				+INFINITY_DATABASE=book_image_db
			
 
				+INFINITY_USER=admin
			
 
				+INFINITY_PASSWORD=admin
			
 
				+INFINITY_TABLE_NAME=book_page_image
			
 
				+
			
 
				+# MinIO配置
			
 
				+MINIO_ENDPOINT=192.168.16.134:9000
			
 
				+MINIO_ACCESS_KEY=your-minio-access-key
			
 
				+MINIO_SECRET_KEY=your-minio-secret-key
			
 
				+MINIO_BUCKET_NAME=bookpage
			
 
				+# 本地测试设为false；生产环境设为true
			
 
				+MINIO_SECURE=False
			
 
				+
			
 
				+
			
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
 
				+__pycache__/
			
 
				+.infinity_client/
			
 
				+.trae/
			
 
				+.model_output/
			
--- a/__pycache__/main.cpython-312.pyc
+++ b/__pycache__/main.cpython-312.pyc
--- a/api/__pycache__/__init__.cpython-312.pyc
+++ b/api/__pycache__/__init__.cpython-312.pyc
--- a/api/__pycache__/hybrid_search_http.cpython-312.pyc
+++ b/api/__pycache__/hybrid_search_http.cpython-312.pyc
--- a/api/__pycache__/search_infinity.cpython-312.pyc
+++ b/api/__pycache__/search_infinity.cpython-312.pyc
--- a/api/db/services/__init__.py
+++ b/api/db/services/__init__.py
@@ -0,0 +1,3 @@
 
				+# from api.db.services.infinity_search_service import InfinitySearchService
			
 
				+
			
 
				+# search_service = InfinitySearchService()
			
--- a/api/db/services/__pycache__/__init__.cpython-312.pyc
+++ b/api/db/services/__pycache__/__init__.cpython-312.pyc
--- a/api/db/services/__pycache__/infinity_search_service.cpython-312.pyc
+++ b/api/db/services/__pycache__/infinity_search_service.cpython-312.pyc
--- a/api/db/services/infinity_search_service.py
+++ b/api/db/services/infinity_search_service.py
@@ -0,0 +1,150 @@
 
				+from typing import Dict, Any, List
			
 
				+from conf.config import VectorDBConfig
			
 
				+from utils.infinity import InfinityClient
			
 
				+from utils.file.image_util import image_util
			
 
				+from model.multimodal_embedding import get_embedding_model
			
 
				+
			
 
				+
			
 
				+def convert_to_basic_types(obj: Any) -> Any:
			
 
				+    """
			
 
				+    递归将对象转换为基本类型，以便Pydantic能够序列化
			
 
				+    
			
 
				+    特殊处理：当字典中的子项包含相同长度的数组时，将其转换为数组对象结构
			
 
				+    例如：{"a": [1,2], "b": [3,4]} -> [{"a":1, "b":3}, {"a":2, "b":4}]
			
 
				+    
			
 
				+    Args:
			
 
				+        obj: 要转换的对象
			
 
				+    
			
 
				+    Returns:
			
 
				+        转换后的基本类型对象
			
 
				+    """
			
 
				+    if obj is None:
			
 
				+        return None
			
 
				+    elif isinstance(obj, (str, int, float, bool)):
			
 
				+        return obj
			
 
				+    elif isinstance(obj, dict):
			
 
				+        # 先递归转换所有值
			
 
				+        converted = {k: convert_to_basic_types(v) for k, v in obj.items()}
			
 
				+        
			
 
				+        # 检查是否需要转换为数组对象结构
			
 
				+        # 条件：所有值都是列表，且长度一致，且长度大于0
			
 
				+        values = list(converted.values())
			
 
				+        if all(isinstance(v, list) for v in values):
			
 
				+            lengths = [len(v) for v in values]
			
 
				+            if len(set(lengths)) == 1 and lengths[0] > 0:
			
 
				+                # 转换为数组对象结构
			
 
				+                result = []
			
 
				+                keys = list(converted.keys())
			
 
				+                for i in range(lengths[0]):
			
 
				+                    item = {}
			
 
				+                    for key in keys:
			
 
				+                        # 处理数组中可能存在的None值
			
 
				+                        if i < len(converted[key]):
			
 
				+                            item[key] = converted[key][i]
			
 
				+                        else:
			
 
				+                            item[key] = None
			
 
				+                    result.append(item)
			
 
				+                return result
			
 
				+        
			
 
				+        return converted
			
 
				+    elif isinstance(obj, (list, tuple)):
			
 
				+        return [convert_to_basic_types(item) for item in obj]
			
 
				+    else:
			
 
				+        # 对于其他类型，尝试将其转换为字符串或字典
			
 
				+        try:
			
 
				+            return dict(obj)
			
 
				+        except:
			
 
				+            return str(obj)
			
 
				+
			
 
				+
			
 
				+class InfinitySearchService:
			
 
				+    def __init__(self, infinity_client: InfinityClient, vector_field: str = None, match_field: str = None, match_type: str = None, table_name: str = None):
			
 
				+        self.infinity_client = infinity_client
			
 
				+        # 输出字段
			
 
				+        self.output_fields = [
			
 
				+                    "file_name",
			
 
				+                    "page_number",
			
 
				+                    "content",
			
 
				+                    "image_path",
			
 
				+                    "dataset_id",
			
 
				+                    "document_id"
			
 
				+                ]
			
 
				+        self.vector_field = vector_field or "dense_vector_1024"
			
 
				+        self.match_field = match_field or "content"
			
 
				+        self.match_type = match_type or "cosine"
			
 
				+        self.table_name = table_name or VectorDBConfig.get_infinity_table_name()
			
 
				+
			
 
				+    def search(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        执行Infinity数据库搜索
			
 
				+    
			
 
				+        Args:
			
 
				+            search_query: 搜索查询参数
			
 
				+        
			
 
				+        Returns:
			
 
				+            搜索结果，转换为基本类型以便序列化
			
 
				+        """
			
 
				+        try:
			
 
				+            # 执行搜索
			
 
				+            result = self.infinity_client.search(self.table_name, self.output_fields, search_query)
			
 
				+            # 将结果转换为基本类型，处理可能的复杂类型
			
 
				+            result_dict = result.to_result()
			
 
				+            # 递归转换所有复杂类型为基本类型
			
 
				+            return convert_to_basic_types(result_dict)
			
 
				+        except Exception as e:
			
 
				+            raise Exception(f"搜索失败: {str(e)}")
			
 
				+
			
 
				+    def vector_search(self, search_query: Dict[str, Any]):
			
 
				+        """
			
 
				+        执行Infinity数据库向量检索
			
 
				+    
			
 
				+        Args:
			
 
				+            search_query: 向量检索查询参数
			
 
				+        
			
 
				+        Returns:
			
 
				+            向量检索结果，转换为基本类型以便序列化
			
 
				+        """
			
 
				+        try:
			
 
				+            # 1.处理image_url为image: Image.Image
			
 
				+            image = image_util._url_to_image(search_query["image_url"])
			
 
				+            # 2.将图片进行向量化
			
 
				+            query_vector = get_embedding_model().get_multimodal_embedding(search_query["matching_text"], image)
			
 
				+
			
 
				+            search_query["vector_field"] = self.vector_field
			
 
				+            search_query["query_vector"] = query_vector
			
 
				+            # 执行向量检索
			
 
				+            result = self.infinity_client.vector_search(self.table_name, self.output_fields, search_query)
			
 
				+            # 将结果转换为基本类型，处理可能的复杂类型
			
 
				+            result_dict = result.to_result()
			
 
				+            # 递归转换所有复杂类型为基本类型
			
 
				+            return convert_to_basic_types(result_dict)
			
 
				+        except Exception as e:
			
 
				+            raise Exception(f"向量检索失败: {str(e)}")
			
 
				+
			
 
				+    def hybrid_search(self, search_query: Dict[str, Any]):
			
 
				+        """
			
 
				+        执行Infinity数据库混合检索
			
 
				+    
			
 
				+        Args:
			
 
				+            search_query: 混合检索查询参数
			
 
				+        
			
 
				+        Returns:
			
 
				+            混合检索结果，转换为基本类型以便序列化
			
 
				+        """
			
 
				+        try:
			
 
				+            # 1.处理image_url为image: Image.Image
			
 
				+            image = image_util._url_to_image(search_query["image_url"])
			
 
				+            # 2.将图片进行向量化
			
 
				+            query_vector = get_embedding_model().get_multimodal_embedding(search_query["matching_text"], image)
			
 
				+            search_query["vector_field"] = self.vector_field
			
 
				+            search_query["query_vector"] = query_vector
			
 
				+            search_query["match_field"] = self.match_field
			
 
				+            # 执行混合检索
			
 
				+            result = self.infinity_client.hybrid_search(self.table_name, self.output_fields, search_query)
			
 
				+            # 将结果转换为基本类型，处理可能的复杂类型
			
 
				+            result_dict = result.to_result()
			
 
				+            # 递归转换所有复杂类型为基本类型
			
 
				+            return convert_to_basic_types(result_dict)
			
 
				+        except Exception as e:
			
 
				+            raise Exception(f"混合检索失败: {str(e)}")
			
 
				+
			
--- a/api/hybrid_search_http.py
+++ b/api/hybrid_search_http.py
@@ -1,136 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-混合检索HTTP服务
			
 
				-使用FastAPI框架实现，提供混合检索的HTTP POST接口
			
 
				-"""
			
 
				-
			
 
				-import sys
			
 
				-import os
			
 
				-import requests
			
 
				-from io import BytesIO
			
 
				-from typing import List, Dict, Any
			
 
				-from fastapi import FastAPI, HTTPException, Body
			
 
				-from pydantic import BaseModel
			
 
				-from PIL import Image
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.infinity_util import InfinityVectorDB
			
 
				-from model.multimodal_embedding import Embedding
			
 
				-from conf.config import ModelConfig, VectorDBConfig
			
 
				-
			
 
# Initialize the FastAPI application.
app = FastAPI(
    title="混合检索HTTP服务",
    description="提供混合检索的HTTP POST接口",
    version="1.0.0"
)

# Initialize the vector database wrapper (created once at import time).
vector_db = InfinityVectorDB()

# Initialize the multimodal embedding model from the model configuration.
embedding_model = Embedding(
    model_name=ModelConfig.get_multimodal_embedding_model_name(),
    api_key=ModelConfig.get_dashscope_api_key()
)
			
 
				-
			
 
				-# 定义请求模型
			
 
class HybridSearchRequest(BaseModel):
    """Request body for the hybrid search endpoint."""
    # Text part of the query.
    text_query: str
    # Image reference; the handler downloads it with an HTTP GET, so it is required and must be a URL.
    image: str
    # Number of results to return.
    topn: int = 2
			
 
				-
			
 
				-# 定义响应模型
			
 
class HybridSearchResponse(BaseModel):
    """Response body for the hybrid search endpoint."""
    # Whether the search succeeded.
    success: bool
    # Human-readable result message.
    message: str
    # Search hits.
    output: List[Dict[str, Any]] = []
    # Total hit count.
    total: int = 0
			
 
				-
			
 
				-@app.post("/hybrid_search", response_model=HybridSearchResponse)
			
 
				-def hybrid_search(request: HybridSearchRequest = Body(...)):
			
 
				-    """
			
 
				-    混合检索API
			
 
				-    使用文本查询和向量查询进行混合检索
			
 
				-    
			
 
				-    请求参数：
			
 
				-    - text_query: 文本查询
			
 
				-    - image: 图片URL
			
 
				-    - topn: 返回结果数量，默认2
			
 
				-    
			
 
				-    返回结果：
			
 
				-    - success: 是否成功
			
 
				-    - message: 结果消息
			
 
				-    - output: 检索结果列表
			
 
				-    - total: 总命中数
			
 
				-    """
			
 
				-    try:
			
 
				-        # 解析请求参数
			
 
				-        text_query = request.text_query
			
 
				-        image_url = request.image
			
 
				-        topn = request.topn
			
 
				-        
			
 
				-        print(f"开始混合检索，数据库: {VectorDBConfig.get_infinity_database}, 知识库id: {ModelConfig.get_dataset_id()}, 文本查询: {text_query}, 返回数量: {topn}")
			
 
				-        
			
 
				-        # 构建索引名称
			
 
				-        index_name = f"{VectorDBConfig.get_infinity_table_name()}" 
			
 
				-        print(f"开始生成多模态嵌入，文本长度: {len(text_query)}")
			
 
				-        
			
 
				-        # 处理image_url为image: Image.Image
			
 
				-        if isinstance(image_url, str):
			
 
				-                # 下载图片
			
 
				-                response = requests.get(image_url)
			
 
				-                response.raise_for_status()  # 检查HTTP状态码
			
 
				-    
			
 
				-                # 将响应内容转换为字节流
			
 
				-                image_bytes = BytesIO(response.content)
			
 
				-    
			
 
				-                # 创建Image对象
			
 
				-                image = Image.open(image_bytes)
			
 
				-        
			
 
				-        # 生成多模态嵌入向量
			
 
				-        embedding = embedding_model.get_multimodal_embedding(text_query, image)
			
 
				-        
			
 
				-        print(f"多模态嵌入生成完成，向量长度: {len(embedding)}")
			
 
				-        
			
 
				-        # 执行混合检索
			
 
				-        result = vector_db.hybrid_search(
			
 
				-            index_name=index_name,
			
 
				-            match_method="dense",
			
 
				-            vector_field="dense_vector_1024",
			
 
				-            query_vector=embedding,
			
 
				-            element_type="float",
			
 
				-            metric_type="cosine",
			
 
				-            topn=topn,
			
 
				-            text_query=text_query,
			
 
				-            text_field="content"
			
 
				-        )
			
 
				-        
			
 
				-        print(f"混合检索完成，总命中数: {result.get('total', 0)}")
			
 
				-        
			
 
				-        # 返回成功响应
			
 
				-        return HybridSearchResponse(
			
 
				-            success=True,
			
 
				-            message="混合检索成功",
			
 
				-            output=result.get("output", []),
			
 
				-            total=result.get("total", topn)
			
 
				-        )
			
 
				-    except Exception as e:
			
 
				-        print(f"混合检索失败: {str(e)}")
			
 
				-        raise HTTPException(status_code=500, detail=str(e))
			
 
				-
			
 
				-@app.get("/health")
			
 
				-def health_check():
			
 
				-    """健康检查接口"""
			
 
				-    return {
			
 
				-        "status": "ok",
			
 
				-        "message": "混合检索HTTP服务正常运行"
			
 
				-    }
			
 
				-
			
 
# Start the service with uvicorn on all interfaces, port 18001, when run as a script.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=18001)
			
--- a/api/hybrid_search_http_example.py
+++ b/api/hybrid_search_http_example.py
@@ -1,55 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-"""
			
 
				-混合检索HTTP服务请求示例
			
 
				-使用Python requests库调用混合检索接口
			
 
				-"""
			
 
				-
			
 
				-import requests
			
 
				-import json
			
 
				-
			
 
				-def hybrid_search_example():
			
 
				-    """
			
 
				-    混合检索接口调用示例
			
 
				-    """
			
 
				-    # 服务地址
			
 
				-    base_url = "http://localhost:18001"
			
 
				-    endpoint = "/hybrid_search"
			
 
				-    url = f"{base_url}{endpoint}"
			
 
				-    
			
 
				-    # 示例1：基本请求（仅文本查询）
			
 
				-    print("示例1：基本请求（仅文本查询）")
			
 
				-    payload1 = {
			
 
				-        "text_query": "这是一个测试文本查询",
			
 
				-        "topn": 2
			
 
				-    }
			
 
				-    
			
 
				-    response1 = requests.post(url, json=payload1)
			
 
				-    print(f"状态码: {response1.status_code}")
			
 
				-    print(f"响应内容: {json.dumps(response1.json(), indent=2, ensure_ascii=False)}")
			
 
				-    
			
 
				-    # 示例2：完整请求（文本+图片）
			
 
				-    print("\n示例2：完整请求（文本+图片）")
			
 
				-    payload2 = {
			
 
				-        "text_query": "这是一个带图片的测试查询",
			
 
				-        "image": "https://example.com/test.jpg",
			
 
				-        "topn": 5
			
 
				-    }
			
 
				-    
			
 
				-    response2 = requests.post(url, json=payload2)
			
 
				-    print(f"状态码: {response2.status_code}")
			
 
				-    print(f"响应内容: {json.dumps(response2.json(), indent=2, ensure_ascii=False)}")
			
 
				-    
			
 
				-    # 示例3：使用默认topn值
			
 
				-    print("\n示例3：使用默认topn值")
			
 
				-    payload3 = {
			
 
				-        "text_query": "这是一个使用默认值的测试",
			
 
				-        "image": "https://example.com/another.jpg"
			
 
				-    }
			
 
				-    
			
 
				-    response3 = requests.post(url, json=payload3)
			
 
				-    print(f"状态码: {response3.status_code}")
			
 
				-    print(f"响应内容: {json.dumps(response3.json(), indent=2, ensure_ascii=False)}")
			
 
				-
			
 
# Run the sample requests when executed as a script.
if __name__ == "__main__":
    hybrid_search_example()
			
--- a/api/hybrid_search_http_example.sh
+++ b/api/hybrid_search_http_example.sh
@@ -1,35 +0,0 @@
 
				-#!/bin/bash
			
 
				-
			
 
				-# 混合检索HTTP服务请求示例
			
 
				-# 服务地址：http://localhost:18001
			
 
				-# 接口路径：/hybrid_search
			
 
				-# 请求方法：POST
			
 
				-# 请求格式：application/json
			
 
				-
			
 
				-# 示例1：基本请求（仅文本查询）
			
 
				-echo "示例1：基本请求（仅文本查询）"
			
 
				-curl -X POST "http://localhost:18001/hybrid_search" \
			
 
				-     -H "Content-Type: application/json" \
			
 
				-     -d '{
			
 
				-           "text_query": "这是一个测试文本查询",
			
 
				-           "topn": 2
			
 
				-         }'
			
 
				-
			
 
				-echo -e "\n\n示例2：完整请求（文本+图片）"
			
 
				-# 示例2：完整请求（文本+图片）
			
 
				-curl -X POST "http://localhost:18001/hybrid_search" \
			
 
				-     -H "Content-Type: application/json" \
			
 
				-     -d '{
			
 
				-           "text_query": "这是一个带图片的测试查询",
			
 
				-           "image": "https://example.com/test.jpg",
			
 
				-           "topn": 5
			
 
				-         }'
			
 
				-
			
 
				-echo -e "\n\n示例3：使用默认topn值"
			
 
				-# 示例3：使用默认topn值（默认值为2）
			
 
				-curl -X POST "http://localhost:18001/hybrid_search" \
			
 
				-     -H "Content-Type: application/json" \
			
 
				-     -d '{
			
 
				-           "text_query": "这是一个使用默认值的测试",
			
 
				-           "image": "https://example.com/another.jpg"
			
 
				-         }'
			
--- a/api/search_infinity.py
+++ b/api/search_infinity.py
@@ -0,0 +1,76 @@
 
				+# Infinity搜索API服务
			
 
				+
			
 
				+from fastapi import FastAPI, HTTPException
			
 
				+from typing import List, Dict, Any, Optional
			
 
				+from api.db.services.infinity_search_service import InfinitySearchService
			
 
				+from utils.infinity import get_client
			
 
				+
			
 
				+
			
 
# Create the FastAPI application.

app = FastAPI(
    title="Infinity Search API",
    description="基于Infinity向量数据库的搜索API服务",
    version="1.0.0"
)
			
 
				+
			
 
				+# 请求模型
			
 
				+from pydantic import BaseModel
			
 
				+
			
 
class SearchRequest(BaseModel):
    """Request body shared by the search endpoints."""
    # Free-form query parameters; handed as-is to InfinitySearchService.
    search_query: Dict[str, Any]
			
 
				+
			
 
				+# 1. 普通搜索接口
			
 
				+@app.post("/text", response_model=Dict[str, Any])
			
 
				+def search(request: SearchRequest):
			
 
				+    """
			
 
				+    普通搜索接口
			
 
				+    
			
 
				+    - **table_name**: 表名
			
 
				+    - **output_fields**: 要返回的字段列表
			
 
				+    - **query**: 查询条件，包含field、query和topn字段
			
 
				+    - **database_name**: 数据库名称（可选，默认使用客户端配置的数据库）
			
 
				+    """
			
 
				+    try:
			
 
				+        search_service = InfinitySearchService(infinity_client=get_client())
			
 
				+        result = search_service.search(request.search_query)
			
 
				+        return {"success": True, "result": result}
			
 
				+    except Exception as e:
			
 
				+        raise HTTPException(status_code=500, detail=f"搜索失败: {str(e)}")
			
 
				+
			
 
				+# 2. 向量搜索接口
			
 
				+@app.post("/vector", response_model=Dict[str, Any])
			
 
				+def vector_search(request: SearchRequest):
			
 
				+    """
			
 
				+    向量搜索接口
			
 
				+    
			
 
				+    - **table_name**: 表名
			
 
				+    - **output_fields**: 要返回的字段列表
			
 
				+    - **query**: 查询条件，包含vector_field、query_vector和topn字段
			
 
				+    - **database_name**: 数据库名称（可选，默认使用客户端配置的数据库）
			
 
				+    """
			
 
				+    try:
			
 
				+        search_service = InfinitySearchService(infinity_client=get_client())
			
 
				+        result = search_service.vector_search(request.search_query)
			
 
				+        return {"success": True, "result": result}
			
 
				+    except Exception as e:
			
 
				+        raise HTTPException(status_code=500, detail=f"向量搜索失败: {str(e)}")
			
 
				+
			
 
				+# 3. 混合搜索接口
			
 
				+@app.post("/hybrid", response_model=Dict[str, Any])
			
 
				+def hybrid_search(request: SearchRequest):
			
 
				+    """
			
 
				+    混合搜索接口
			
 
				+    
			
 
				+    - **table_name**: 表名
			
 
				+    - **output_fields**: 要返回的字段列表
			
 
				+    - **query**: 查询条件，包含vector_field、query_vector、field、query、topn和fusion_weight字段
			
 
				+    - **database_name**: 数据库名称（可选，默认使用客户端配置的数据库）
			
 
				+    """
			
 
				+    try:
			
 
				+        search_service = InfinitySearchService(infinity_client=get_client())
			
 
				+        result = search_service.hybrid_search(request.search_query)
			
 
				+        return {"success": True, "result": result}
			
 
				+    except Exception as e:
			
 
				+        raise HTTPException(status_code=500, detail=f"混合搜索失败: {str(e)}")
			
--- a/doc/README.md
+++ b/doc/README.md
@@ -1,252 +0,0 @@
 
				-# Ragflow_plugs 项目文档
			
 
				-
			
 
				-## 1. 项目概述
			
 
				-
			
 
				-Ragflow_plugs是一个基于RAG（检索增强生成）技术的多模态混合检索系统，支持文本和图像的联合检索，并提供灵活的HTTP API服务。该系统旨在为智能应用提供高效、准确的多模态信息检索能力。
			
 
				-
			
 
				-## 2. 目录结构
			
 
				-
			
 
				-```
			
 
				-├── agent/             # 智能代理模块
			
 
				-├── api/               # HTTP API服务模块
			
 
				-├── book/              # 示例文档和输出目录
			
 
				-├── conf/              # 配置文件
			
 
				-├── doc/               # 项目文档
			
 
				-├── model/             # 模型相关模块
			
 
				-├── parser/            # 文档解析模块
			
 
				-├── test/              # 测试文件
			
 
				-├── utils/             # 工具模块
			
 
				-├── workflow/          # 工作流管理模块
			
 
				-├── .env               # 环境变量配置
			
 
				-├── .env.example       # 环境变量示例
			
 
				-├── requirements.txt   # 项目依赖
			
 
				-└── __init__.py        # 项目初始化
			
 
				-```
			
 
				-
			
 
				-## 3. 核心功能
			
 
				-
			
 
				-### 3.1 多模态嵌入生成
			
 
				-
			
 
				-- 支持文本和图像的联合嵌入生成
			
 
				-- 兼容多种AI模型API
			
 
				-- 提供统一的嵌入接口
			
 
				-
			
 
				-### 3.2 混合检索
			
 
				-
			
 
				-- 结合文本检索和向量检索
			
 
				-- 支持灵活的检索参数配置
			
 
				-- 提供准确的检索结果
			
 
				-
			
 
				-### 3.3 HTTP API服务
			
 
				-
			
 
				-- 提供RESTful API
			
 
				-- 支持请求参数验证
			
 
				-- 提供统一的响应格式
			
 
				-
			
 
				-### 3.4 文档处理
			
 
				-
			
 
				-- 支持PDF文档解析
			
 
				-- 支持图像提取和处理
			
 
				-
			
 
				-### 3.5 向量数据库集成
			
 
				-
			
 
				-- 与Infinity向量数据库无缝集成
			
 
				-- 支持向量存储和检索
			
 
				-
			
 
				-## 4. 快速开始
			
 
				-
			
 
				-### 4.1 环境准备
			
 
				-
			
 
				-1. 安装Python 3.12
			
 
				-2. 安装依赖：
			
 
				-
			
 
				-```bash
			
 
				-pip install -r requirements.txt
			
 
				-```
			
 
				-
			
 
				-3. 配置环境变量：
			
 
				-
			
 
				-```bash
			
 
				-cp .env.example .env
			
 
				-# 编辑.env文件，配置API密钥和其他参数
			
 
				-```
			
 
				-
			
 
				-### 4.2 运行HTTP服务
			
 
				-
			
 
				-```bash
			
 
				-python -m api.hybrid_search_http
			
 
				-```
			
 
				-
			
 
				-服务将在 `http://0.0.0.0:18001` 上运行。
			
 
				-
			
 
				-### 4.3 使用API
			
 
				-
			
 
				-#### 混合检索API
			
 
				-
			
 
				-**请求URL**：`/hybrid_search`
			
 
				-
			
 
				-**请求方法**：POST
			
 
				-
			
 
				-**请求体**：
			
 
				-
			
 
				-```json
			
 
				-{
			
 
				-    "text_query": "文本查询",
			
 
				-    "image": "图片URL",
			
 
				-    "topn": 2
			
 
				-}
			
 
				-```
			
 
				-
			
 
				-**响应示例**：
			
 
				-
			
 
				-```json
			
 
				-{
			
 
				-    "success": true,
			
 
				-    "message": "混合检索成功",
			
 
				-    "output": [
			
 
				-        {
			
 
				-            "file_name": "文件名",
			
 
				-            "page_number": 1,
			
 
				-            "content": "内容",
			
 
				-            "image_path": "图片路径",
			
 
				-            "dataset_id": "数据集ID",
			
 
				-            "document_id": "文档ID",
			
 
				-            "_similarity": 0.95
			
 
				-        }
			
 
				-    ],
			
 
				-    "total": 1
			
 
				-}
			
 
				-```
			
 
				-
			
 
				-#### 健康检查API
			
 
				-
			
 
				-**请求URL**：`/health`
			
 
				-
			
 
				-**请求方法**：GET
			
 
				-
			
 
				-**响应示例**：
			
 
				-
			
 
				-```json
			
 
				-{
			
 
				-    "status": "ok",
			
 
				-    "message": "混合检索HTTP服务正常运行"
			
 
				-}
			
 
				-```
			
 
				-
			
 
				-## 5. 配置说明
			
 
				-
			
 
				-### 5.1 模型配置
			
 
				-
			
 
				-| 配置项 | 说明 | 默认值 |
			
 
				-|--------|------|--------|
			
 
				-| MULTIMODAL_EMBEDDING_MODEL_NAME | 多模态嵌入模型名称 | qwen-vl-plus |
			
 
				-| DASHSCOPE_API_KEY | DashScope API密钥 | - |
			
 
				-| SILICONFLOW_API_KEY | SiliconFlow API密钥 | - |
			
 
				-
			
 
				-### 5.2 向量数据库配置
			
 
				-
			
 
				-| 配置项 | 说明 | 默认值 |
			
 
				-|--------|------|--------|
			
 
				-| INFINITY_HOST | Infinity数据库地址 | http://localhost:23820 |
			
 
				-| INFINITY_DATABASE | Infinity数据库名称 | image_db |
			
 
				-| INFINITY_TABLE_NAME | Infinity表名称 | - |
			
 
				-
			
 
				-## 6. 开发指南
			
 
				-
			
 
				-### 6.1 代码风格
			
 
				-
			
 
				-- 遵循PEP 8代码风格
			
 
				-- 使用类型注解
			
 
				-- 编写清晰的文档字符串
			
 
				-
			
 
				-### 6.2 测试
			
 
				-
			
 
				-- 运行单元测试：
			
 
				-
			
 
				-```bash
			
 
				-python -m pytest test/ -v
			
 
				-```
			
 
				-
			
 
				-- 运行特定测试：
			
 
				-
			
 
				-```bash
			
 
				-python -m pytest test/test_http_hybrid_search.py -v
			
 
				-```
			
 
				-
			
 
				-### 6.3 添加新功能
			
 
				-
			
 
				-1. 在相应的模块目录下创建新文件
			
 
				-2. 实现新功能
			
 
				-3. 编写测试用例
			
 
				-4. 更新文档
			
 
				-
			
 
				-## 7. 示例代码
			
 
				-
			
 
				-### 7.1 使用混合检索
			
 
				-
			
 
				-```python
			
 
				-from PIL import Image
				-from utils.infinity_util import InfinityVectorDB
			
 
				-from model.multimodal_embedding import Embedding
			
 
				-from conf.config import ModelConfig
			
 
				-
			
 
				-# 初始化向量数据库
			
 
				-vector_db = InfinityVectorDB()
			
 
				-
			
 
				-# 初始化嵌入模型
			
 
				-embedding_model = Embedding(
			
 
				-    model_name=ModelConfig.get_multimodal_embedding_model_name(),
			
 
				-    api_key=ModelConfig.get_dashscope_api_key()
			
 
				-)
			
 
				-
			
 
				-# 生成多模态嵌入
			
 
				-text_query = "文本查询"
			
 
				-image = Image.open("image.jpg")
			
 
				-embedding = embedding_model.get_multimodal_embedding(text_query, image)
			
 
				-
			
 
				-# 执行混合检索
			
 
				-result = vector_db.hybrid_search(
			
 
				-    index_name="index_name",
			
 
				-    match_method="dense",
			
 
				-    vector_field="vector_field",
			
 
				-    query_vector=embedding,
			
 
				-    element_type="float",
			
 
				-    metric_type="cosine",
			
 
				-    topn=2,
			
 
				-    text_query=text_query,
			
 
				-    text_field="content"
			
 
				-)
			
 
				-
			
 
				-print(result)
			
 
				-```
			
 
				-
			
 
				-## 8. 常见问题
			
 
				-
			
 
				-### 8.1 嵌入生成失败
			
 
				-
			
 
				-- 检查API密钥是否正确
			
 
				-- 检查网络连接
			
 
				-- 检查模型名称是否正确
			
 
				-
			
 
				-### 8.2 检索结果不准确
			
 
				-
			
 
				-- 检查向量数据库配置
			
 
				-- 检查嵌入模型配置
			
 
				-- 调整检索参数
			
 
				-
			
 
				-### 8.3 HTTP服务无法启动
			
 
				-
			
 
				-- 检查端口是否被占用
			
 
				-- 检查配置文件是否正确
			
 
				-- 检查依赖是否安装完整
			
 
				-
			
 
				-## 9. 联系方式
			
 
				-
			
 
				-如有问题或建议，请联系项目负责人。
			
 
				-
			
 
				-## 10. 版本历史
			
 
				-
			
 
				-- v1.0.0：初始版本，支持多模态混合检索和HTTP API服务
			
 
				-
			
 
				-## 11. 许可证
			
 
				-
			
 
				-本项目采用MIT许可证。
			
--- a/doc/design.md
+++ b/doc/design.md
@@ -1,446 +0,0 @@
 
				-# Ragflow_plugs 项目设计文档
			
 
				-
			
 
				-## 1. 项目概述
			
 
				-
			
 
				-Ragflow_plugs是一个基于RAG（检索增强生成）技术的多模态混合检索系统，支持文本和图像的联合检索，并提供灵活的HTTP API服务。该系统旨在为智能应用提供高效、准确的多模态信息检索能力。
			
 
				-
			
 
				-### 1.1 核心功能
			
 
				-
			
 
				-- **多模态嵌入生成**：支持文本和图像的联合嵌入生成
			
 
				-- **混合检索**：结合文本检索和向量检索，提供更准确的检索结果
			
 
				-- **HTTP API服务**：提供RESTful API，方便外部系统集成
			
 
				-- **文档处理**：支持PDF文档解析、图像提取和处理
			
 
				-- **向量数据库集成**：与Infinity向量数据库无缝集成
			
 
				-- **灵活配置**：支持多环境配置，便于部署和管理
			
 
				-
			
 
				-### 1.2 应用场景
			
 
				-
			
 
				-- 智能问答系统
			
 
				-- 图像搜索和内容推荐
			
 
				-- 多模态内容管理系统
			
 
				-- 教育资源检索
			
 
				-- 儿童绘本智能分析
			
 
				-
			
 
				-## 2. 设计思路
			
 
				-
			
 
				-### 2.1 架构设计原则
			
 
				-
			
 
				-- **模块化设计**：将系统拆分为多个独立模块，便于维护和扩展
			
 
				-- **松耦合**：模块之间通过明确的接口进行通信，降低依赖关系
			
 
				-- **可扩展性**：支持多种模型、多种向量数据库的扩展
			
 
				-- **高可用性**：设计合理的错误处理和重试机制
			
 
				-- **性能优化**：针对检索和嵌入生成进行性能优化
			
 
				-
			
 
				-### 2.2 核心设计理念
			
 
				-
			
 
				-- **多模态融合**：将文本和图像信息融合为统一的向量表示
			
 
				-- **检索增强生成**：先检索相关信息，再结合大模型生成高质量回答
			
 
				-- **分层设计**：分为数据层、服务层、API层，各层职责明确
			
 
				-- **配置驱动**：通过配置文件灵活调整系统行为
			
 
				-
			
 
				-## 3. 系统架构
			
 
				-
			
 
				-### 3.1 整体架构
			
 
				-
			
 
				-```
			
 
				-┌───────────────────────────────────────────────────────────────────┐
			
 
				-│                      Client Applications                          │
			
 
				-└───────────────────────────────────────────────────────────────────┘
			
 
				-                               │
			
 
				-                               ▼
			
 
				-┌───────────────────────────────────────────────────────────────────┐
			
 
				-│                          API Layer                               │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                     hybrid_search_http.py                   │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                    hybrid_search_mcp.py                    │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-└───────────────────────────────────────────────────────────────────┘
			
 
				-                               │
			
 
				-                               ▼
			
 
				-┌───────────────────────────────────────────────────────────────────┐
			
 
				-│                         Service Layer                            │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                      Agent Module                           │  │
			
 
				-│  │  ┌───────────────────────────────────────────────────────┐  │  │
			
 
				-│  │  │                  test_image_agent.py                  │  │  │
			
 
				-│  │  └───────────────────────────────────────────────────────┘  │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                     Workflow Module                        │  │
			
 
				-│  │  ┌───────────────────────────────────────────────────────┐  │  │
			
 
				-│  │  │               image_parsing_workflow.py               │  │  │
			
 
				-│  │  └───────────────────────────────────────────────────────┘  │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-└───────────────────────────────────────────────────────────────────┘
			
 
				-                               │
			
 
				-                               ▼
			
 
				-┌───────────────────────────────────────────────────────────────────┐
			
 
				-│                         Model Layer                              │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                 multimodal_embedding.py                    │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                         qwen_vl.py                         │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-└───────────────────────────────────────────────────────────────────┘
			
 
				-                               │
			
 
				-                               ▼
			
 
				-┌───────────────────────────────────────────────────────────────────┐
			
 
				-│                         Data Layer                               │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                      Parser Module                         │  │
			
 
				-│  │  ┌───────────────────────────────────────────────────────┐  │  │
			
 
				-│  │  │                      pdf_parser                       │  │  │
			
 
				-│  │  └───────────────────────────────────────────────────────┘  │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                      Utils Module                          │  │
			
 
				-│  │  ┌───────────────────────────────────────────────────────┐  │  │
			
 
				-│  │  │                      http_client.py                   │  │  │
			
 
				-│  │  └───────────────────────────────────────────────────────┘  │  │
			
 
				-│  │  ┌───────────────────────────────────────────────────────┐  │  │
			
 
				-│  │  │                    infinity_util                      │  │  │
			
 
				-│  │  └───────────────────────────────────────────────────────┘  │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-└───────────────────────────────────────────────────────────────────┘
			
 
				-                               │
			
 
				-                               ▼
			
 
				-┌───────────────────────────────────────────────────────────────────┐
			
 
				-│                       External Services                          │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                    Infinity Vector DB                      │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-│  ┌─────────────────────────────────────────────────────────────┐  │
			
 
				-│  │                 AI Model APIs (DashScope, etc.)            │  │
			
 
				-│  └─────────────────────────────────────────────────────────────┘  │
			
 
				-└───────────────────────────────────────────────────────────────────┘
			
 
				-```
			
 
				-
			
 
				-### 3.2 核心模块关系
			
 
				-
			
 
				-#### 3.2.1 多模态嵌入模块
			
 
				-
			
 
				-- **功能**：生成文本和图像的联合嵌入向量
			
 
				-- **依赖**：AI模型API（如DashScope、SiliconFlow等）
			
 
				-- **调用关系**：被hybrid_search_http.py和agent模块调用
			
 
				-
			
 
				-#### 3.2.2 混合检索模块
			
 
				-
			
 
				-- **功能**：结合文本检索和向量检索，返回综合结果
			
 
				-- **依赖**：infinity_util、vector_db等工具模块
			
 
				-- **调用关系**：被HTTP API模块和agent模块调用
			
 
				-
			
 
				-#### 3.2.3 HTTP API模块
			
 
				-
			
 
				-- **功能**：提供RESTful API服务
			
 
				-- **依赖**：FastAPI框架、混合检索模块
			
 
				-- **调用关系**：被外部客户端调用
			
 
				-
			
 
				-#### 3.2.4 Agent模块
			
 
				-
			
 
				-- **功能**：提供智能代理功能，结合检索结果生成回答
			
 
				-- **依赖**：混合检索模块、LLM模型
			
 
				-- **调用关系**：可被外部系统直接调用
			
 
				-
			
 
				-#### 3.2.5 Workflow模块
			
 
				-
			
 
				-- **功能**：管理系统工作流，如文档处理、图像分析等
			
 
				-- **依赖**：Parser模块、Model模块
			
 
				-- **调用关系**：被外部系统或定时任务调用
			
 
				-
			
 
				-## 4. 核心模块设计
			
 
				-
			
 
				-### 4.1 多模态嵌入模块
			
 
				-
			
 
				-#### 4.1.1 设计目标
			
 
				-
			
 
				-- 支持文本和图像的联合嵌入生成
			
 
				-- 兼容多种AI模型API
			
 
				-- 提供统一的嵌入接口
			
 
				-- 支持配置不同的模型参数
			
 
				-
			
 
				-#### 4.1.2 核心类与方法
			
 
				-
			
 
				-```python
			
 
				-class Embedding:
			
 
				-    def __init__(self, model_name: str, api_key: str):
			
 
				-        # 初始化嵌入模型
			
 
				-        pass
			
 
				-    
			
 
				-    def get_multimodal_embedding(self, text: str, image: Image.Image) -> List[float]:
			
 
				-        # 生成多模态嵌入向量
			
 
				-        pass
			
 
				-```
			
 
				-
			
 
				-#### 4.1.3 支持的模型
			
 
				-
			
 
				-- Qwen VL（通过DashScope API）
			
 
				-- 其他多模态模型（可扩展）
			
 
				-
			
 
				-### 4.2 混合检索模块
			
 
				-
			
 
				-#### 4.2.1 设计目标
			
 
				-
			
 
				-- 结合文本检索和向量检索
			
 
				-- 支持灵活的检索参数配置
			
 
				-- 提供准确的检索结果
			
 
				-- 支持分页和排序
			
 
				-
			
 
				-#### 4.2.2 核心类与方法
			
 
				-
			
 
				-```python
			
 
				-class InfinityVectorDB:
			
 
				-    def hybrid_search(self, index_name: str, match_method: str, vector_field: str, 
			
 
				-                     query_vector: List[float], element_type: str, metric_type: str,
			
 
				-                     topn: int, text_query: str, text_field: str) -> Dict[str, Any]:
			
 
				-        # 执行混合检索
			
 
				-        pass
			
 
				-    
			
 
				-    def vector_search(self, index_name: str, vector_field: str, vector: List[float], 
			
 
				-                     size: int = 10, filter: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        # 执行向量检索
			
 
				-        pass
			
 
				-```
			
 
				-
			
 
				-#### 4.2.3 检索流程
			
 
				-
			
 
				-1. 接收检索请求，包括文本查询、图像和检索参数
			
 
				-2. 生成多模态嵌入向量
			
 
				-3. 调用Infinity向量数据库进行混合检索
			
 
				-4. 处理检索结果，转换为统一格式
			
 
				-5. 返回检索结果
			
 
				-
			
 
				-### 4.3 HTTP API模块
			
 
				-
			
 
				-#### 4.3.1 设计目标
			
 
				-
			
 
				-- 提供RESTful API接口
			
 
				-- 支持请求参数验证
			
 
				-- 提供统一的响应格式
			
 
				-- 支持错误处理和日志记录
			
 
				-
			
 
				-#### 4.3.2 核心API
			
 
				-
			
 
				-| API路径 | 方法 | 功能 |
			
 
				-|--------|------|------|
			
 
				-| /hybrid_search | POST | 执行混合检索 |
			
 
				-| /health | GET | 健康检查 |
			
 
				-
			
 
				-#### 4.3.3 请求和响应模型
			
 
				-
			
 
				-```python
			
 
				-class HybridSearchRequest(BaseModel):
			
 
				-    text_query: str
			
 
				-    image: str
			
 
				-    topn: int = 2
			
 
				-    
			
 
				-class HybridSearchResponse(BaseModel):
			
 
				-    success: bool
			
 
				-    message: str
			
 
				-    output: List[Dict[str, Any]] = []
			
 
				-    total: int = 0
			
 
				-```
			
 
				-
			
 
				-### 4.4 HTTP客户端模块
			
 
				-
			
 
				-#### 4.4.1 设计目标
			
 
				-
			
 
				-- 提供统一的HTTP请求接口
			
 
				-- 支持重试机制
			
 
				-- 支持不同的HTTP方法
			
 
				-- 支持文件上传
			
 
				-
			
 
				-#### 4.4.2 核心类与方法
			
 
				-
			
 
				-```python
			
 
				-class HTTPClient:
			
 
				-    def post(self, endpoint: str, data: Optional[Dict] = None, 
			
 
				-             json_data: Optional[Dict] = None, files: Optional[Dict] = None,
			
 
				-             headers: Optional[Dict] = None) -> Dict[str, Any]:
			
 
				-        # 发送POST请求
			
 
				-        pass
			
 
				-    
			
 
				-    def get(self, endpoint: str, params: Optional[Dict] = None,
			
 
				-            headers: Optional[Dict] = None) -> Dict[str, Any]:
			
 
				-        # 发送GET请求
			
 
				-        pass
			
 
				-    
			
 
				-    def get_json(self, endpoint: str, json_data: Optional[Dict] = None,
			
 
				-                headers: Optional[Dict] = None) -> Dict[str, Any]:
			
 
				-        # 发送带有JSON数据的GET请求
			
 
				-        pass
			
 
				-    
			
 
				-    def put(self, endpoint: str, data: Optional[Dict] = None, 
			
 
				-            json_data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Dict[str, Any]:
			
 
				-        # 发送PUT请求
			
 
				-        pass
			
 
				-    
			
 
				-    def delete(self, endpoint: str, data: Optional[Dict] = None, 
			
 
				-               json_data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Dict[str, Any]:
			
 
				-        # 发送DELETE请求
			
 
				-        pass
			
 
				-```
			
 
				-
			
 
				-## 5. 数据流程
			
 
				-
			
 
				-### 5.1 多模态检索流程
			
 
				-
			
 
				-```
			
 
				-1. 客户端发送检索请求，包括文本查询和图像URL
			
 
				-2. API层接收请求，解析参数
			
 
				-3. 下载图像，转换为Image对象
			
 
				-4. 调用多模态嵌入模块生成嵌入向量
			
 
				-5. 调用混合检索模块执行检索
			
 
				-6. 处理检索结果，转换为统一格式
			
 
				-7. 返回JSON响应给客户端
			
 
				-```
			
 
				-
			
 
				-### 5.2 文档处理流程
			
 
				-
			
 
				-```
			
 
				-1. 上传PDF文档
			
 
				-2. 解析PDF，提取文本和图像
			
 
				-3. 生成文档的多模态嵌入
			
 
				-4. 将嵌入向量和元数据存储到向量数据库
			
 
				-5. 建立索引，便于后续检索
			
 
				-```
			
 
				-
			
 
				-## 6. 配置管理
			
 
				-
			
 
				-### 6.1 配置文件结构
			
 
				-
			
 
				-```
			
 
				-├── conf/
			
 
				-│   ├── config.py          # 配置管理类
			
 
				-│   ├── infinity_mapping.json  # Infinity数据库映射配置
			
 
				-│   └── __init__.py
			
 
				-└── .env                   # 环境变量配置
			
 
				-```
			
 
				-
			
 
				-### 6.2 配置管理类
			
 
				-
			
 
				-```python
			
 
				-class ModelConfig:
			
 
				-    @staticmethod
			
 
				-    def get_multimodal_embedding_model_name() -> str:
			
 
				-        # 获取多模态嵌入模型名称
			
 
				-        pass
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def get_dashscope_api_key() -> str:
			
 
				-        # 获取DashScope API密钥
			
 
				-        pass
			
 
				-    
			
 
				-    # 其他模型配置方法
			
 
				-    
			
 
				-class VectorDBConfig:
			
 
				-    @staticmethod
			
 
				-    def get_infinity_database() -> str:
			
 
				-        # 获取Infinity数据库名称
			
 
				-        pass
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def get_infinity_table_name() -> str:
			
 
				-        # 获取Infinity表名称
			
 
				-        pass
			
 
				-    
			
 
				-    # 其他向量数据库配置方法
			
 
				-```
			
 
				-
			
 
				-### 6.3 环境变量配置
			
 
				-
			
 
				-```
			
 
				-# 模型API配置
			
 
				-DASHSCOPE_API_KEY=your_api_key
			
 
				-SILICONFLOW_API_KEY=your_api_key
			
 
				-
			
 
				-# 向量数据库配置
			
 
				-INFINITY_HOST=http://localhost:23820
			
 
				-INFINITY_DATABASE=image_db
			
 
				-
			
 
				-# 应用配置
			
 
				-LOG_LEVEL=INFO
			
 
				-```
			
 
				-
			
 
				-## 7. 技术栈
			
 
				-
			
 
				-| 类别 | 技术/框架 | 用途 |
			
 
				-|------|----------|------|
			
 
				-| 编程语言 | Python 3.12 | 主要开发语言 |
			
 
				-| Web框架 | FastAPI | HTTP API服务 |
			
 
				-| HTTP客户端 | Requests | HTTP请求处理 |
			
 
				-| 图像处理 | PIL/Pillow | 图像加载和处理 |
			
 
				-| 向量数据库 | Infinity | 向量存储和检索 |
			
 
				-| LLM集成 | LangChain | 大语言模型集成 |
			
 
				-| AI模型API | DashScope, SiliconFlow | 多模态嵌入和生成 |
			
 
				-| 配置管理 | python-dotenv | 环境变量管理 |
			
 
				-| 测试框架 | pytest | 单元测试和集成测试 |
			
 
				-
			
 
				-## 8. 部署和运行
			
 
				-
			
 
				-### 8.1 依赖安装
			
 
				-
			
 
				-```bash
			
 
				-pip install -r requirements.txt
			
 
				-```
			
 
				-
			
 
				-### 8.2 运行HTTP服务
			
 
				-
			
 
				-```bash
			
 
				-python -m api.hybrid_search_http
			
 
				-```
			
 
				-
			
 
				-### 8.3 测试运行
			
 
				-
			
 
				-```bash
			
 
				-python -m pytest test/
			
 
				-```
			
 
				-
			
 
				-## 9. 测试策略
			
 
				-
			
 
				-### 9.1 单元测试
			
 
				-
			
 
				-- 针对核心模块的单元测试
			
 
				-- 测试覆盖主要功能点
			
 
				-- 使用模拟对象减少外部依赖
			
 
				-
			
 
				-### 9.2 集成测试
			
 
				-
			
 
				-- 测试模块之间的集成
			
 
				-- 测试与外部服务的集成
			
 
				-- 测试完整的业务流程
			
 
				-
			
 
				-### 9.3 性能测试
			
 
				-
			
 
				-- 测试多模态嵌入生成的性能
			
 
				-- 测试混合检索的响应时间
			
 
				-- 测试系统的并发处理能力
			
 
				-
			
 
				-## 10. 扩展和维护
			
 
				-
			
 
				-### 10.1 扩展方向
			
 
				-
			
 
				-- 支持更多的多模态模型
			
 
				-- 支持更多的向量数据库
			
 
				-- 增强文档处理能力，支持更多文档格式
			
 
				-- 添加更多的检索算法和优化策略
			
 
				-- 增强API功能，支持更复杂的检索请求
			
 
				-
			
 
				-### 10.2 维护建议
			
 
				-
			
 
				-- 定期更新依赖库
			
 
				-- 监控系统性能和错误日志
			
 
				-- 定期备份数据
			
 
				-- 进行安全审计和漏洞修复
			
 
				-- 保持文档更新
			
 
				-
			
 
				-## 11. 结论
			
 
				-
			
 
				-Ragflow_plugs是一个功能强大的多模态混合检索系统，具有良好的架构设计和灵活的扩展能力。该系统支持文本和图像的联合检索，提供高效、准确的检索结果，并通过HTTP API方便外部系统集成。
			
 
				-
			
 
				-通过模块化设计和清晰的接口定义，系统具有良好的可维护性和可扩展性。配置驱动的设计使得系统可以轻松适应不同的环境和需求。
			
 
				-
			
 
				-该系统可以广泛应用于智能问答、图像搜索、内容推荐等场景，为智能应用提供强大的多模态信息检索能力。
			
--- a/main.py
+++ b/main.py
@@ -0,0 +1,64 @@
 
				+# 主应用入口，整合多个 FastAPI 应用
			
 
				+import uvicorn
			
 
				+from fastapi import FastAPI
			
 
				+from contextlib import asynccontextmanager
			
 
				+
			
 
				+# 导入所有子应用
			
 
				+from api.search_infinity import app as search_app
			
 
				+
			
 
				+# 定义主应用的生命周期管理
			
 
				+@asynccontextmanager
			
 
				+async def main_lifespan(app: FastAPI):
			
 
				+    """主应用生命周期管理"""
			
 
				+    from utils.infinity import get_client, close_client
			
 
				+    print("=== Infinity API Gateway 启动 ===")
			
 
				+    # 1. 初始化全局客户端（在服务启动时）
			
 
				+    get_client(database="book_image_db")
			
 
				+    print("✅ Infinity客户端已初始化")
			
 
				+    yield
			
 
				+
			
 
				+    print("=== Infinity API Gateway 关闭 ===")
			
 
				+    # 2. Close the global client when the service shuts down
			
 
				+    close_client()
			
 
				+    print("✅ Infinity客户端已关闭")
			
 
				+
			
 
				+# 创建主应用
			
 
				+main_app = FastAPI(
			
 
				+    title="Infinity API Gateway",
			
 
				+    description="整合多个 FastAPI 应用的 API 网关",
			
 
				+    version="1.0.0",
			
 
				+    lifespan=main_lifespan
			
 
				+)
			
 
				+
			
 
				+# 挂载子应用
			
 
				+# 1. 搜索 API - 访问路径: /search/*
			
 
				+main_app.mount("/search", search_app, name="search_api")
			
 
				+
			
 
				+# 主应用根路径
			
 
				+@main_app.get("/")
			
 
				+async def root():
			
 
				+    """API 网关根路径"""
			
 
				+    return {
			
 
				+        "message": "Welcome to GRAPH_RAG API Gateway",
			
 
				+        "available_apps": {
			
 
				+            "search_api": "访问路径: /search, 文档: /search/docs",
			
 
				+            "hybrid_http_api": "访问路径: /hybrid, 文档: /hybrid/docs"
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+# 健康检查端点
			
 
				+@main_app.get("/health")
			
 
				+async def health_check():
			
 
				+    """主应用健康检查"""
			
 
				+    return {"status": "healthy", "service": "Infinity API Gateway"}
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    """启动主应用"""
			
 
				+    uvicorn.run(
			
 
				+        "main:main_app",  # 应用路径: 模块名:应用实例名
			
 
				+        host="0.0.0.0",   # 允许所有IP访问
			
 
				+        port=18001,         # 服务端口
			
 
				+        reload=False,       # 开发模式下自动重载
			
 
				+        workers=1,         # 生产环境可根据需要增加
			
 
				+        log_level="info"   # 日志级别
			
 
				+    )
			
--- a/model/__pycache__/multimodal_embedding.cpython-312.pyc
+++ b/model/__pycache__/multimodal_embedding.cpython-312.pyc
--- a/model/multimodal_embedding.py
+++ b/model/multimodal_embedding.py
@@ -5,7 +5,7 @@ import io
 
				 from langchain_openai import OpenAIEmbeddings
			
 
				 from dashscope import MultiModalEmbedding
			
 
				 from conf.config import ModelConfig
			
 
				-from utils.minio.image_util import ImageUtil
			
 
				+from utils.file.image_util import image_util as ImageUtil
			
 
				 
			
 
				 class Embedding:
			
 
				     """Embedding模型工具"""
			
@@ -137,4 +137,17 @@ class Embedding:
 
				             else:
			
 
				                 raise Exception(f"Error: {response.message}")   
			
 
				         except Exception as e:
			
 
				-            raise Exception(f"多模态embedding生成失败: {str(e)}")
			
 
				+            raise Exception(f"多模态embedding生成失败: {str(e)}")
			
 
				+
			
 
				+# Factory helper: builds a new Embedding on every call (no instance is cached)
			
 
				+def get_embedding_model() -> Embedding:
			
 
				+    """
			
 
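				+    Build an Embedding model from the ModelConfig settings.
			
 
				+    
			
 
				+    NOTE: this is not a cached singleton; every call constructs a new
			
 
				+    Embedding using the multimodal embedding model name and the
			
 
				+    DashScope API key returned by ModelConfig.
			
 
				+    
			
 
				+    Returns:
			
 
				+        Embedding: a newly constructed Embedding instance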
			
 
				+    """
			
 
				+    return Embedding(
			
 
				+        model_name=ModelConfig.get_multimodal_embedding_model_name(),
			
 
				+        api_key=ModelConfig.get_dashscope_api_key()
			
 
				+    )
			
--- a/test/check_infinity_sdk.py
+++ b/test/check_infinity_sdk.py
@@ -1,43 +0,0 @@
 
				-import infinity
			
 
				-
			
 
				-print("=== 检查Infinity SDK结构 ===")
			
 
				-
			
 
				-# 检查infinity模块的内容
			
 
				-print("\n1. Infinity模块内容:")
			
 
				-print(dir(infinity))
			
 
				-
			
 
				-# 检查infinity是否有common子模块
			
 
				-print("\n2. 检查infinity.common是否存在:")
			
 
				-try:
			
 
				-    import infinity.common
			
 
				-    print("✓ infinity.common存在")
			
 
				-    print("infinity.common内容:")
			
 
				-    print(dir(infinity.common))
			
 
				-except Exception as e:
			
 
				-    print(f"✗ infinity.common不存在: {e}")
			
 
				-
			
 
				-# 检查infinity是否有ConflictType
			
 
				-print("\n3. 检查infinity.ConflictType是否存在:")
			
 
				-print(f"hasattr(infinity, 'ConflictType'): {hasattr(infinity, 'ConflictType')}")
			
 
				-
			
 
				-# 检查infinity是否有其他相关属性
			
 
				-print("\n4. 检查infinity的其他属性:")
			
 
				-for attr in ['DataType', 'IndexType', 'MetricType', 'NetworkAddress', 'create_database']:
			
 
				-    print(f"hasattr(infinity, '{attr}'): {hasattr(infinity, attr)}")
			
 
				-
			
 
				-# 尝试查找ConflictType的正确位置
			
 
				-print("\n5. 尝试查找ConflictType:")
			
 
				-import pkgutil
			
 
				-import sys
			
 
				-
			
 
				-for _, module_name, _ in pkgutil.iter_modules(sys.modules['infinity'].__path__):
			
 
				-    full_module_name = f"infinity.{module_name}"
			
 
				-    try:
			
 
				-        module = __import__(full_module_name, fromlist=[''])
			
 
				-        print(f"\n检查模块: {full_module_name}")
			
 
				-        module_attrs = dir(module)
			
 
				-        print(f"属性: {module_attrs}")
			
 
				-        if 'ConflictType' in module_attrs:
			
 
				-            print(f"✓ 找到ConflictType在 {full_module_name}")
			
 
				-    except Exception as e:
			
 
				-        print(f"无法导入 {full_module_name}: {e}")
			
--- a/test/main.py
+++ b/test/main.py
@@ -1,172 +0,0 @@
 
				-import sys
			
 
				-import os
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from workflow.workflow import PDFParsingWorkflow
			
 
				-import json
			
 
				-from typing import Dict, Any
			
 
				-from conf.config import ModelConfig
			
 
				-
			
 
				-os.environ["LANGSMITH_TRACING"] = "true"
			
 
				-os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
			
 
				-os.environ["LANGSMITH_API_KEY"] = "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"
			
 
				-os.environ["LANGSMITH_PROJECT"] = "ragflow_plugs"
			
 
				-
			
 
				-
			
 
				-
			
 
				-class PDFParsingService:
			
 
				-    """PDF扫描件拆分解析服务"""
			
 
				-    
			
 
				-    def __init__(self, model_name: str = None):
			
 
				-        """
			
 
				-        初始化PDF解析服务
			
 
				-        
			
 
				-        Args:
			
 
				-            model_name: QWEN VL模型名称，若为None则使用配置文件中的值
			
 
				-        """
			
 
				-        # 从配置文件获取默认模型名称
			
 
				-        default_model = ModelConfig.get_model_name()
			
 
				-        self.model_name = model_name or default_model
			
 
				-        self.workflow = PDFParsingWorkflow(model_name=self.model_name)
			
 
				-    
			
 
				-    def parse_pdf(self, pdf_path: str) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        解析PDF扫描件
			
 
				-        
			
 
				-        Args:
			
 
				-            pdf_path: PDF文件路径
			
 
				-            
			
 
				-        Returns:
			
 
				-            Dict: 解析结果，包含:
			
 
				-                - pdf_path: PDF文件路径
			
 
				-                - total_pages: 总页数
			
 
				-                - parsed_results: 每一页的解析结果
			
 
				-                - is_complete: 是否完成
			
 
				-        """
			
 
				-        # 运行工作流
			
 
				-        result = self.workflow.run(pdf_path, ModelConfig.get_dataset_id(), ModelConfig.get_ragflow_api_url(), ModelConfig.get_ragflow_api_key())
			
 
				-        
			
 
				-        # 整理输出结果
			
 
				-        output = {
			
 
				-            "pdf_path": result["pdf_path"],
			
 
				-            "total_pages": len(result["split_pages"]),
			
 
				-            "parsed_results": result["parsed_results"],
			
 
				-            "is_complete": result["is_complete"]
			
 
				-        }
			
 
				-        
			
 
				-        return output
			
 
				-    
			
 
				-    def parse_pdf_to_json(self, pdf_path: str, output_json_path: str = None) -> str:
			
 
				-        """
			
 
				-        解析PDF并输出为JSON格式
			
 
				-        
			
 
				-        Args:
			
 
				-            pdf_path: PDF文件路径
			
 
				-            output_json_path: 输出JSON文件路径，若为None则返回JSON字符串
			
 
				-            
			
 
				-        Returns:
			
 
				-            str: JSON字符串或输出文件路径
			
 
				-        """
			
 
				-        result = self.parse_pdf(pdf_path)
			
 
				-        json_str = json.dumps(result, ensure_ascii=False, indent=2)
			
 
				-        
			
 
				-        if output_json_path:
			
 
				-            with open(output_json_path, "w", encoding="utf-8") as f:
			
 
				-                f.write(json_str)
			
 
				-            return output_json_path
			
 
				-        
			
 
				-        return json_str
			
 
				-    
			
 
				-    def parse_pdf_to_markdown(self, pdf_path: str) -> str:
			
 
				-        """
			
 
				-        解析PDF并输出为Markdown格式，包含页码、描述和图片
			
 
				-        
			
 
				-        Args:
			
 
				-            pdf_path: PDF文件路径
			
 
				-            
			
 
				-        Returns:
			
 
				-            str: 输出Markdown文件路径
			
 
				-        """
			
 
				-        # 解析PDF
			
 
				-        result = self.parse_pdf(pdf_path)
			
 
				-        
			
 
				-        # 获取PDF文件名（不含扩展名）
			
 
				-        pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]
			
 
				-        
			
 
				-        # 输出目录
			
 
				-        output_dir = r"d:\project\work\ragflow_plugs\book\output"
			
 
				-        
			
 
				-        # 确保输出目录存在
			
 
				-        os.makedirs(output_dir, exist_ok=True)
			
 
				-        
			
 
				-        # Markdown文件名
			
 
				-        md_filename = f"{pdf_filename}.md"
			
 
				-        md_file_path = os.path.join(output_dir, md_filename)
			
 
				-        
			
 
				-        # 图片存储目录
			
 
				-        images_dir = os.path.join(output_dir, f"{pdf_filename}_images")
			
 
				-        os.makedirs(images_dir, exist_ok=True)
			
 
				-        
			
 
				-        # 构建Markdown内容
			
 
				-        md_content = f"# {pdf_filename} 解析结果\n\n"
			
 
				-        md_content += f"**总页数**: {result['total_pages']}\n"
			
 
				-        md_content += f"**模型**: {self.model_name}\n\n"
			
 
				-        md_content += "---\n\n"
			
 
				-        
			
 
				-        # 遍历所有解析结果
			
 
				-        for page_result in result['parsed_results']:
			
 
				-            page_number = page_result.get('page_number', 0)
			
 
				-            content = page_result.get('content', '')
			
 
				-            
			
 
				-            # 写入页码和描述
			
 
				-            md_content += f"## 第 {page_number} 页\n\n"
			
 
				-            md_content += f"### 描述\n{content}\n\n"
			
 
				-            
			
 
				-            # 从temp目录获取已保存的图片
			
 
				-            pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]
			
 
				-            temp_image_dir = r".\temp"
			
 
				-            temp_image_filename = f"{pdf_filename}_{page_number}.png"
			
 
				-            temp_image_path = os.path.join(temp_image_dir, temp_image_filename)
			
 
				-            
			
 
				-            # 检查图片是否存在
			
 
				-            if os.path.exists(temp_image_path):
			
 
				-                # 在Markdown中引用temp目录中的图片，使用正斜杠确保语法正确
			
 
				-                temp_image_url = temp_image_path.replace("\\", "/")
			
 
				-                md_content += f"### 图片\n"
			
 
				-                md_content += f"![第 {page_number} 页图片]({temp_image_url})\n\n"
			
 
				-                md_content += "---\n\n"
			
 
				-        
			
 
				-        # 写入Markdown文件
			
 
				-        with open(md_file_path, "w", encoding="utf-8") as f:
			
 
				-            f.write(md_content)
			
 
				-        
			
 
				-        return md_file_path
			
 
				-
			
 
				-def main():
			
 
				-    """主函数，示例用法"""
			
 
				-    # 示例：使用服务解析PDF
			
 
				-    # 1. 创建服务实例（使用配置文件中的默认模型）
			
 
				-    service = PDFParsingService()
			
 
				-    
			
 
				-    # 2. 或指定模型名称
			
 
				-    # service = PDFParsingService(model_name="qwen3-vl")
			
 
				-    
			
 
				-    # 3. 解析PDF文件
			
 
				-    pdf_path = r"D:\project\work\ragflow_plugs\book\出发！超级播种机.pdf"
			
 
				-    
			
 
				-    # 4. 保存为Markdown文件 
			
 
				-    md_output_path = service.parse_pdf_to_markdown(pdf_path)
			
 
				-    print(f"解析结果已保存到: {md_output_path}")
			
 
				-    
			
 
				-    # 5. 或直接获取结果
			
 
				-    # result = service.parse_pdf(pdf_path)
			
 
				-    # print(json.dumps(result, ensure_ascii=False, indent=2))
			
 
				-    
			
 
				-    # 6. 或保存为JSON文件
			
 
				-    # output_path = service.parse_pdf_to_json(pdf_path, "output.json")
			
 
				-    # print(f"解析结果已保存到: {output_path}")
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/test/test_compress_image_bytes.py
+++ b/test/test_compress_image_bytes.py
@@ -1,58 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试图片压缩到字节流功能
			
 
				-"""
			
 
				-
			
 
				-import os
			
 
				-import sys
			
 
				-from io import BytesIO
			
 
				-from PIL import Image
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.minio.image_util import image_util
			
 
				-
			
 
				-# 生成一个大的测试图片
			
 
				-def generate_test_image(width=2000, height=2000, color=(255, 0, 0)):
			
 
				-    """
			
 
				-    生成一个大的测试图片
			
 
				-    """
			
 
				-    print(f"生成测试图片，大小: {width}x{height}")
			
 
				-    img = Image.new('RGB', (width, height), color=color)
			
 
				-    img_stream = BytesIO()
			
 
				-    img.save(img_stream, format='PNG')
			
 
				-    img_stream.seek(0)
			
 
				-    return img_stream
			
 
				-
			
 
				-# 测试图片压缩到字节流功能
			
 
				-def test_compress_image_bytes():
			
 
				-    """
			
 
				-    测试图片压缩到字节流功能
			
 
				-    """
			
 
				-    print("开始测试图片压缩到字节流功能...")
			
 
				-    
			
 
				-    # 生成测试图片
			
 
				-    img_stream = generate_test_image()
			
 
				-    
			
 
				-    # 将图片流转换为字节流
			
 
				-    img_bytes = img_stream.getvalue()
			
 
				-    print(f"原始图片字节大小: {len(img_bytes)} 字节")
			
 
				-    
			
 
				-    # 调用压缩到字节流方法
			
 
				-    compressed_bytes = image_util.compress_image_bytes(img_bytes, max_size_kb=5000)
			
 
				-    
			
 
				-    # 检查压缩后大小
			
 
				-    compressed_size = len(compressed_bytes) / 1024
			
 
				-    print(f"压缩后大小: {compressed_size:.2f}KB")
			
 
				-    
			
 
				-    # 验证压缩后大小
			
 
				-    assert compressed_size <= 5000, f"压缩后大小 {compressed_size:.2f}KB 超过了最大限制 5000KB"
			
 
				-    
			
 
				-    # 验证返回类型
			
 
				-    assert isinstance(compressed_bytes, bytes), f"返回类型应为bytes，实际为 {type(compressed_bytes)}"
			
 
				-    
			
 
				-    print("图片压缩到字节流测试成功!")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_compress_image_bytes()
			
--- a/test/test_es_conn.py
+++ b/test/test_es_conn.py
@@ -1,121 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-
			
 
				-import json
			
 
				-from services.utils.es_conn import ESConnection
			
 
				-
			
 
				-def test_es_connection():
			
 
				-    """
			
 
				-    测试 Elasticsearch 连接和基本功能
			
 
				-    """
			
 
				-    try:
			
 
				-        # 初始化连接
			
 
				-        print("正在初始化 Elasticsearch 连接...")
			
 
				-        es = ESConnection(hosts=["http://localhost:9200"])
			
 
				-        print("连接成功！")
			
 
				-        
			
 
				-        # 测试索引创建
			
 
				-        index_name = "test_ragflow_index"
			
 
				-        print(f"\n正在创建索引: {index_name}")
			
 
				-        success = es.create_index(index_name)
			
 
				-        if success:
			
 
				-            print(f"索引 {index_name} 创建成功！")
			
 
				-        else:
			
 
				-            print(f"索引 {index_name} 创建失败！")
			
 
				-            return False
			
 
				-        
			
 
				-        # 测试文档插入
			
 
				-        test_doc = {
			
 
				-            "title": "测试文档",
			
 
				-            "content": "这是一个用于测试 Elasticsearch 连接的文档",
			
 
				-            "content_tks": "这 是 一个 用于 测试 Elasticsearch 连接 的 文档",
			
 
				-            "vector_768_vec": [0.1] * 768,
			
 
				-            "created_at": "2024-01-01 00:00:00",
			
 
				-            "count_int": 10,
			
 
				-            "importance_flt": 0.8,
			
 
				-            "tags_kwd": ["测试", "elasticsearch"],
			
 
				-            "kb_id": "test_kb_123"
			
 
				-        }
			
 
				-        
			
 
				-        print("\n正在插入测试文档...")
			
 
				-        insert_success = es.insert(index_name, test_doc)
			
 
				-        if insert_success:
			
 
				-            print("文档插入成功！")
			
 
				-        else:
			
 
				-            print("文档插入失败！")
			
 
				-            return False
			
 
				-        
			
 
				-        # 测试批量插入
			
 
				-        test_docs = []
			
 
				-        for i in range(3):
			
 
				-            doc = {
			
 
				-                "title": f"批量测试文档 {i}",
			
 
				-                "content": f"这是第 {i} 个批量测试文档",
			
 
				-                "content_tks": f"这是 第 {i} 个 批量 测试 文档",
			
 
				-                "vector_768_vec": [0.1] * 768,
			
 
				-                "created_at": "2024-01-01 00:00:00",
			
 
				-                "count_int": i,
			
 
				-                "importance_flt": 0.5 + i * 0.1,
			
 
				-                "tags_kwd": ["批量", "测试"],
			
 
				-                "kb_id": "test_kb_123"
			
 
				-            }
			
 
				-            test_docs.append(doc)
			
 
				-        
			
 
				-        print("\n正在批量插入测试文档...")
			
 
				-        bulk_result = es.bulk_insert(index_name, test_docs)
			
 
				-        print(f"批量插入结果: {bulk_result}")
			
 
				-        
			
 
				-        # 测试全文检索
			
 
				-        print("\n正在测试全文检索...")
			
 
				-        text_query = {
			
 
				-            "match": {
			
 
				-                "content": "测试"
			
 
				-            }
			
 
				-        }
			
 
				-        text_result = es.search(index_name, text_query, size=5)
			
 
				-        print(f"全文检索结果: {text_result['hits']['total']} 个命中")
			
 
				-        
			
 
				-        # 测试向量检索
			
 
				-        print("\n正在测试向量检索...")
			
 
				-        vector = [0.1] * 768
			
 
				-        vector_result = es.knn_search(
			
 
				-            index_name=index_name,
			
 
				-            vector_field="vector_768_vec",
			
 
				-            vector=vector,
			
 
				-            k=3
			
 
				-        )
			
 
				-        print(f"向量检索结果: {vector_result['hits']['total']} 个命中")
			
 
				-        
			
 
				-        # 测试混合检索
			
 
				-        print("\n正在测试混合检索...")
			
 
				-        hybrid_result = es.hybrid_search(
			
 
				-            index_name=index_name,
			
 
				-            text_query="测试",
			
 
				-            vector_field="vector_768_vec",
			
 
				-            vector=vector,
			
 
				-            size=5
			
 
				-        )
			
 
				-        print(f"混合检索结果: {hybrid_result['hits']['total']} 个命中")
			
 
				-        
			
 
				-        # 打印命中的文档
			
 
				-        print("\n混合检索命中的文档:")
			
 
				-        for hit in hybrid_result['hits']['hits']:
			
 
				-            doc = hit['_source']
			
 
				-            print(f"  - 标题: {doc['title']}, 相似度分数: {hit['_score']:.4f}")
			
 
				-        
			
 
				-        # 测试文档删除
			
 
				-        print(f"\n正在删除索引: {index_name}")
			
 
				-        es.es.indices.delete(index=index_name, ignore=[400, 404])
			
 
				-        print(f"索引 {index_name} 删除成功！")
			
 
				-        
			
 
				-        # 关闭连接
			
 
				-        es.close()
			
 
				-        print("\n所有测试完成！")
			
 
				-        return True
			
 
				-        
			
 
				-    except Exception as e:
			
 
				-        print(f"测试失败: {e}")
			
 
				-        return False
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_es_connection()
			
--- a/test/test_fastapi_hybrid_search.py
+++ b/test/test_fastapi_hybrid_search.py
@@ -1,74 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试混合检索FastAPI服务
			
 
				-"""
			
 
				-
			
 
				-import requests
			
 
				-import json
			
 
				-
			
 
				-# 测试数据
			
 
				-test_data = {
			
 
				-    "text_query": "测试",
			
 
				-    "image": "https://example.com/image.jpg",
			
 
				-    "topn": 2
			
 
				-}
			
 
				-
			
 
				-# 发送POST请求
			
 
				-def test_hybrid_search():
			
 
				-    url = "http://localhost:18001/hybrid_search"
			
 
				-    headers = {
			
 
				-        "Content-Type": "application/json"
			
 
				-    }
			
 
				-    
			
 
				-    print("开始测试混合检索FastAPI服务...")
			
 
				-    print(f"请求URL: {url}")
			
 
				-    print(f"请求数据: {json.dumps(test_data, indent=2, ensure_ascii=False)}")
			
 
				-    
			
 
				-    try:
			
 
				-        # 发送POST请求
			
 
				-        response = requests.post(url, headers=headers, json=test_data, timeout=10)
			
 
				-        
			
 
				-        # 打印响应结果
			
 
				-        print(f"\n响应状态码: {response.status_code}")
			
 
				-        print(f"响应头: {dict(response.headers)}")
			
 
				-        print(f"响应内容: {response.text}")
			
 
				-        
			
 
				-        if response.status_code == 200:
			
 
				-            # 解析JSON响应
			
 
				-            response_data = response.json()
			
 
				-            print(f"\n解析后的响应数据: {json.dumps(response_data, indent=2, ensure_ascii=False)}")
			
 
				-            print("测试成功!")
			
 
				-        else:
			
 
				-            print(f"\n测试失败，状态码: {response.status_code}")
			
 
				-    except Exception as e:
			
 
				-        print(f"\n测试失败，请求异常: {str(e)}")
			
 
				-
			
 
				-# 测试健康检查接口
			
 
				-def test_health_check():
			
 
				-    url = "http://localhost:18001/health"
			
 
				-    
			
 
				-    print("\n开始测试健康检查接口...")
			
 
				-    print(f"请求URL: {url}")
			
 
				-    
			
 
				-    try:
			
 
				-        # 发送GET请求
			
 
				-        response = requests.get(url, timeout=5)
			
 
				-        
			
 
				-        # 打印响应结果
			
 
				-        print(f"\n响应状态码: {response.status_code}")
			
 
				-        print(f"响应头: {dict(response.headers)}")
			
 
				-        print(f"响应内容: {response.text}")
			
 
				-        
			
 
				-        if response.status_code == 200:
			
 
				-            # 解析JSON响应
			
 
				-            response_data = response.json()
			
 
				-            print(f"\n解析后的响应数据: {json.dumps(response_data, indent=2, ensure_ascii=False)}")
			
 
				-            print("健康检查测试成功!")
			
 
				-        else:
			
 
				-            print(f"\n健康检查测试失败，状态码: {response.status_code}")
			
 
				-    except Exception as e:
			
 
				-        print(f"\n健康检查测试失败，请求异常: {str(e)}")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_hybrid_search()
			
 
				-    test_health_check()
			
--- a/test/test_file_upload.py
+++ b/test/test_file_upload.py
@@ -1,82 +0,0 @@
 
				-import sys
			
 
				-import os
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-def test_file_upload():
			
 
				-    """测试文件上传功能"""
			
 
				-    try:
			
 
				-        from services.utils.http_client import HTTPClient
			
 
				-        
			
 
				-        # 创建HTTP客户端实例
			
 
				-        http_client = HTTPClient(
			
 
				-            base_url="http://localhost:8000",  # 替换为实际的API URL
			
 
				-            api_key="your_api_key"  # 替换为实际的API密钥
			
 
				-        )
			
 
				-        
			
 
				-        # 测试文件路径
			
 
				-        test_file_path = r"D:\project\work\ragflow_plugs\book\不一样的卡梅拉1-我想去看海.pdf"
			
 
				-        
			
 
				-        # 打开文件并构建files字典
			
 
				-        with open(test_file_path, 'rb') as f:
			
 
				-            files = {'file': (os.path.basename(test_file_path), f)}
			
 
				-            
			
 
				-            print(f"测试文件上传: {test_file_path}")
			
 
				-            print(f"文件字典: {files}")
			
 
				-            
			
 
				-            # 发送POST请求，测试文件上传
			
 
				-            response = http_client.post(
			
 
				-                "/api/v1/test/upload",  # 替换为实际的上传端点
			
 
				-                files=files
			
 
				-            )
			
 
				-            
			
 
				-            print(f"上传响应: {response}")
			
 
				-            print("✓ 文件上传测试通过")
			
 
				-            return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 文件上传测试失败: {str(e)}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-def test_post_without_files():
			
 
				-    """测试不带文件的POST请求"""
			
 
				-    try:
			
 
				-        from services.utils.http_client import HTTPClient
			
 
				-        
			
 
				-        # 创建HTTP客户端实例
			
 
				-        http_client = HTTPClient(
			
 
				-            base_url="http://localhost:8000",  # 替换为实际的API URL
			
 
				-            api_key="your_api_key"  # 替换为实际的API密钥
			
 
				-        )
			
 
				-        
			
 
				-        # 发送普通POST请求
			
 
				-        response = http_client.post(
			
 
				-            "/api/v1/test/post",  # 替换为实际的POST端点
			
 
				-            json={"key": "value"}
			
 
				-        )
			
 
				-        
			
 
				-        print(f"普通POST响应: {response}")
			
 
				-        print("✓ 普通POST请求测试通过")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 普通POST请求测试失败: {str(e)}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-def main():
			
 
				-    """主测试函数"""
			
 
				-    print("=== 测试文件上传修复 ===")
			
 
				-    
			
 
				-    # 测试文件上传
			
 
				-    test_file_upload()
			
 
				-    
			
 
				-    # 测试普通POST请求
			
 
				-    test_post_without_files()
			
 
				-    
			
 
				-    print("\n=== 测试完成 ===")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/test/test_full_service.py
+++ b/test/test_full_service.py
@@ -1,85 +0,0 @@
 
				-"""完整PDF解析服务测试脚本"""
			
 
				-
			
 
				-import sys
			
 
				-import os
			
 
				-import json
			
 
				-from pathlib import Path
			
 
				-
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-from services.pdf_parser.main import PDFParsingService
			
 
				-
			
 
				-def test_full_service():
			
 
				-    """测试完整的PDF解析服务"""
			
 
				-    print("=" * 50)
			
 
				-    print("完整PDF解析服务测试")
			
 
				-    print("=" * 50)
			
 
				-    
			
 
				-    # 检查PDF文件
			
 
				-    pdf_path = r"D:\project\work\ragflow_plugs\book\不一样的卡梅拉1-我想去看海.pdf"
			
 
				-    pdf = Path(pdf_path)
			
 
				-    if not pdf.exists():
			
 
				-        print(f"✗ PDF文件不存在: {pdf_path}")
			
 
				-        print("请确保PDF文件存在，或修改脚本中的pdf_path变量")
			
 
				-        return False
			
 
				-    
			
 
				-    print(f"✓ PDF文件存在: {pdf_path}")
			
 
				-    print(f"  大小: {pdf.stat().st_size} 字节")
			
 
				-    print()
			
 
				-    
			
 
				-    try:
			
 
				-        # 创建服务实例
			
 
				-        print("创建PDF解析服务实例...")
			
 
				-        service = PDFParsingService(model_name="gpt-4o")
			
 
				-        print("✓ 服务实例创建成功")
			
 
				-        print(f"  使用模型: {service.model_name}")
			
 
				-        print()
			
 
				-        
			
 
				-        # 测试PDF拆分功能
			
 
				-        print("测试PDF拆分功能...")
			
 
				-        from services.pdf_parser.pdf_splitter import PDFSplitter
			
 
				-        splitter = PDFSplitter()
			
 
				-        pages = splitter.split_pdf(pdf_path)
			
 
				-        print(f"✓ PDF拆分成功，共 {len(pages)} 页")
			
 
				-        print(f"  第1页图像大小: {pages[0]['image'].size}")
			
 
				-        print()
			
 
				-        
			
 
				-        # 注意：完整解析需要模型API密钥，这里只测试到拆分阶段
			
 
				-        print("注意：完整解析需要配置模型API密钥")
			
 
				-        print("当前只测试了PDF拆分功能，模型解析需要配置API_KEY")
			
 
				-        print()
			
 
				-        
			
 
				-        print("=" * 50)
			
 
				-        print("测试完成！")
			
 
				-        print("✓ PDF拆分功能正常工作")
			
 
				-        print("✓ 服务实例创建成功")
			
 
				-        print("✓ 依赖配置正确")
			
 
				-        print("=" * 50)
			
 
				-        
			
 
				-        # 输出使用说明
			
 
				-        print("\n使用说明：")
			
 
				-        print("1. 配置.env文件：")
			
 
				-        print("   - API_KEY=your-api-key")
			
 
				-        print("   - BASE_URL=https://api.openai.com/v1")
			
 
				-        print("   - MODEL_NAME=qwen3-vl")
			
 
				-        print("   - MODEL_PROVIDER=openai")
			
 
				-        print("2. 运行解析：")
			
 
				-        print("   service = PDFParsingService()")
			
 
				-        print("   result = service.parse_pdf('your_pdf_file.pdf')")
			
 
				-        
			
 
				-        return True
			
 
				-        
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 测试失败: {str(e)}")
			
 
				-        print("可能的解决方案:")
			
 
				-        print("1. 确保已安装所有依赖: pip install -r requirements.txt")
			
 
				-        print("2. 检查PDF文件是否损坏")
			
 
				-        print("3. 检查PyMuPDF版本是否兼容")
			
 
				-        return False
			
 
				-
			
 
				-def main():
			
 
				-    """主函数"""
			
 
				-    test_full_service()
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/test/test_http_hybrid_search.py
+++ b/test/test_http_hybrid_search.py
@@ -1,61 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试混合检索HTTP服务
			
 
				-"""
			
 
				-
			
 
				-import requests
			
 
				-import json
			
 
				-
			
 
				-# 测试数据 - JSON-RPC 2.0格式
			
 
				-test_data = {
			
 
				-    "jsonrpc": "2.0",
			
 
				-    "method": "hybrid_search",
			
 
				-    "params": {
			
 
				-        "text_query": "测试",
			
 
				-        "image": "https://example.com/image.jpg",
			
 
				-        "topn": 2
			
 
				-    },
			
 
				-    "id": "test-123"
			
 
				-}
			
 
				-
			
 
				-# 发送POST请求
			
 
				-def test_hybrid_search():
			
 
				-    headers = {
			
 
				-        "Content-Type": "application/json",
			
 
				-        "Accept": "application/json, text/event-stream"
			
 
				-    }
			
 
				-    
			
 
				-    # 尝试不同的URL路径
			
 
				-    test_urls = [
			
 
				-        "http://localhost:18000",
			
 
				-        "http://localhost:18000/mcp",
			
 
				-        "http://localhost:18000/tools/hybrid_search",
			
 
				-        "http://localhost:18000/api/hybrid_search"
			
 
				-    ]
			
 
				-    
			
 
				-    print("开始测试混合检索HTTP服务...")
			
 
				-    print(f"请求数据: {json.dumps(test_data, indent=2, ensure_ascii=False)}")
			
 
				-    
			
 
				-    for url in test_urls:
			
 
				-        print(f"\n尝试URL: {url}")
			
 
				-        try:
			
 
				-            # 发送POST请求
			
 
				-            response = requests.post(url, headers=headers, json=test_data, timeout=10)
			
 
				-            
			
 
				-            # 打印响应结果
			
 
				-            print(f"响应状态码: {response.status_code}")
			
 
				-            print(f"响应内容: {response.text[:200]}...")
			
 
				-            
			
 
				-            if response.status_code == 200:
			
 
				-                # 解析JSON响应
			
 
				-                response_data = response.json()
			
 
				-                print(f"解析后的响应数据: {json.dumps(response_data, indent=2, ensure_ascii=False)}")
			
 
				-                print("测试成功!")
			
 
				-                return
			
 
				-        except Exception as e:
			
 
				-            print(f"请求失败: {str(e)}")
			
 
				-    
			
 
				-    print("所有URL路径都测试失败")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_hybrid_search()
			
--- a/test/test_image_compression.py
+++ b/test/test_image_compression.py
@@ -1,58 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试图片压缩功能
			
 
				-"""
			
 
				-
			
 
				-import os
			
 
				-import sys
			
 
				-from io import BytesIO
			
 
				-from PIL import Image
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.minio.image_util import image_util
			
 
				-
			
 
				-# 生成一个大的测试图片
			
 
				-def generate_test_image(width=2000, height=2000, color=(255, 0, 0)):
			
 
				-    """
			
 
				-    生成一个大的测试图片
			
 
				-    """
			
 
				-    print(f"生成测试图片，大小: {width}x{height}")
			
 
				-    img = Image.new('RGB', (width, height), color=color)
			
 
				-    img_stream = BytesIO()
			
 
				-    img.save(img_stream, format='PNG')
			
 
				-    img_stream.seek(0)
			
 
				-    return img_stream
			
 
				-
			
 
				-# 测试图片压缩功能
			
 
				-def test_image_compression():
			
 
				-    """
			
 
				-    测试图片压缩功能
			
 
				-    """
			
 
				-    print("开始测试图片压缩功能...")
			
 
				-    
			
 
				-    # 生成测试图片
			
 
				-    img_stream = generate_test_image()
			
 
				-    
			
 
				-    # 检查压缩前大小
			
 
				-    img_stream.seek(0, 2)
			
 
				-    original_size = img_stream.tell() / 1024
			
 
				-    img_stream.seek(0)
			
 
				-    print(f"压缩前大小: {original_size:.2f}KB")
			
 
				-    
			
 
				-    # 调用压缩方法
			
 
				-    compressed_stream = image_util._compress_image(img_stream, "test_image.png", max_size_kb=5000)
			
 
				-    
			
 
				-    # 检查压缩后大小
			
 
				-    compressed_stream.seek(0, 2)
			
 
				-    compressed_size = compressed_stream.tell() / 1024
			
 
				-    compressed_stream.seek(0)
			
 
				-    print(f"压缩后大小: {compressed_size:.2f}KB")
			
 
				-    
			
 
				-    # 验证压缩后大小
			
 
				-    assert compressed_size <= 5000, f"压缩后大小 {compressed_size:.2f}KB 超过了最大限制 5000KB"
			
 
				-    print("图片压缩测试成功!")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_image_compression()
			
--- a/test/test_image_compression_bytes.py
+++ b/test/test_image_compression_bytes.py
@@ -1,60 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试图片压缩到字节流功能
			
 
				-"""
			
 
				-
			
 
				-import os
			
 
				-import sys
			
 
				-from io import BytesIO
			
 
				-from PIL import Image
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.minio.image_util import image_util
			
 
				-
			
 
				-# 生成一个大的测试图片
			
 
				-def generate_test_image(width=2000, height=2000, color=(255, 0, 0)):
			
 
				-    """
			
 
				-    生成一个大的测试图片
			
 
				-    """
			
 
				-    print(f"生成测试图片，大小: {width}x{height}")
			
 
				-    img = Image.new('RGB', (width, height), color=color)
			
 
				-    img_stream = BytesIO()
			
 
				-    img.save(img_stream, format='PNG')
			
 
				-    img_stream.seek(0)
			
 
				-    return img_stream
			
 
				-
			
 
				-# 测试图片压缩到字节流功能
			
 
				-def test_image_compression_to_bytes():
			
 
				-    """
			
 
				-    测试图片压缩到字节流功能
			
 
				-    """
			
 
				-    print("开始测试图片压缩到字节流功能...")
			
 
				-    
			
 
				-    # 生成测试图片
			
 
				-    img_stream = generate_test_image()
			
 
				-    
			
 
				-    # 检查压缩前大小
			
 
				-    img_stream.seek(0, 2)
			
 
				-    original_size = img_stream.tell() / 1024
			
 
				-    img_stream.seek(0)
			
 
				-    print(f"压缩前大小: {original_size:.2f}KB")
			
 
				-    
			
 
				-    # 调用压缩到字节流方法
			
 
				-    compressed_bytes = image_util._compress_image_to_bytes(img_stream, "test_image.png", max_size_kb=5000)
			
 
				-    
			
 
				-    # 检查压缩后大小
			
 
				-    compressed_size = len(compressed_bytes) / 1024
			
 
				-    print(f"压缩后大小: {compressed_size:.2f}KB")
			
 
				-    
			
 
				-    # 验证压缩后大小
			
 
				-    assert compressed_size <= 5000, f"压缩后大小 {compressed_size:.2f}KB 超过了最大限制 5000KB"
			
 
				-    
			
 
				-    # 验证返回类型
			
 
				-    assert isinstance(compressed_bytes, bytes), f"返回类型应为bytes，实际为 {type(compressed_bytes)}"
			
 
				-    
			
 
				-    print("图片压缩到字节流测试成功!")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_image_compression_to_bytes()
			
--- a/test/test_image_compression_fix.py
+++ b/test/test_image_compression_fix.py
@@ -1,100 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-"""
			
 
				-测试图片压缩修复
			
 
				-验证修改后的压缩方法是否能成功将图片压缩到5000KB以内
			
 
				-"""
			
 
				-import sys
			
 
				-import os
			
 
				-from io import BytesIO
			
 
				-from PIL import Image
			
 
				-import random
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.minio.image_util import ImageUtil
			
 
				-
			
 
				-def create_large_test_image(width=3000, height=3000) -> BytesIO:
			
 
				-    """
			
 
				-    创建一个大尺寸测试图片
			
 
				-    
			
 
				-    Args:
			
 
				-        width: 图片宽度
			
 
				-        height: 图片高度
			
 
				-        
			
 
				-    Returns:
			
 
				-        BytesIO: 大尺寸图片流
			
 
				-    """
			
 
				-    print(f"创建 {width}x{height} 的测试图片...")
			
 
				-    
			
 
				-    # 创建一个大尺寸图片，使用随机颜色填充
			
 
				-    img = Image.new('RGB', (width, height), color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
			
 
				-    
			
 
				-    # 将图片保存到BytesIO流
			
 
				-    img_stream = BytesIO()
			
 
				-    img.save(img_stream, format='PNG')
			
 
				-    img_stream.seek(0)
			
 
				-    
			
 
				-    # 检查图片大小
			
 
				-    img_stream.seek(0, 2)
			
 
				-    size_kb = img_stream.tell() / 1024
			
 
				-    img_stream.seek(0)
			
 
				-    
			
 
				-    print(f"测试图片创建完成，大小为 {size_kb:.2f}KB")
			
 
				-    return img_stream
			
 
				-
			
 
				-def test_image_compression():
			
 
				-    """
			
 
				-    测试图片压缩方法
			
 
				-    """
			
 
				-    print("开始测试图片压缩方法...")
			
 
				-    
			
 
				-    # 创建ImageUtil实例
			
 
				-    image_util = ImageUtil()
			
 
				-    
			
 
				-    # 测试不同尺寸的图片压缩
			
 
				-    test_sizes = [
			
 
				-        (3000, 3000),   # 约 25MB
			
 
				-        (4000, 4000),   # 约 45MB
			
 
				-        (5000, 5000)    # 约 70MB
			
 
				-    ]
			
 
				-    
			
 
				-    for width, height in test_sizes:
			
 
				-        print(f"\n=== 测试 {width}x{height} 图片压缩 ===")
			
 
				-        
			
 
				-        # 创建大尺寸测试图片
			
 
				-        img_stream = create_large_test_image(width, height)
			
 
				-        
			
 
				-        # 调用压缩方法
			
 
				-        compressed_stream = image_util._compress_image(img_stream, "test_large_image.png")
			
 
				-        
			
 
				-        # 检查压缩后的大小
			
 
				-        compressed_stream.seek(0, 2)
			
 
				-        compressed_size_kb = compressed_stream.tell() / 1024
			
 
				-        compressed_stream.seek(0)
			
 
				-        
			
 
				-        print(f"压缩后大小: {compressed_size_kb:.2f}KB")
			
 
				-        
			
 
				-        # 验证压缩结果
			
 
				-        if compressed_size_kb <= 5000:
			
 
				-            print("✅ 压缩成功！压缩后大小小于等于5000KB")
			
 
				-        else:
			
 
				-            print("❌ 压缩失败！压缩后大小仍大于5000KB")
			
 
				-    
			
 
				-    # 测试_compress_image_to_bytes方法
			
 
				-    print(f"\n=== 测试 _compress_image_to_bytes 方法 ===")
			
 
				-    img_stream = create_large_test_image(4000, 4000)
			
 
				-    compressed_bytes = image_util._compress_image_to_bytes(img_stream)
			
 
				-    compressed_size_kb = len(compressed_bytes) / 1024
			
 
				-    print(f"压缩后字节大小: {compressed_size_kb:.2f}KB")
			
 
				-    
			
 
				-    if compressed_size_kb <= 5000:
			
 
				-        print("✅ _compress_image_to_bytes 压缩成功！")
			
 
				-    else:
			
 
				-        print("❌ _compress_image_to_bytes 压缩失败！")
			
 
				-    
			
 
				-    print("\n=== 所有测试完成 ===")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_image_compression()
			
--- a/test/test_image_compression_real.py
+++ b/test/test_image_compression_real.py
@@ -1,132 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-"""
			
 
				-测试图片压缩修复 - 真实场景
			
 
				-使用更真实的大图片验证压缩方法
			
 
				-"""
			
 
				-import sys
			
 
				-import os
			
 
				-from io import BytesIO
			
 
				-from PIL import Image, ImageDraw, ImageFont
			
 
				-import random
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.minio.image_util import ImageUtil
			
 
				-
			
 
				-def create_complex_test_image(width=3000, height=3000) -> BytesIO:
			
 
				-    """
			
 
				-    创建一个复杂的大尺寸测试图片，包含多种元素以增加文件大小
			
 
				-    
			
 
				-    Args:
			
 
				-        width: 图片宽度
			
 
				-        height: 图片高度
			
 
				-        
			
 
				-    Returns:
			
 
				-        BytesIO: 复杂大尺寸图片流
			
 
				-    """
			
 
				-    print(f"创建 {width}x{height} 的复杂测试图片...")
			
 
				-    
			
 
				-    # 创建一个白色背景图片
			
 
				-    img = Image.new('RGB', (width, height), color=(255, 255, 255))
			
 
				-    draw = ImageDraw.Draw(img)
			
 
				-    
			
 
				-    # 添加大量随机形状和颜色，增加图片复杂度
			
 
				-    for _ in range(10000):
			
 
				-        # 随机位置
			
 
				-        x1 = random.randint(0, width)
			
 
				-        y1 = random.randint(0, height)
			
 
				-        x2 = x1 + random.randint(10, 100)
			
 
				-        y2 = y1 + random.randint(10, 100)
			
 
				-        
			
 
				-        # 随机颜色
			
 
				-        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
			
 
				-        
			
 
				-        # 随机形状
			
 
				-        shape_type = random.choice(['rectangle', 'ellipse', 'line'])
			
 
				-        if shape_type == 'rectangle':
			
 
				-            draw.rectangle([x1, y1, x2, y2], fill=color)
			
 
				-        elif shape_type == 'ellipse':
			
 
				-            draw.ellipse([x1, y1, x2, y2], fill=color)
			
 
				-        else:
			
 
				-            draw.line([x1, y1, x2, y2], fill=color, width=random.randint(1, 5))
			
 
				-    
			
 
				-    # 添加一些随机文本
			
 
				-    try:
			
 
				-        # 尝试使用默认字体
			
 
				-        font = ImageFont.load_default()
			
 
				-        for _ in range(1000):
			
 
				-            text = f"Test {random.randint(1, 1000)}"
			
 
				-            x = random.randint(0, width - 100)
			
 
				-            y = random.randint(0, height - 20)
			
 
				-            draw.text((x, y), text, fill=(0, 0, 0), font=font)
			
 
				-    except Exception as e:
			
 
				-        print(f"添加文本失败: {e}")
			
 
				-    
			
 
				-    # 将图片保存到BytesIO流，使用JPEG格式以获得更大的文件大小
			
 
				-    img_stream = BytesIO()
			
 
				-    img.save(img_stream, format='JPEG', quality=100)  # 使用最高质量生成大文件
			
 
				-    img_stream.seek(0)
			
 
				-    
			
 
				-    # 检查图片大小
			
 
				-    img_stream.seek(0, 2)
			
 
				-    size_kb = img_stream.tell() / 1024
			
 
				-    img_stream.seek(0)
			
 
				-    
			
 
				-    print(f"测试图片创建完成，大小为 {size_kb:.2f}KB")
			
 
				-    return img_stream
			
 
				-
			
 
				-def test_image_compression():
			
 
				-    """
			
 
				-    测试图片压缩方法
			
 
				-    """
			
 
				-    print("开始测试图片压缩方法...")
			
 
				-    
			
 
				-    # 创建ImageUtil实例
			
 
				-    image_util = ImageUtil()
			
 
				-    
			
 
				-    # 测试不同尺寸的复杂图片压缩
			
 
				-    test_sizes = [
			
 
				-        (3000, 3000),   # 约 25MB
			
 
				-        (4000, 4000),   # 约 45MB
			
 
				-    ]
			
 
				-    
			
 
				-    for width, height in test_sizes:
			
 
				-        print(f"\n=== 测试 {width}x{height} 复杂图片压缩 ===")
			
 
				-        
			
 
				-        # 创建大尺寸测试图片
			
 
				-        img_stream = create_complex_test_image(width, height)
			
 
				-        
			
 
				-        # 调用压缩方法
			
 
				-        compressed_stream = image_util._compress_image(img_stream, "test_complex_image.jpg")
			
 
				-        
			
 
				-        # 检查压缩后的大小
			
 
				-        compressed_stream.seek(0, 2)
			
 
				-        compressed_size_kb = compressed_stream.tell() / 1024
			
 
				-        compressed_stream.seek(0)
			
 
				-        
			
 
				-        print(f"压缩后大小: {compressed_size_kb:.2f}KB")
			
 
				-        
			
 
				-        # 验证压缩结果
			
 
				-        if compressed_size_kb <= 5000:
			
 
				-            print("✅ 压缩成功！压缩后大小小于等于5000KB")
			
 
				-        else:
			
 
				-            print("❌ 压缩失败！压缩后大小仍大于5000KB")
			
 
				-    
			
 
				-    # 测试_compress_image_to_bytes方法
			
 
				-    print(f"\n=== 测试 _compress_image_to_bytes 方法 ===")
			
 
				-    img_stream = create_complex_test_image(4000, 4000)
			
 
				-    compressed_bytes = image_util._compress_image_to_bytes(img_stream)
			
 
				-    compressed_size_kb = len(compressed_bytes) / 1024
			
 
				-    print(f"压缩后字节大小: {compressed_size_kb:.2f}KB")
			
 
				-    
			
 
				-    if compressed_size_kb <= 5000:
			
 
				-        print("✅ _compress_image_to_bytes 压缩成功！")
			
 
				-    else:
			
 
				-        print("❌ _compress_image_to_bytes 压缩失败！")
			
 
				-    
			
 
				-    print("\n=== 所有测试完成 ===")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_image_compression()
			
--- a/test/test_infinity_encapsulation.py
+++ b/test/test_infinity_encapsulation.py
@@ -1,180 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-
			
 
				-"""
			
 
				-测试Infinity向量数据库封装
			
 
				-"""
			
 
				-
			
 
				-from services.utils.infinity_util import InfinityVectorDB
			
 
				-from conf.config import VectorDBConfig
			
 
				-
			
 
				-def test_infinity_connection():
			
 
				-    """
			
 
				-    测试Infinity连接
			
 
				-    """
			
 
				-    print("=== 测试Infinity连接 ===")
			
 
				-    
			
 
				-    try:
			
 
				-        # 初始化InfinityVectorDB
			
 
				-        infinity_db = InfinityVectorDB()
			
 
				-        print("✅ InfinityVectorDB初始化成功")
			
 
				-        
			
 
				-        # 测试索引创建
			
 
				-        index_name = "test_collection"
			
 
				-        print(f"\n测试创建索引: {index_name}")
			
 
				-        result = infinity_db.create_index(index_name)
			
 
				-        if result:
			
 
				-            print(f"✅ 索引 {index_name} 创建成功")
			
 
				-        else:
			
 
				-            print(f"❌ 索引 {index_name} 创建失败")
			
 
				-            return False
			
 
				-        
			
 
				-        # 测试索引存在检查
			
 
				-        print(f"\n测试检查索引存在: {index_name}")
			
 
				-        exists = infinity_db.index_exists(index_name)
			
 
				-        if exists:
			
 
				-            print(f"✅ 索引 {index_name} 存在")
			
 
				-        else:
			
 
				-            print(f"❌ 索引 {index_name} 不存在")
			
 
				-            return False
			
 
				-        
			
 
				-        # 测试插入文档
			
 
				-        print(f"\n测试插入文档")
			
 
				-        document = {
			
 
				-            "file_name": "test.pdf",
			
 
				-            "file_page_count": 10,
			
 
				-            "page_number": 1,
			
 
				-            "text": "这是一个测试文档",
			
 
				-            "image_path": "test.png",
			
 
				-            "sparse_vector": [],
			
 
				-            "dense_vector_1024": [0.1] * 1024,
			
 
				-            "dataset_id": "test_dataset",
			
 
				-            "document_id": "test_doc_id"
			
 
				-        }
			
 
				-        
			
 
				-        insert_result = infinity_db.insert_document(index_name, document)
			
 
				-        if insert_result:
			
 
				-            print(f"✅ 文档插入成功")
			
 
				-        else:
			
 
				-            print(f"❌ 文档插入失败")
			
 
				-            return False
			
 
				-        
			
 
				-        # 测试批量插入
			
 
				-        print(f"\n测试批量插入文档")
			
 
				-        documents = []
			
 
				-        for i in range(2, 5):
			
 
				-            doc = {
			
 
				-                "file_name": "test.pdf",
			
 
				-                "file_page_count": 10,
			
 
				-                "page_number": i,
			
 
				-                "text": f"这是第 {i} 页",
			
 
				-                "image_path": f"test_{i}.png",
			
 
				-                "sparse_vector": [],
			
 
				-                "dense_vector_1024": [0.1] * 1024,
			
 
				-                "dataset_id": "test_dataset",
			
 
				-                "document_id": "test_doc_id"
			
 
				-            }
			
 
				-            documents.append(doc)
			
 
				-        
			
 
				-        bulk_result = infinity_db.bulk_insert(index_name, documents)
			
 
				-        if bulk_result["success"] == len(documents):
			
 
				-            print(f"✅ 批量插入成功，共插入 {bulk_result['success']} 个文档")
			
 
				-        else:
			
 
				-            print(f"❌ 批量插入失败，成功 {bulk_result['success']} 个，失败 {bulk_result['failed']} 个")
			
 
				-            return False
			
 
				-        
			
 
				-        # 测试向量检索
			
 
				-        print(f"\n测试向量检索")
			
 
				-        vector = [0.1] * 1024
			
 
				-        search_result = infinity_db.vector_search(index_name, "dense_vector_1024", vector, size=5)
			
 
				-        if search_result["hits"]["total"] > 0:
			
 
				-            print(f"✅ 向量检索成功，找到 {search_result['hits']['total']} 个结果")
			
 
				-        else:
			
 
				-            print(f"❌ 向量检索失败，未找到结果")
			
 
				-        
			
 
				-        # 测试混合检索
			
 
				-        print(f"\n测试混合检索")
			
 
				-        hybrid_result = infinity_db.hybrid_search(
			
 
				-            index_name,
			
 
				-            text_query="测试",
			
 
				-            vector_field="dense_vector_1024",
			
 
				-            vector=vector,
			
 
				-            size=5
			
 
				-        )
			
 
				-        if hybrid_result["hits"]["total"] > 0:
			
 
				-            print(f"✅ 混合检索成功，找到 {hybrid_result['hits']['total']} 个结果")
			
 
				-        else:
			
 
				-            print(f"❌ 混合检索失败，未找到结果")
			
 
				-        
			
 
				-        # 测试删除索引
			
 
				-        print(f"\n测试删除索引: {index_name}")
			
 
				-        delete_result = infinity_db.delete_index(index_name)
			
 
				-        if delete_result:
			
 
				-            print(f"✅ 索引 {index_name} 删除成功")
			
 
				-        else:
			
 
				-            print(f"❌ 索引 {index_name} 删除失败")
			
 
				-            return False
			
 
				-        
			
 
				-        # 关闭连接
			
 
				-        infinity_db.close()
			
 
				-        print(f"\n✅ 成功关闭连接")
			
 
				-        
			
 
				-        return True
			
 
				-        
			
 
				-    except Exception as e:
			
 
				-        print(f"\n❌ 测试失败: {e}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-def test_vector_db_factory():
			
 
				-    """
			
 
				-    测试VectorDBFactory
			
 
				-    """
			
 
				-    print("\n=== 测试VectorDBFactory ===")
			
 
				-    
			
 
				-    try:
			
 
				-        from services.utils.vector_db import VectorDBFactory
			
 
				-        
			
 
				-        # 获取向量数据库实例
			
 
				-        vector_db = VectorDBFactory.get_vector_db()
			
 
				-        print(f"✅ 成功获取向量数据库实例: {type(vector_db).__name__}")
			
 
				-        
			
 
				-        # 测试创建索引
			
 
				-        index_name = "test_factory_collection"
			
 
				-        result = vector_db.create_index(index_name)
			
 
				-        if result:
			
 
				-            print(f"✅ 通过工厂创建索引 {index_name} 成功")
			
 
				-        else:
			
 
				-            print(f"❌ 通过工厂创建索引 {index_name} 失败")
			
 
				-        
			
 
				-        vector_db.close()
			
 
				-        print(f"✅ 成功关闭通过工厂获取的连接")
			
 
				-        
			
 
				-        return True
			
 
				-        
			
 
				-    except Exception as e:
			
 
				-        print(f"\n❌ 工厂测试失败: {e}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    print("开始测试Infinity向量数据库封装...")
			
 
				-    
			
 
				-    # 测试Infinity连接
			
 
				-    connection_result = test_infinity_connection()
			
 
				-    
			
 
				-    # 测试VectorDBFactory
			
 
				-    factory_result = test_vector_db_factory()
			
 
				-    
			
 
				-    # 总结
			
 
				-    print("\n=== 测试总结 ===")
			
 
				-    if connection_result and factory_result:
			
 
				-        print("✅ 所有测试通过！")
			
 
				-        exit(0)
			
 
				-    else:
			
 
				-        print("❌ 部分测试失败！")
			
 
				-        exit(1)
			
--- a/test/test_infinity_http.py
+++ b/test/test_infinity_http.py
@@ -1,56 +0,0 @@
 
				-import sys
			
 
				-import os
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-def test_infinity_http_import():
			
 
				-    """测试Infinity HTTP实现导入"""
			
 
				-    try:
			
 
				-        from services.utils.infinity_util import InfinityVectorDB
			
 
				-        print("✓ InfinityVectorDB导入成功")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ InfinityVectorDB导入失败: {str(e)}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-def test_infinity_http_init():
			
 
				-    """测试Infinity HTTP实现初始化"""
			
 
				-    try:
			
 
				-        from services.utils.infinity_util import InfinityVectorDB
			
 
				-        from conf.config import VectorDBConfig
			
 
				-        
			
 
				-        # 打印配置信息，方便调试
			
 
				-        print(f"\nInfinity配置:")
			
 
				-        print(f"  Host: {VectorDBConfig.get_infinity_host()}")
			
 
				-        print(f"  Port: {VectorDBConfig.get_infinity_port()}")
			
 
				-        print(f"  User: {VectorDBConfig.get_infinity_user()}")
			
 
				-        print(f"  Password: {VectorDBConfig.get_infinity_password()}")
			
 
				-        print(f"  Database: {VectorDBConfig.get_infinity_database()}")
			
 
				-        
			
 
				-        # 尝试初始化，但不实际连接
			
 
				-        # 这里只检查初始化逻辑是否正确
			
 
				-        print("\n✓ InfinityVectorDB初始化逻辑检查通过")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ InfinityVectorDB初始化失败: {str(e)}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-def main():
			
 
				-    """主测试函数"""
			
 
				-    print("=== 测试Infinity HTTP实现 ===")
			
 
				-    
			
 
				-    # 测试导入
			
 
				-    test_infinity_http_import()
			
 
				-    
			
 
				-    # 测试初始化
			
 
				-    test_infinity_http_init()
			
 
				-    
			
 
				-    print("\n=== 测试完成 ===")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/test/test_infinity_sdk.py
+++ b/test/test_infinity_sdk.py
@@ -1,60 +0,0 @@
 
				-import sys
			
 
				-import os
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-def test_infinity_import():
			
 
				-    """测试Infinity SDK导入"""
			
 
				-    try:
			
 
				-        from services.utils.infinity_util import InfinityVectorDB
			
 
				-        print("✓ InfinityVectorDB导入成功")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ InfinityVectorDB导入失败: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-def test_infinity_sdk_version():
			
 
				-    """测试Infinity SDK版本"""
			
 
				-    try:
			
 
				-        import infinity
			
 
				-        print(f"✓ Infinity SDK版本: {infinity.__version__}")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 无法获取Infinity SDK版本: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-def test_infinity_api():
			
 
				-    """测试Infinity API可用性"""
			
 
				-    try:
			
 
				-        import infinity
			
 
				-        from infinity.common import ConflictType
			
 
				-        
			
 
				-        print(f"✓ infinity模块: {infinity}")
			
 
				-        print(f"✓ ConflictType: {ConflictType}")
			
 
				-        print(f"✓ NetworkAddress: {infinity.NetworkAddress}")
			
 
				-        print(f"✓ DataType: {infinity.DataType}")
			
 
				-        print(f"✓ IndexType: {infinity.IndexType}")
			
 
				-        print(f"✓ MetricType: {infinity.MetricType}")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 无法访问Infinity API: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-def main():
			
 
				-    """主测试函数"""
			
 
				-    print("=== 测试Infinity SDK实现 ===")
			
 
				-    
			
 
				-    # 测试导入
			
 
				-    test_infinity_import()
			
 
				-    
			
 
				-    # 测试SDK版本
			
 
				-    test_infinity_sdk_version()
			
 
				-    
			
 
				-    # 测试API可用性
			
 
				-    test_infinity_api()
			
 
				-    
			
 
				-    print("\n=== 测试完成 ===")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/test/test_mcp.py
+++ b/test/test_mcp.py
@@ -1,6 +0,0 @@
 
				-from PIL import Image
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    image = Image.open("http://image.dawn-infinite.cn/file/1.png")
			
 
				-    # 打开一个网络图片转换为Image.Image
			
 
				-    
			
--- a/test/test_mcp_hybrid_search.py
+++ b/test/test_mcp_hybrid_search.py
@@ -1,72 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试MCP服务的混合检索功能
			
 
				-"""
			
 
				-
			
 
				-import sys
			
 
				-import os
			
 
				-import json
			
 
				-import unittest
			
 
				-import requests
			
 
				-from typing import Dict, Any
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-class TestMCPHybridSearch(unittest.TestCase):
			
 
				-    """测试MCP服务的混合检索功能"""
			
 
				-    
			
 
				-    def setUp(self):
			
 
				-        """设置测试环境"""
			
 
				-        # MCP服务的基础URL
			
 
				-        self.base_url = "http://localhost:18000"
			
 
				-        
			
 
				-        # 测试数据
			
 
				-        # 测试图片
			
 
				-        self.test_image = r"D:\project\work\ragflow_plugs\book\output\temp\2.png"
			
 
				-        # 测试文本查询
			
 
				-        self.test_text_query = "卡梅拉"
			
 
				-
			
 
				-    
			
 
				-    def test_hybrid_search(self):
			
 
				-        """测试混合检索API"""
			
 
				-        print("测试混合检索API...")
			
 
				-        
			
 
				-        # 构建请求数据
			
 
				-        data = {
			
 
				-            "text_query": self.test_text_query,
			
 
				-            "image": self.test_image,
			
 
				-            "topn": 2
			
 
				-        }
			
 
				-        
			
 
				-        # 发送请求
			
 
				-        response = requests.post(
			
 
				-            f"{self.base_url}/tools/hybrid_search",
			
 
				-            json=data
			
 
				-        )
			
 
				-        
			
 
				-        # 验证响应
			
 
				-        self.assertEqual(response.status_code, 200, f"请求失败: {response.text}")
			
 
				-        result = response.json()
			
 
				-        self.assertTrue(result["success"], f"API调用失败: {result.get('message', '未知错误')}")
			
 
				-        self.assertIn("output", result, "响应中缺少hits字段")
			
 
				-        self.assertIn("total", result, "响应中缺少total字段")
			
 
				-        self.assertIsInstance(result["output"], list, "hits字段应该是一个列表")
			
 
				-        self.assertIsInstance(result["total"], int, "total字段应该是一个整数")
			
 
				-        
			
 
				-        print(f"✓ 混合检索API测试通过，命中数量: {result['total']}")
			
 
				-    
			
 
				- 
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    """运行测试"""
			
 
				-    print("开始测试MCP服务的混合检索功能...\n")
			
 
				-    
			
 
				-    # 创建测试套件
			
 
				-    suite = unittest.TestLoader().loadTestsFromTestCase(TestMCPHybridSearch)
			
 
				-    
			
 
				-    # 运行测试
			
 
				-    runner = unittest.TextTestRunner(verbosity=2)
			
 
				-    result = runner.run(suite)
			
 
				-    
			
 
				-    print(f"\n测试完成，共运行 {result.testsRun} 个测试，成功 {result.testsRun - len(result.failures) - len(result.errors)} 个，失败 {len(result.failures)} 个，错误 {len(result.errors)} 个")
			
--- a/test/test_mcp_simple.py
+++ b/test/test_mcp_simple.py
@@ -1,47 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-简单测试MCP服务的API路径
			
 
				-"""
			
 
				-
			
 
				-import requests
			
 
				-
			
 
				-# MCP服务的基础URL
			
 
				-BASE_URL = "http://localhost:18000"
			
 
				-
			
 
				-# 测试不同的API路径格式
			
 
				-test_paths = [
			
 
				-    "/vectorize_store",
			
 
				-    "/tools/vectorize_store",
			
 
				-    "/mcp/tools/vectorize_store",
			
 
				-    "/api/vectorize_store",
			
 
				-    "/",
			
 
				-    "/docs",
			
 
				-    "/openapi.json"
			
 
				-]
			
 
				-
			
 
				-# 测试数据
			
 
				-test_data = {
			
 
				-    "dataset_id": "test_dataset_001",
			
 
				-    "book_name": "测试书籍",
			
 
				-    "document_id": "test_doc_001",
			
 
				-    "parsed_results": [
			
 
				-        {
			
 
				-            "page_number": 1,
			
 
				-            "content": "这是测试书籍的第1页内容",
			
 
				-            "image_url": "https://example.com/image1.jpg"
			
 
				-        }
			
 
				-    ]
			
 
				-}
			
 
				-
			
 
				-print("开始测试MCP服务的API路径...\n")
			
 
				-
			
 
				-for path in test_paths:
			
 
				-    url = f"{BASE_URL}{path}"
			
 
				-    print(f"测试路径: {url}")
			
 
				-    try:
			
 
				-        response = requests.post(url, json=test_data, timeout=5)
			
 
				-        print(f"状态码: {response.status_code}")
			
 
				-        print(f"响应内容: {response.text[:100]}...")
			
 
				-    except Exception as e:
			
 
				-        print(f"请求失败: {str(e)}")
			
 
				-    print("-" * 50)
			
--- a/test/test_multimodal_embedding.py
+++ b/test/test_multimodal_embedding.py
@@ -1,59 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-"""
			
 
				-测试MultimodalEmbedding类的修复
			
 
				-"""
			
 
				-
			
 
				-from services.model.multimodal_embedding import MultimodalEmbedding
			
 
				-
			
 
				-def test_multimodal_embedding_init():
			
 
				-    """测试MultimodalEmbedding实例化"""
			
 
				-    print("=== 测试MultimodalEmbedding实例化 ===")
			
 
				-    try:
			
 
				-        # 尝试实例化MultimodalEmbedding类
			
 
				-        embedding = MultimodalEmbedding()
			
 
				-        print("✓ MultimodalEmbedding实例化成功")
			
 
				-        print(f"  模型提供商: {embedding.model_provider}")
			
 
				-        print(f"  模型名称: {embedding.model_name}")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ MultimodalEmbedding实例化失败: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-def test_multimodal_embedding_methods():
			
 
				-    """测试MultimodalEmbedding方法"""
			
 
				-    print("\n=== 测试MultimodalEmbedding方法 ===")
			
 
				-    try:
			
 
				-        embedding = MultimodalEmbedding()
			
 
				-        
			
 
				-        # 测试方法是否存在
			
 
				-        methods_to_test = [
			
 
				-            'get_text_embedding',
			
 
				-            'get_texts_embedding',
			
 
				-            'get_image_embedding',
			
 
				-            'get_multimodal_embedding'
			
 
				-        ]
			
 
				-        
			
 
				-        for method_name in methods_to_test:
			
 
				-            if hasattr(embedding, method_name) and callable(getattr(embedding, method_name)):
			
 
				-                print(f"✓ 方法 {method_name} 存在且可调用")
			
 
				-            else:
			
 
				-                print(f"✗ 方法 {method_name} 不存在或不可调用")
			
 
				-                return False
			
 
				-        
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 测试方法存在性失败: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    print("开始测试MultimodalEmbedding修复...")
			
 
				-    
			
 
				-    test1 = test_multimodal_embedding_init()
			
 
				-    test2 = test_multimodal_embedding_methods()
			
 
				-    
			
 
				-    if test1 and test2:
			
 
				-        print("\n🎉 所有测试通过！MultimodalEmbedding修复成功。")
			
 
				-        exit(0)
			
 
				-    else:
			
 
				-        print("\n❌ 测试失败！MultimodalEmbedding修复存在问题。")
			
 
				-        exit(1)
			
--- a/test/test_mysql_conn.py
+++ b/test/test_mysql_conn.py
@@ -1,126 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-"""
			
 
				-MySQL连接工具类测试脚本
			
 
				-"""
			
 
				-import sys
			
 
				-import os
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from utils.mysql_conn import get_mysql_conn
			
 
				-
			
 
				-def test_mysql_conn():
			
 
				-    """
			
 
				-    测试MySQL连接工具类
			
 
				-    """
			
 
				-    print("测试MySQL连接工具类...")
			
 
				-    
			
 
				-    try:
			
 
				-        # 获取MySQL连接管理器实例
			
 
				-        mysql_conn = get_mysql_conn(
			
 
				-            host="localhost",
			
 
				-            port=3306,
			
 
				-            user="root",
			
 
				-            password="password",
			
 
				-            database="test_db",
			
 
				-            pool_size=3
			
 
				-        )
			
 
				-        
			
 
				-        print("✓ 成功获取MySQL连接管理器实例")
			
 
				-        
			
 
				-        # 测试创建表
			
 
				-        create_table_sql = """
			
 
				-        CREATE TABLE IF NOT EXISTS test_users (
			
 
				-            id INT AUTO_INCREMENT PRIMARY KEY,
			
 
				-            name VARCHAR(50) NOT NULL,
			
 
				-            email VARCHAR(100) NOT NULL UNIQUE,
			
 
				-            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
			
 
				-        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
			
 
				-        """
			
 
				-        
			
 
				-        result = mysql_conn.execute(create_table_sql)
			
 
				-        print("✓ 成功创建测试表")
			
 
				-        
			
 
				-        # 测试插入数据
			
 
				-        insert_sql = "INSERT INTO test_users (name, email) VALUES (%s, %s)"
			
 
				-        insert_params = ("测试用户", "test@example.com")
			
 
				-        
			
 
				-        row_count = mysql_conn.execute(insert_sql, insert_params)
			
 
				-        print(f"✓ 成功插入 {row_count} 条数据")
			
 
				-        
			
 
				-        # 测试查询数据
			
 
				-        select_sql = "SELECT * FROM test_users WHERE name = %s"
			
 
				-        select_params = ("测试用户",)
			
 
				-        
			
 
				-        user = mysql_conn.fetch_one(select_sql, select_params)
			
 
				-        if user:
			
 
				-            print(f"✓ 成功查询数据: {user}")
			
 
				-        else:
			
 
				-            print("✗ 查询数据失败")
			
 
				-        
			
 
				-        # 测试批量插入
			
 
				-        bulk_insert_sql = "INSERT INTO test_users (name, email) VALUES (%s, %s)"
			
 
				-        bulk_params = [
			
 
				-            ("批量用户1", "batch1@example.com"),
			
 
				-            ("批量用户2", "batch2@example.com"),
			
 
				-            ("批量用户3", "batch3@example.com")
			
 
				-        ]
			
 
				-        
			
 
				-        bulk_row_count = mysql_conn.bulk_insert(bulk_insert_sql, bulk_params)
			
 
				-        print(f"✓ 成功批量插入 {bulk_row_count} 条数据")
			
 
				-        
			
 
				-        # 测试查询所有数据
			
 
				-        select_all_sql = "SELECT * FROM test_users"
			
 
				-        all_users = mysql_conn.fetch_all(select_all_sql)
			
 
				-        print(f"✓ 成功查询所有数据，共 {len(all_users)} 条")
			
 
				-        
			
 
				-        # 测试更新数据
			
 
				-        update_sql = "UPDATE test_users SET name = %s WHERE id = %s"
			
 
				-        update_params = ("更新后的测试用户", user["id"])
			
 
				-        
			
 
				-        update_row_count = mysql_conn.execute(update_sql, update_params)
			
 
				-        print(f"✓ 成功更新 {update_row_count} 条数据")
			
 
				-        
			
 
				-        # 测试删除数据
			
 
				-        delete_sql = "DELETE FROM test_users WHERE id = %s"
			
 
				-        delete_params = (user["id"],)
			
 
				-        
			
 
				-        delete_row_count = mysql_conn.execute(delete_sql, delete_params)
			
 
				-        print(f"✓ 成功删除 {delete_row_count} 条数据")
			
 
				-        
			
 
				-        # 测试事务
			
 
				-        print("测试事务处理...")
			
 
				-        conn, cursor = mysql_conn.begin_transaction()
			
 
				-        try:
			
 
				-            # 在事务中执行多个操作
			
 
				-            cursor.execute("INSERT INTO test_users (name, email) VALUES (%s, %s)", ("事务用户1", "transaction1@example.com"))
			
 
				-            cursor.execute("INSERT INTO test_users (name, email) VALUES (%s, %s)", ("事务用户2", "transaction2@example.com"))
			
 
				-            mysql_conn.commit_transaction(conn, cursor)
			
 
				-            print("✓ 事务提交成功")
			
 
				-        except Exception as e:
			
 
				-            mysql_conn.rollback_transaction(conn, cursor)
			
 
				-            print(f"✗ 事务回滚: {e}")
			
 
				-        
			
 
				-        # 清理测试数据
			
 
				-        drop_table_sql = "DROP TABLE IF EXISTS test_users"
			
 
				-        mysql_conn.execute(drop_table_sql)
			
 
				-        print("✓ 成功清理测试表")
			
 
				-        
			
 
				-        # 关闭连接池
			
 
				-        mysql_conn.close()
			
 
				-        print("✓ 成功关闭连接池")
			
 
				-        
			
 
				-        print("\n🎉 所有测试通过！MySQL连接工具类工作正常。")
			
 
				-        
			
 
				-    except Exception as e:
			
 
				-        print(f"\n❌ 测试失败: {e}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-    
			
 
				-    return True
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_mysql_conn()
			
--- a/test/test_simple.py
+++ b/test/test_simple.py
@@ -1,34 +0,0 @@
 
				-"""简单测试脚本，直接测试PDF解析服务"""
			
 
				-
			
 
				-import sys
			
 
				-import os
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-from services.pdf_parser.main import PDFParsingService
			
 
				-
			
 
				-def test_simple():
			
 
				-    """简单测试函数"""
			
 
				-    print("开始简单测试PDF解析服务...")
			
 
				-    print("=" * 50)
			
 
				-    
			
 
				-    try:
			
 
				-        # 测试服务初始化
			
 
				-        service = PDFParsingService()
			
 
				-        print("✓ 服务初始化成功")
			
 
				-        
			
 
				-        # 测试核心功能
			
 
				-        print("✓ 核心功能可用")
			
 
				-        
			
 
				-        print("\n服务功能测试完成！")
			
 
				-        print("使用示例:")
			
 
				-        print("python -m services.pdf_parser.main --pdf_path <pdf文件路径> --output <输出json路径>")
			
 
				-        print("\n例如:")
			
 
				-        print("python -m services.pdf_parser.main --pdf_path sample.pdf --output result.json")
			
 
				-        
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 测试失败: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_simple()
			
--- a/test/test_upload_document.py
+++ b/test/test_upload_document.py
@@ -1,39 +0,0 @@
 
				-import sys
			
 
				-import os
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-from services.utils.http_client import HTTPClient
			
 
				-from services.ragflow.document_service import DocumentService
			
 
				-
			
 
				-# 配置信息
			
 
				-API_URL = "http://localhost:8000"  # 替换为实际的RAGFlow API URL
			
 
				-API_KEY = "your_api_key"  # 替换为实际的API密钥
			
 
				-DATASET_ID = "your_dataset_id"  # 替换为实际的数据集ID
			
 
				-PDF_PATH = r"D:\project\work\ragflow_plugs\book\不一样的卡梅拉1-我想去看海.pdf"  # 使用已有的测试PDF文件
			
 
				-
			
 
				-def test_upload_document():
			
 
				-    """测试上传文档功能"""
			
 
				-    try:
			
 
				-        # 创建HTTP客户端实例
			
 
				-        http_client = HTTPClient(base_url=API_URL, api_key=API_KEY)
			
 
				-        
			
 
				-        # 创建文档服务实例
			
 
				-        document_service = DocumentService(http_client)
			
 
				-        
			
 
				-        # 调用上传文档方法
			
 
				-        print(f"开始上传文档: {PDF_PATH}")
			
 
				-        result = document_service.upload_document(DATASET_ID, PDF_PATH)
			
 
				-        
			
 
				-        # 打印结果
			
 
				-        print(f"文档上传成功: {result}")
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"文档上传失败: {str(e)}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_upload_document()
			
--- a/test/test_vector_db.py
+++ b/test/test_vector_db.py
@@ -1,104 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-
			
 
				-"""
			
 
				-向量数据库测试脚本
			
 
				-测试向量数据库工厂类的基本功能和配置切换
			
 
				-"""
			
 
				-
			
 
				-from services.utils.vector_db import VectorDBFactory
			
 
				-from conf.config import VectorDBConfig
			
 
				-
			
 
				-def test_vector_db_factory():
			
 
				-    """
			
 
				-    测试向量数据库工厂类
			
 
				-    """
			
 
				-    print("=== 测试向量数据库工厂类 ===")
			
 
				-    
			
 
				-    # 获取配置的向量数据库类型
			
 
				-    vector_db_type = VectorDBConfig.get_vector_db_type()
			
 
				-    print(f"当前配置的向量数据库类型: {vector_db_type}")
			
 
				-    
			
 
				-    try:
			
 
				-        # 获取向量数据库实例
			
 
				-        vector_db = VectorDBFactory.get_vector_db()
			
 
				-        print(f"成功获取向量数据库实例: {type(vector_db).__name__}")
			
 
				-        
			
 
				-        # 测试创建索引
			
 
				-        index_name = "test_index"
			
 
				-        print(f"\n测试创建索引: {index_name}")
			
 
				-        result = vector_db.create_index(index_name)
			
 
				-        print(f"创建索引结果: {result}")
			
 
				-        
			
 
				-        # 测试向量检索接口
			
 
				-        print(f"\n测试向量检索接口")
			
 
				-        vector = [0.1] * 768
			
 
				-        result = vector_db.vector_search(index_name, "vector_768_vec", vector, size=5)
			
 
				-        print(f"向量检索结果: {result}")
			
 
				-        
			
 
				-        # 测试混合检索接口
			
 
				-        print(f"\n测试混合检索接口")
			
 
				-        result = vector_db.hybrid_search(
			
 
				-            index_name, 
			
 
				-            text_query="测试", 
			
 
				-            vector_field="vector_768_vec", 
			
 
				-            vector=vector, 
			
 
				-            size=5
			
 
				-        )
			
 
				-        print(f"混合检索结果: {result}")
			
 
				-        
			
 
				-        # 关闭连接
			
 
				-        vector_db.close()
			
 
				-        print(f"\n成功关闭向量数据库连接")
			
 
				-        
			
 
				-        return True
			
 
				-        
			
 
				-    except Exception as e:
			
 
				-        print(f"测试失败: {e}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-def test_vector_db_switch():
			
 
				-    """
			
 
				-    测试向量数据库切换功能
			
 
				-    """
			
 
				-    print("\n=== 测试向量数据库切换功能 ===")
			
 
				-    
			
 
				-    # 测试不同类型的向量数据库
			
 
				-    test_types = ["es", "infinity"]
			
 
				-    
			
 
				-    for db_type in test_types:
			
 
				-        print(f"\n测试向量数据库类型: {db_type}")
			
 
				-        try:
			
 
				-            # 临时修改配置（实际使用时通过环境变量配置）
			
 
				-            from conf.config import VectorDBConfig
			
 
				-            
			
 
				-            # 注意：这里我们不能直接修改配置类的静态方法返回值
			
 
				-            # 所以我们通过工厂类的实现来测试
			
 
				-            
			
 
				-            # 这里只测试工厂类是否能正确创建不同类型的向量数据库
			
 
				-            if db_type == "es":
			
 
				-                from services.utils.vector_db import ElasticsearchVectorDB
			
 
				-                vector_db = ElasticsearchVectorDB()
			
 
				-            else:
			
 
				-                from services.utils.vector_db import InfinityVectorDB
			
 
				-                vector_db = InfinityVectorDB()
			
 
				-            
			
 
				-            print(f"成功创建{db_type}向量数据库实例: {type(vector_db).__name__}")
			
 
				-            vector_db.close()
			
 
				-            print(f"成功关闭{db_type}向量数据库连接")
			
 
				-            
			
 
				-        except Exception as e:
			
 
				-            print(f"测试{db_type}失败: {e}")
			
 
				-            import traceback
			
 
				-            traceback.print_exc()
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    # 测试向量数据库工厂类
			
 
				-    test_vector_db_factory()
			
 
				-    
			
 
				-    # 测试向量数据库切换功能
			
 
				-    test_vector_db_switch()
			
 
				-    
			
 
				-    print("\n=== 测试完成 ===")
			
--- a/test/test_workflow.py
+++ b/test/test_workflow.py
@@ -1,71 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-
			
 
				-"""
			
 
				-PDF解析工作流测试脚本
			
 
				-测试包含向量化入库的完整工作流
			
 
				-"""
			
 
				-
			
 
				-import os
			
 
				-import sys
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-from services.pdf_parser.workflow import PDFParsingWorkflow
			
 
				-
			
 
				-
			
 
				-def test_pdf_parsing_workflow():
			
 
				-    """
			
 
				-    测试PDF解析工作流，包括向量化入库
			
 
				-    """
			
 
				-    print("=== 测试PDF解析工作流 ===")
			
 
				-    
			
 
				-    # 测试参数
			
 
				-    pdf_path = "test/sample.pdf"  # 替换为实际的测试PDF路径
			
 
				-    dataset_id = "test_dataset"
			
 
				-    ragflow_api_url = "http://localhost:8000/"  # 替换为实际的RAGFLOW API URL
			
 
				-    rag_flow_api_key = "test_api_key"  # 替换为实际的API密钥
			
 
				-    
			
 
				-    try:
			
 
				-        # 检查测试PDF文件是否存在
			
 
				-        if not os.path.exists(pdf_path):
			
 
				-            print(f"测试PDF文件不存在: {pdf_path}")
			
 
				-            print("请将测试PDF文件放置在指定位置")
			
 
				-            return False
			
 
				-        
			
 
				-        # 初始化工作流
			
 
				-        workflow = PDFParsingWorkflow()
			
 
				-        print(f"工作流初始化成功")
			
 
				-        
			
 
				-        # 运行工作流
			
 
				-        print(f"开始运行工作流，解析PDF: {pdf_path}")
			
 
				-        result = workflow.run(
			
 
				-            pdf_path=pdf_path,
			
 
				-            dataset_id=dataset_id,
			
 
				-            ragflow_api_url=ragflow_api_url,
			
 
				-            rag_flow_api_key=rag_flow_api_key
			
 
				-        )
			
 
				-        
			
 
				-        # 打印结果
			
 
				-        print(f"\n工作流运行完成")
			
 
				-        print(f"解析页面数量: {len(result.get('parsed_results', []))}")
			
 
				-        print(f"向量化页面数量: {result.get('vectorized_pages', 0)}")
			
 
				-        print(f"向量化结果数量: {len(result.get('vectorized_results', []))}")
			
 
				-        
			
 
				-        # 检查结果
			
 
				-        if result.get('is_complete', False):
			
 
				-            print("\n✅ 工作流运行成功！")
			
 
				-            return True
			
 
				-        else:
			
 
				-            print("\n❌ 工作流运行失败！")
			
 
				-            return False
			
 
				-            
			
 
				-    except Exception as e:
			
 
				-        print(f"\n❌ 测试失败: {e}")
			
 
				-        import traceback
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    test_pdf_parsing_workflow()
			
--- a/test/verify_pdf_splitter.py
+++ b/test/verify_pdf_splitter.py
@@ -1,99 +0,0 @@
 
				-"""PDF拆分功能验证脚本"""
			
 
				-
			
 
				-import sys
			
 
				-import os
			
 
				-from pathlib import Path
			
 
				-
			
 
				-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
			
 
				-
			
 
				-def check_pymupdf():
			
 
				-    """检查PyMuPDF是否安装"""
			
 
				-    print("检查PyMuPDF是否安装...")
			
 
				-    try:
			
 
				-        import fitz
			
 
				-        print(f"✓ PyMuPDF已安装")
			
 
				-        print(f"  版本: {fitz.__version__}")
			
 
				-        return True
			
 
				-    except ImportError:
			
 
				-        print("✗ 未安装PyMuPDF，请运行: pip install PyMuPDF")
			
 
				-        return False
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ 检查PyMuPDF时出错: {str(e)}")
			
 
				-        return False
			
 
				-
			
 
				-def check_pdf_file(pdf_path):
			
 
				-    """检查PDF文件是否存在"""
			
 
				-    print(f"检查PDF文件: {pdf_path}")
			
 
				-    pdf = Path(pdf_path)
			
 
				-    if pdf.exists():
			
 
				-        print(f"✓ PDF文件存在，大小: {pdf.stat().st_size} 字节")
			
 
				-        return True
			
 
				-    else:
			
 
				-        print(f"✗ PDF文件不存在: {pdf_path}")
			
 
				-        return False
			
 
				-
			
 
				-def test_pdf_splitter():
			
 
				-    """测试PDF拆分功能"""
			
 
				-    print("=" * 50)
			
 
				-    print("PDF拆分功能验证")
			
 
				-    print("=" * 50)
			
 
				-    
			
 
				-    # 检查PyMuPDF
			
 
				-    pymupdf_ok = check_pymupdf()
			
 
				-    print()
			
 
				-    
			
 
				-    # 检查示例PDF文件
			
 
				-    pdf_path = r"D:\project\work\ragflow_plugs\book\不一样的卡梅拉1-我想去看海.pdf"
			
 
				-    pdf_ok = check_pdf_file(pdf_path)
			
 
				-    print()
			
 
				-    
			
 
				-    if not pymupdf_ok:
			
 
				-        print("=" * 50)
			
 
				-        print("PyMuPDF安装指南:")
			
 
				-        print("1. 运行命令安装: pip install PyMuPDF")
			
 
				-        print("2. 安装完成后重试")
			
 
				-        print("=" * 50)
			
 
				-        return False
			
 
				-    
			
 
				-    if not pdf_ok:
			
 
				-        print("=" * 50)
			
 
				-        print(r"请确保PDF文件存在: D:\project\work\ragflow_plugs\book\不一样的卡梅拉1-我想去看海.pdf")
			
 
				-        print("或修改脚本中的pdf_path变量为实际的PDF文件路径")
			
 
				-        print("=" * 50)
			
 
				-        return False
			
 
				-    
			
 
				-    # 测试PDF拆分功能
			
 
				-    print("开始测试PDF拆分功能...")
			
 
				-    try:
			
 
				-        from services.pdf_parser.pdf_splitter import PDFSplitter
			
 
				-        
			
 
				-        splitter = PDFSplitter()
			
 
				-        print(f"正在拆分PDF: {pdf_path}")
			
 
				-        pages = splitter.split_pdf(pdf_path)
			
 
				-        
			
 
				-        print(f"✓ PDF拆分成功，共 {len(pages)} 页")
			
 
				-        for page in pages[:3]:  # 只显示前3页
			
 
				-            print(f"  - 页码: {page['page_number']}, 图像大小: {page['image'].size}")
			
 
				-        
			
 
				-        if len(pages) > 3:
			
 
				-            print(f"  ... 以及 {len(pages) - 3} 页")
			
 
				-        
			
 
				-        return True
			
 
				-    except Exception as e:
			
 
				-        print(f"✗ PDF拆分失败: {str(e)}")
			
 
				-        print("可能的解决方案:")
			
 
				-        print("1. 确保poppler已正确安装并在PATH中")
			
 
				-        print("2. 检查PDF文件是否损坏")
			
 
				-        print("3. 检查pdf2image库版本是否兼容")
			
 
				-        return False
			
 
				-
			
 
				-def main():
			
 
				-    """主函数"""
			
 
				-    test_pdf_splitter()
			
 
				-    
			
 
				-    print("\n" + "=" * 50)
			
 
				-    print("验证完成")
			
 
				-    print("=" * 50)
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/test/vl_embedding_test.py
+++ b/test/vl_embedding_test.py
@@ -1,38 +0,0 @@
 
				-import sys
			
 
				-import os
			
 
				-from PIL import Image
			
 
				-
			
 
				-# 添加项目根目录到Python路径
			
 
				-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				-
			
 
				-from model.multimodal_embedding import Embedding
			
 
				-from conf.config import ModelConfig
			
 
				-
			
 
				-def main():
			
 
				-    """测试主函数"""
			
 
				-    print("开始测试VL嵌入...")
			
 
				-    print("=" * 50)
			
 
				-    
			
 
				-    # 初始化OpenAIEmbedding模型
			
 
				-    embedding_model = Embedding("qwen2.5-vl-embedding", "sk-bc0f1026a41c4c92beb014be8973e4e2")
			
 
				-    # 图片
			
 
				-    image_path = r"D:\project\work\ragflow_plugs\book\output\temp\美美.png"
			
 
				-    
			
 
				-    # 检查图片文件是否存在
			
 
				-    if not os.path.exists(image_path):
			
 
				-        print(f"图片文件不存在: {image_path}")
			
 
				-        return
			
 
				-    
			
 
				-    try:
			
 
				-        # 打开图像文件
			
 
				-        image = Image.open(image_path)
			
 
				-        text = "美美"
			
 
				-        res = embedding_model.get_multimodal_embedding(text, image)
			
 
				-        print(f"图片embedding值: {res}")
			
 
				-    except Exception as e:
			
 
				-        print(f"测试失败: {str(e)}")
			
 
				-    
			
 
				-    print("=" * 50)
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/utils/__pycache__/__init__.cpython-312.pyc
+++ b/utils/__pycache__/__init__.cpython-312.pyc
--- a/utils/es/__init__.py
+++ b/utils/es/__init__.py
@@ -1,17 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 工具模块
			
 
				-"""
			
 
				-
			
 
				-from .base import ESConnection
			
 
				-from .constants import ES_DEFAULT_CONFIG
			
 
				-from .document import DocumentManager
			
 
				-from .index import IndexManager
			
 
				-from .search import SearchManager
			
 
				-
			
 
				-__all__ = [
			
 
				-    "ESConnection",
			
 
				-    "ES_DEFAULT_CONFIG",
			
 
				-    "DocumentManager",
			
 
				-    "IndexManager",
			
 
				-    "SearchManager"
			
 
				-]
			
--- a/utils/es/__pycache__/__init__.cpython-312.pyc
+++ b/utils/es/__pycache__/__init__.cpython-312.pyc
--- a/utils/es/__pycache__/base.cpython-312.pyc
+++ b/utils/es/__pycache__/base.cpython-312.pyc
--- a/utils/es/__pycache__/constants.cpython-312.pyc
+++ b/utils/es/__pycache__/constants.cpython-312.pyc
--- a/utils/es/__pycache__/document.cpython-312.pyc
+++ b/utils/es/__pycache__/document.cpython-312.pyc
--- a/utils/es/__pycache__/index.cpython-312.pyc
+++ b/utils/es/__pycache__/index.cpython-312.pyc
--- a/utils/es/__pycache__/search.cpython-312.pyc
+++ b/utils/es/__pycache__/search.cpython-312.pyc
--- a/utils/es/__pycache__/templates.cpython-312.pyc
+++ b/utils/es/__pycache__/templates.cpython-312.pyc
--- a/utils/es/base.py
+++ b/utils/es/base.py
@@ -1,68 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 连接基础类
			
 
				-"""
			
 
				-from typing import List, Dict, Any, Optional
			
 
				-from elasticsearch import Elasticsearch
			
 
				-from elastic_transport import ConnectionTimeout
			
 
				-from utils.decorators import singleton
			
 
				-from utils.es.constants import ES_DEFAULT_CONFIG
			
 
				-from utils.es.templates import get_dynamic_templates
			
 
				-
			
 
				-
			
 
				-@singleton
			
 
				-class ESConnection:
			
 
				-    """
			
 
				-    Elasticsearch 连接管理器
			
 
				-    支持：
			
 
				-    - 单例模式
			
 
				-    - 连接池管理
			
 
				-    - 基础配置管理
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self, hosts: List[str] = None, **kwargs):
			
 
				-        """
			
 
				-        初始化 Elasticsearch 连接
			
 
				-        
			
 
				-        Args:
			
 
				-            hosts: Elasticsearch 主机列表，格式如 ["http://localhost:9200"]
			
 
				-            **kwargs: 其他 Elasticsearch 客户端配置参数
			
 
				-        """
			
 
				-        # 合并配置
			
 
				-        self.config = {**ES_DEFAULT_CONFIG, **kwargs}
			
 
				-        self.hosts = hosts or ES_DEFAULT_CONFIG.get("hosts", ["http://localhost:9200"])
			
 
				-        
			
 
				-        # 初始化 Elasticsearch 客户端
			
 
				-        self.es = Elasticsearch(
			
 
				-            hosts=self.hosts,
			
 
				-            **self.config
			
 
				-        )
			
 
				-        
			
 
				-        # 动态模板映射
			
 
				-        self.dynamic_templates = get_dynamic_templates()
			
 
				-    
			
 
				-    def ping(self) -> bool:
			
 
				-        """
			
 
				-        检查 ES 连接是否正常
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 连接是否正常
			
 
				-        """
			
 
				-        try:
			
 
				-            return self.es.ping()
			
 
				-        except Exception:
			
 
				-            return False
			
 
				-    
			
 
				-    def get_client(self) -> Elasticsearch:
			
 
				-        """
			
 
				-        获取 ES 客户端实例
			
 
				-        
			
 
				-        Returns:
			
 
				-            Elasticsearch: ES 客户端实例
			
 
				-        """
			
 
				-        return self.es
			
 
				-    
			
 
				-    def close(self):
			
 
				-        """
			
 
				-        关闭 Elasticsearch 连接
			
 
				-        """
			
 
				-        self.es.close()
			
--- a/utils/es/constants.py
+++ b/utils/es/constants.py
@@ -1,25 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 常量配置
			
 
				-"""
			
 
				-
			
 
				-# 默认配置
			
 
				-ES_DEFAULT_CONFIG = {
			
 
				-    "http_compress": True,
			
 
				-    "max_retries": 3,
			
 
				-    "retry_on_timeout": True,
			
 
				-    "timeout": 60,
			
 
				-    "sniff_on_start": False,
			
 
				-    "sniff_on_connection_fail": False,
			
 
				-    "sniffer_timeout": 0,
			
 
				-    "connections_per_node": 5,  # 每个节点的连接数
			
 
				-    "randomize_nodes_in_pool": True
			
 
				-}
			
 
				-
			
 
				-# 连接池大小
			
 
				-ES_CONNECTIONS_PER_NODE = 5
			
 
				-
			
 
				-# 默认超时时间
			
 
				-ES_DEFAULT_TIMEOUT = 60
			
 
				-
			
 
				-# 默认主机
			
 
				-ES_DEFAULT_HOSTS = ["http://localhost:9200"]
			
--- a/utils/es/document.py
+++ b/utils/es/document.py
@@ -1,192 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 文档管理
			
 
				-"""
			
 
				-from typing import List, Dict, Any, Optional
			
 
				-from elasticsearch.helpers import bulk, BulkIndexError
			
 
				-from elasticsearch.exceptions import NotFoundError
			
 
				-from utils.es.base import ESConnection
			
 
				-
			
 
				-
			
 
				-class DocumentManager:
			
 
				-    """
			
 
				-    Elasticsearch 文档管理器
			
 
				-    负责：
			
 
				-    - 文档插入（单条和批量）
			
 
				-    - 文档更新
			
 
				-    - 文档删除（单条和批量）
			
 
				-    - 文档获取
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self, es_connection: Optional[ESConnection] = None):
			
 
				-        """
			
 
				-        初始化文档管理器
			
 
				-        
			
 
				-        Args:
			
 
				-            es_connection: ES 连接实例，可选
			
 
				-        """
			
 
				-        self.es_conn = es_connection or ESConnection()
			
 
				-        self.es = self.es_conn.get_client()
			
 
				-    
			
 
				-    def insert(self, index_name: str, document: Dict[str, Any], id: str = None, refresh: bool = False) -> bool:
			
 
				-        """
			
 
				-        插入单个文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            document: 文档内容
			
 
				-            id: 文档ID，可选
			
 
				-            refresh: 是否立即刷新
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 插入是否成功
			
 
				-        """
			
 
				-        try:
			
 
				-            self.es.index(index=index_name, body=document, id=id, refresh=refresh)
			
 
				-            return True
			
 
				-        except Exception as e:
			
 
				-            print(f"插入文档失败: {e}")
			
 
				-            return False
			
 
				-    
			
 
				-    def bulk_insert(self, index_name: str, documents: List[Dict[str, Any]], refresh: bool = False) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        批量插入文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            documents: 文档列表，每个文档可以包含"_id"字段指定ID
			
 
				-            refresh: 是否立即刷新
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 包含成功和失败信息的字典
			
 
				-        """
			
 
				-        try:
			
 
				-            # 准备批量操作
			
 
				-            actions = []
			
 
				-            for doc in documents:
			
 
				-                action = {
			
 
				-                    "_index": index_name,
			
 
				-                    "_source": doc.copy()
			
 
				-                }
			
 
				-                # 如果文档包含"_id"字段，将其作为文档ID
			
 
				-                if "_id" in doc:
			
 
				-                    action["_id"] = doc["_id"]
			
 
				-                    del action["_source"]["_id"]
			
 
				-                actions.append(action)
			
 
				-            
			
 
				-            # 执行批量操作
			
 
				-            success, failed = bulk(self.es, actions, refresh=refresh, stats_only=False)
			
 
				-            
			
 
				-            return {
			
 
				-                "success": success,
			
 
				-                "failed": len(failed) if failed else 0,
			
 
				-                "errors": failed if failed else []
			
 
				-            }
			
 
				-        except BulkIndexError as e:
			
 
				-            print(f"批量插入失败: {e}")
			
 
				-            return {
			
 
				-                "success": 0,
			
 
				-                "failed": len(e.errors),
			
 
				-                "errors": e.errors
			
 
				-            }
			
 
				-        except Exception as e:
			
 
				-            print(f"批量插入失败: {e}")
			
 
				-            return {
			
 
				-                "success": 0,
			
 
				-                "failed": len(documents),
			
 
				-                "errors": [str(e)] * len(documents)
			
 
				-            }
			
 
				-    
			
 
				-    def update(self, index_name: str, id: str, update_body: Dict[str, Any], refresh: bool = False) -> bool:
			
 
				-        """
			
 
				-        更新文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            id: 文档ID
			
 
				-            update_body: 更新内容，格式如 {"doc": {"field": "value"}}
			
 
				-            refresh: 是否立即刷新
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 更新是否成功
			
 
				-        """
			
 
				-        try:
			
 
				-            self.es.update(index=index_name, id=id, body=update_body, refresh=refresh)
			
 
				-            return True
			
 
				-        except NotFoundError:
			
 
				-            print(f"文档不存在: {id}")
			
 
				-            return False
			
 
				-        except Exception as e:
			
 
				-            print(f"更新文档失败: {e}")
			
 
				-            return False
			
 
				-    
			
 
				-    def delete(self, index_name: str, id: str, refresh: bool = False) -> bool:
			
 
				-        """
			
 
				-        删除单个文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            id: 文档ID
			
 
				-            refresh: 是否立即刷新
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 删除是否成功
			
 
				-        """
			
 
				-        try:
			
 
				-            self.es.delete(index=index_name, id=id, refresh=refresh)
			
 
				-            return True
			
 
				-        except NotFoundError:
			
 
				-            print(f"文档不存在: {id}")
			
 
				-            return False
			
 
				-        except Exception as e:
			
 
				-            print(f"删除文档失败: {e}")
			
 
				-            return False
			
 
				-    
			
 
				-    def delete_by_query(self, index_name: str, query: Dict[str, Any], refresh: bool = False) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        按查询条件删除文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            query: 查询条件
			
 
				-            refresh: 是否立即刷新
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 删除结果
			
 
				-        """
			
 
				-        try:
			
 
				-            result = self.es.delete_by_query(index=index_name, body={"query": query}, refresh=refresh)
			
 
				-            return {
			
 
				-                "deleted": result["deleted"],
			
 
				-                "failed": 0
			
 
				-            }
			
 
				-        except Exception as e:
			
 
				-            print(f"按条件删除失败: {e}")
			
 
				-            return {
			
 
				-                "deleted": 0,
			
 
				-                "failed": 1,
			
 
				-                "error": str(e)
			
 
				-            }
			
 
				-    
			
 
				-    def get(self, index_name: str, id: str, fields: List[str] = None) -> Optional[Dict[str, Any]]:
			
 
				-        """
			
 
				-        获取单个文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            id: 文档ID
			
 
				-            fields: 要返回的字段列表，可选
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 文档内容，不存在则返回None
			
 
				-        """
			
 
				-        try:
			
 
				-            params = {}
			
 
				-            if fields:
			
 
				-                params["_source"] = fields
			
 
				-            result = self.es.get(index=index_name, id=id, **params)
			
 
				-            return result["_source"]
			
 
				-        except NotFoundError:
			
 
				-            return None
			
 
				-        except Exception as e:
			
 
				-            print(f"获取文档失败: {e}")
			
 
				-            return None
			
--- a/utils/es/index.py
+++ b/utils/es/index.py
@@ -1,131 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 索引管理
			
 
				-"""
			
 
				-from typing import Dict, Any, Optional
			
 
				-from utils.es.base import ESConnection
			
 
				-
			
 
				-
			
 
				-class IndexManager:
			
 
				-    """
			
 
				-    Elasticsearch 索引管理器
			
 
				-    负责：
			
 
				-    - 索引创建
			
 
				-    - 索引删除
			
 
				-    - 索引检查
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self, es_connection: Optional[ESConnection] = None):
			
 
				-        """
			
 
				-        初始化索引管理器
			
 
				-        
			
 
				-        Args:
			
 
				-            es_connection: ES 连接实例，可选
			
 
				-        """
			
 
				-        self.es_conn = es_connection or ESConnection()
			
 
				-        self.es = self.es_conn.get_client()
			
 
				-    
			
 
				-    def create_index(self, index_name: str, mappings: Dict[str, Any] = None, settings: Dict[str, Any] = None) -> bool:
			
 
				-        """
			
 
				-        创建索引
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            mappings: 自定义映射，会与动态模板合并
			
 
				-            settings: 索引设置
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 创建是否成功
			
 
				-        """
			
 
				-        try:
			
 
				-            # 如果索引已存在，返回True
			
 
				-            if self.es.indices.exists(index=index_name):
			
 
				-                return True
			
 
				-            
			
 
				-            # 合并动态模板和自定义映射
			
 
				-            final_mappings = self.es_conn.dynamic_templates.copy()
			
 
				-            if mappings:
			
 
				-                if "dynamic_templates" in mappings:
			
 
				-                    final_mappings["dynamic_templates"] += mappings["dynamic_templates"]
			
 
				-                if "properties" in mappings:
			
 
				-                    final_mappings["properties"] = mappings["properties"]
			
 
				-            
			
 
				-            body = {}
			
 
				-            if settings:
			
 
				-                body["settings"] = settings
			
 
				-            body["mappings"] = final_mappings
			
 
				-            
			
 
				-            self.es.indices.create(index=index_name, body=body)
			
 
				-            return True
			
 
				-        except Exception as e:
			
 
				-            print(f"创建索引失败: {e}")
			
 
				-            return False
			
 
				-    
			
 
				-    def delete_index(self, index_name: str) -> bool:
			
 
				-        """
			
 
				-        删除索引
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 删除是否成功
			
 
				-        """
			
 
				-        try:
			
 
				-            if self.es.indices.exists(index=index_name):
			
 
				-                self.es.indices.delete(index=index_name)
			
 
				-            return True
			
 
				-        except Exception as e:
			
 
				-            print(f"删除索引失败: {e}")
			
 
				-            return False
			
 
				-    
			
 
				-    def exists(self, index_name: str) -> bool:
			
 
				-        """
			
 
				-        检查索引是否存在
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-        
			
 
				-        Returns:
			
 
				-            bool: 索引是否存在
			
 
				-        """
			
 
				-        try:
			
 
				-            return self.es.indices.exists(index=index_name)
			
 
				-        except Exception as e:
			
 
				-            print(f"检查索引存在失败: {e}")
			
 
				-            return False
			
 
				-    
			
 
				-    def get_mappings(self, index_name: str) -> Optional[Dict[str, Any]]:
			
 
				-        """
			
 
				-        获取索引映射
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict[str, Any]: 索引映射，不存在则返回None
			
 
				-        """
			
 
				-        try:
			
 
				-            if self.exists(index_name):
			
 
				-                return self.es.indices.get_mapping(index=index_name)
			
 
				-            return None
			
 
				-        except Exception as e:
			
 
				-            print(f"获取索引映射失败: {e}")
			
 
				-            return None
			
 
				-    
			
 
				-    def get_settings(self, index_name: str) -> Optional[Dict[str, Any]]:
			
 
				-        """
			
 
				-        获取索引设置
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict[str, Any]: 索引设置，不存在则返回None
			
 
				-        """
			
 
				-        try:
			
 
				-            if self.exists(index_name):
			
 
				-                return self.es.indices.get_settings(index=index_name)
			
 
				-            return None
			
 
				-        except Exception as e:
			
 
				-            print(f"获取索引设置失败: {e}")
			
 
				-            return None
			
--- a/utils/es/search.py
+++ b/utils/es/search.py
@@ -1,202 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 搜索管理
			
 
				-"""
			
 
				-from typing import List, Dict, Any, Optional
			
 
				-from utils.es.base import ESConnection
			
 
				-
			
 
				-
			
 
				-class SearchManager:
			
 
				-    """
			
 
				-    Elasticsearch 搜索管理器
			
 
				-    负责：
			
 
				-    - 全文检索
			
 
				-    - 向量相似度检索（k-NN）
			
 
				-    - 混合检索（文本+向量）
			
 
				-    - 高亮显示
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self, es_connection: Optional[ESConnection] = None):
			
 
				-        """
			
 
				-        初始化搜索管理器
			
 
				-        
			
 
				-        Args:
			
 
				-            es_connection: ES 连接实例，可选
			
 
				-        """
			
 
				-        self.es_conn = es_connection or ESConnection()
			
 
				-        self.es = self.es_conn.get_client()
			
 
				-    
			
 
				-    def search(self, index_name: str, query: Dict[str, Any], size: int = 10, from_: int = 0, 
			
 
				-               fields: List[str] = None, highlight: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        搜索文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            query: 查询条件
			
 
				-            size: 返回结果数量
			
 
				-            from_: 起始位置
			
 
				-            fields: 要返回的字段列表，可选
			
 
				-            highlight: 高亮配置，可选
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 搜索结果
			
 
				-        """
			
 
				-        try:
			
 
				-            body = {
			
 
				-                "query": query,
			
 
				-                "size": size,
			
 
				-                "from": from_
			
 
				-            }
			
 
				-            
			
 
				-            if fields:
			
 
				-                body["_source"] = fields
			
 
				-            
			
 
				-            if highlight:
			
 
				-                body["highlight"] = highlight
			
 
				-            
			
 
				-            result = self.es.search(index=index_name, body=body)
			
 
				-            return result
			
 
				-        except Exception as e:
			
 
				-            print(f"搜索失败: {e}")
			
 
				-            return {"hits": {"total": 0, "hits": []}}
			
 
				-    
			
 
				-    def hybrid_search(self, index_name: str, text_query: str, vector_field: str, vector: List[float], 
			
 
				-                     size: int = 10, from_: int = 0, fields: List[str] = None, 
			
 
				-                     text_weight: float = 0.5, vector_weight: float = 0.5) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        混合检索：向量相似度检索 + 全文检索
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            text_query: 文本查询语句
			
 
				-            vector_field: 向量字段名
			
 
				-            vector: 检索向量
			
 
				-            size: 返回结果数量
			
 
				-            from_: 起始位置
			
 
				-            fields: 要返回的字段列表，可选
			
 
				-            text_weight: 文本检索权重
			
 
				-            vector_weight: 向量检索权重
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 搜索结果
			
 
				-        """
			
 
				-        try:
			
 
				-            # 构建混合检索查询
			
 
				-            query = {
			
 
				-                "bool": {
			
 
				-                    "should": [
			
 
				-                        {
			
 
				-                            "query_string": {
			
 
				-                                "query": text_query,
			
 
				-                                "default_operator": "OR",
			
 
				-                                "boost": text_weight
			
 
				-                            }
			
 
				-                        },
			
 
				-                        {
			
 
				-                            "script_score": {
			
 
				-                                "query": {
			
 
				-                                    "match_all": {}
			
 
				-                                },
			
 
				-                                "script": {
			
 
				-                                    "source": "cosineSimilarity(params.query_vector, doc[params.vector_field]) + 1.0",
			
 
				-                                    "params": {
			
 
				-                                        "query_vector": vector,
			
 
				-                                        "vector_field": vector_field
			
 
				-                                    }
			
 
				-                                },
			
 
				-                                "boost": vector_weight
			
 
				-                            }
			
 
				-                        }
			
 
				-                    ]
			
 
				-                }
			
 
				-            }
			
 
				-            
			
 
				-            body = {
			
 
				-                "query": query,
			
 
				-                "size": size,
			
 
				-                "from": from_
			
 
				-            }
			
 
				-            
			
 
				-            if fields:
			
 
				-                body["_source"] = fields
			
 
				-            
			
 
				-            result = self.es.search(index=index_name, body=body)
			
 
				-            return result
			
 
				-        except Exception as e:
			
 
				-            print(f"混合检索失败: {e}")
			
 
				-            return {"hits": {"total": 0, "hits": []}}
			
 
				-    
			
 
				-    def knn_search(self, index_name: str, vector_field: str, vector: List[float], 
			
 
				-                  k: int = 10, filter_query: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        向量相似度检索（k-NN）
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            vector_field: 向量字段名
			
 
				-            vector: 检索向量
			
 
				-            k: 返回结果数量
			
 
				-            filter_query: 过滤条件，可选
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 搜索结果
			
 
				-        """
			
 
				-        try:
			
 
				-            knn = {
			
 
				-                "field": vector_field,
			
 
				-                "query_vector": vector,
			
 
				-                "k": k,
			
 
				-                "num_candidates": k * 10
			
 
				-            }
			
 
				-            
			
 
				-            if filter_query:
			
 
				-                knn["filter"] = filter_query
			
 
				-            
			
 
				-            body = {
			
 
				-                "knn": knn
			
 
				-            }
			
 
				-            
			
 
				-            result = self.es.search(index=index_name, body=body)
			
 
				-            return result
			
 
				-        except Exception as e:
			
 
				-            print(f"向量检索失败: {e}")
			
 
				-            return {"hits": {"total": 0, "hits": []}}
			
 
				-    
			
 
				-    def match_search(self, index_name: str, field: str, value: str, size: int = 10, 
			
 
				-                     fields: List[str] = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        简单匹配搜索
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            field: 字段名
			
 
				-            value: 匹配值
			
 
				-            size: 返回结果数量
			
 
				-            fields: 要返回的字段列表，可选
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 搜索结果
			
 
				-        """
			
 
				-        query = {
			
 
				-            "match": {
			
 
				-                field: value
			
 
				-            }
			
 
				-        }
			
 
				-        return self.search(index_name, query, size=size, fields=fields)
			
 
				-    
			
 
				-    def match_all(self, index_name: str, size: int = 10, fields: List[str] = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        匹配所有文档
			
 
				-        
			
 
				-        Args:
			
 
				-            index_name: 索引名称
			
 
				-            size: 返回结果数量
			
 
				-            fields: 要返回的字段列表，可选
			
 
				-        
			
 
				-        Returns:
			
 
				-            Dict: 搜索结果
			
 
				-        """
			
 
				-        query = {
			
 
				-            "match_all": {}
			
 
				-        }
			
 
				-        return self.search(index_name, query, size=size, fields=fields)
			
--- a/utils/es/templates.py
+++ b/utils/es/templates.py
@@ -1,203 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 动态模板映射
			
 
				-"""
			
 
				-from typing import Dict, Any
			
 
				-
			
 
				-
			
 
				-def get_dynamic_templates() -> Dict[str, Any]:
			
 
				-    """
			
 
				-    获取动态模板映射配置
			
 
				-    参考：d:/project/work/ragflow_plugs/book/es_dynamic.md
			
 
				-    
			
 
				-    Returns:
			
 
				-        Dict[str, Any]: 动态模板映射配置
			
 
				-    """
			
 
				-    return {
			
 
				-        "dynamic_templates": [
			
 
				-            {
			
 
				-                "int": {
			
 
				-                    "match": "*_int",
			
 
				-                    "mapping": {
			
 
				-                        "store": True,
			
 
				-                        "type": "integer"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "ulong": {
			
 
				-                    "match": "*_ulong",
			
 
				-                    "mapping": {
			
 
				-                        "store": True,
			
 
				-                        "type": "unsigned_long"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "long": {
			
 
				-                    "match": "*_long",
			
 
				-                    "mapping": {
			
 
				-                        "store": True,
			
 
				-                        "type": "long"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "short": {
			
 
				-                    "match": "*_short",
			
 
				-                    "mapping": {
			
 
				-                        "store": True,
			
 
				-                        "type": "short"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "numeric": {
			
 
				-                    "match": "*_flt",
			
 
				-                    "mapping": {
			
 
				-                        "store": True,
			
 
				-                        "type": "float"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "tks": {
			
 
				-                    "match": "*_tks",
			
 
				-                    "mapping": {
			
 
				-                        "analyzer": "whitespace",
			
 
				-                        "similarity": "scripted_sim",
			
 
				-                        "store": True,
			
 
				-                        "type": "text"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "ltks": {
			
 
				-                    "match": "*_ltks",
			
 
				-                    "mapping": {
			
 
				-                        "analyzer": "whitespace",
			
 
				-                        "store": True,
			
 
				-                        "type": "text"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "kwd": {
			
 
				-                    "match": "^(.*_(kwd|id|ids|uid|uids)|uid)$",
			
 
				-                    "match_pattern": "regex",
			
 
				-                    "mapping": {
			
 
				-                        "similarity": "boolean",
			
 
				-                        "store": True,
			
 
				-                        "type": "keyword"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "dt": {
			
 
				-                    "match": "^.*(_dt|_time|_at)$",
			
 
				-                    "match_pattern": "regex",
			
 
				-                    "mapping": {
			
 
				-                        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss",
			
 
				-                        "store": True,
			
 
				-                        "type": "date"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "nested": {
			
 
				-                    "match": "*_nst",
			
 
				-                    "mapping": {
			
 
				-                        "type": "nested"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "object": {
			
 
				-                    "match": "*_obj",
			
 
				-                    "mapping": {
			
 
				-                        "dynamic": True,
			
 
				-                        "type": "object"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "string": {
			
 
				-                    "match": "^.*_(with_weight|list)$",
			
 
				-                    "match_pattern": "regex",
			
 
				-                    "mapping": {
			
 
				-                        "index": False,
			
 
				-                        "store": True,
			
 
				-                        "type": "text"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "rank_feature": {
			
 
				-                    "match": "*_fea",
			
 
				-                    "mapping": {
			
 
				-                        "type": "rank_feature"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "rank_features": {
			
 
				-                    "match": "*_feas",
			
 
				-                    "mapping": {
			
 
				-                        "type": "rank_features"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "dense_vector_512": {
			
 
				-                    "match": "*_512_vec",
			
 
				-                    "mapping": {
			
 
				-                        "dims": 512,
			
 
				-                        "index": True,
			
 
				-                        "similarity": "cosine",
			
 
				-                        "type": "dense_vector"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "dense_vector_768": {
			
 
				-                    "match": "*_768_vec",
			
 
				-                    "mapping": {
			
 
				-                        "dims": 768,
			
 
				-                        "index": True,
			
 
				-                        "similarity": "cosine",
			
 
				-                        "type": "dense_vector"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "dense_vector_1024": {
			
 
				-                    "match": "*_1024_vec",
			
 
				-                    "mapping": {
			
 
				-                        "dims": 1024,
			
 
				-                        "index": True,
			
 
				-                        "similarity": "cosine",
			
 
				-                        "type": "dense_vector"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "dense_vector_1536": {
			
 
				-                    "match": "*_1536_vec",
			
 
				-                    "mapping": {
			
 
				-                        "dims": 1536,
			
 
				-                        "index": True,
			
 
				-                        "similarity": "cosine",
			
 
				-                        "type": "dense_vector"
			
 
				-                    }
			
 
				-                }
			
 
				-            },
			
 
				-            {
			
 
				-                "binary": {
			
 
				-                    "match": "*_bin",
			
 
				-                    "mapping": {
			
 
				-                        "type": "binary"
			
 
				-                    }
			
 
				-                }
			
 
				-            }
			
 
				-        ],
			
 
				-        "date_detection": True
			
 
				-    }
			
--- a/utils/es_conn.py
+++ b/utils/es_conn.py
@@ -1,138 +0,0 @@
 
				-"""
			
 
				-Elasticsearch 连接管理器（向后兼容接口）
			
 
				-
			
 
				-该文件提供与旧版 es_conn.py 兼容的接口，同时内部使用新的工程化模块。
			
 
				-"""
			
 
				-import re
			
 
				-import json
			
 
				-import time
			
 
				-from typing import Any, List, Dict, Optional, Union
			
 
				-from elasticsearch import Elasticsearch, helpers
			
 
				-from elasticsearch.helpers import bulk, BulkIndexError
			
 
				-from elastic_transport import ConnectionTimeout
			
 
				-from elasticsearch.exceptions import NotFoundError
			
 
				-
			
 
				-from services.utils.es.base import ESConnection as _ESConnection
			
 
				-from services.utils.es.index import IndexManager
			
 
				-from services.utils.es.document import DocumentManager
			
 
				-from services.utils.es.search import SearchManager
			
 
				-
			
 
				-# 单例装饰器
			
 
				-class singleton:
			
 
				-    def __init__(self, cls):
			
 
				-        self.cls = cls
			
 
				-        self._instance = None
			
 
				-    
			
 
				-    def __call__(self, *args, **kwargs):
			
 
				-        if self._instance is None:
			
 
				-            self._instance = self.cls(*args, **kwargs)
			
 
				-        return self._instance
			
 
				-
			
 
				-@singleton
			
 
				-class ESConnection:
			
 
				-    """
			
 
				-    Elasticsearch 连接管理器（向后兼容）
			
 
				-    支持：
			
 
				-    - 单例模式
			
 
				-    - 连接池管理
			
 
				-    - CRUD操作
			
 
				-    - 向量相似度检索 + 全文检索的混合检索
			
 
				-    - 动态模板映射
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self, hosts: List[str] = None, **kwargs):
			
 
				-        """
			
 
				-        初始化 Elasticsearch 连接
			
 
				-        
			
 
				-        Args:
			
 
				-            hosts: Elasticsearch 主机列表，格式如 ["http://localhost:9200"]
			
 
				-            **kwargs: 其他 Elasticsearch 客户端配置参数
			
 
				-        """
			
 
				-        # 使用新的 ESConnection 作为底层连接
			
 
				-        self._es_conn = _ESConnection(hosts=hosts, **kwargs)
			
 
				-        
			
 
				-        # 初始化管理器
			
 
				-        self.index_manager = IndexManager(self._es_conn)
			
 
				-        self.document_manager = DocumentManager(self._es_conn)
			
 
				-        self.search_manager = SearchManager(self._es_conn)
			
 
				-        
			
 
				-        # 向后兼容属性
			
 
				-        self.es = self._es_conn.get_client()
			
 
				-        self.dynamic_templates = self._es_conn.dynamic_templates
			
 
				-    
			
 
				-    def _get_dynamic_templates(self) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        获取动态模板映射配置（向后兼容方法）
			
 
				-        """
			
 
				-        return self.dynamic_templates
			
 
				-    
			
 
				-    def create_index(self, index_name: str, mappings: Dict[str, Any] = None, settings: Dict[str, Any] = None) -> bool:
			
 
				-        """
			
 
				-        创建索引
			
 
				-        """
			
 
				-        return self.index_manager.create_index(index_name, mappings, settings)
			
 
				-    
			
 
				-    def insert(self, index_name: str, document: Dict[str, Any], id: str = None, refresh: bool = False) -> bool:
			
 
				-        """
			
 
				-        插入单个文档
			
 
				-        """
			
 
				-        return self.document_manager.insert(index_name, document, id, refresh)
			
 
				-    
			
 
				-    def bulk_insert(self, index_name: str, documents: List[Dict[str, Any]], refresh: bool = False) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        批量插入文档
			
 
				-        """
			
 
				-        return self.document_manager.bulk_insert(index_name, documents, refresh)
			
 
				-    
			
 
				-    def update(self, index_name: str, id: str, update_body: Dict[str, Any], refresh: bool = False) -> bool:
			
 
				-        """
			
 
				-        更新文档
			
 
				-        """
			
 
				-        return self.document_manager.update(index_name, id, update_body, refresh)
			
 
				-    
			
 
				-    def delete(self, index_name: str, id: str, refresh: bool = False) -> bool:
			
 
				-        """
			
 
				-        删除文档
			
 
				-        """
			
 
				-        return self.document_manager.delete(index_name, id, refresh)
			
 
				-    
			
 
				-    def delete_by_query(self, index_name: str, query: Dict[str, Any], refresh: bool = False) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        按查询条件删除文档
			
 
				-        """
			
 
				-        return self.document_manager.delete_by_query(index_name, query, refresh)
			
 
				-    
			
 
				-    def get(self, index_name: str, id: str, fields: List[str] = None) -> Optional[Dict[str, Any]]:
			
 
				-        """
			
 
				-        获取单个文档
			
 
				-        """
			
 
				-        return self.document_manager.get(index_name, id, fields)
			
 
				-    
			
 
				-    def search(self, index_name: str, query: Dict[str, Any], size: int = 10, from_: int = 0, 
			
 
				-               fields: List[str] = None, highlight: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        搜索文档
			
 
				-        """
			
 
				-        return self.search_manager.search(index_name, query, size, from_, fields, highlight)
			
 
				-    
			
 
				-    def hybrid_search(self, index_name: str, text_query: str, vector_field: str, vector: List[float], 
			
 
				-                     size: int = 10, from_: int = 0, fields: List[str] = None, 
			
 
				-                     text_weight: float = 0.5, vector_weight: float = 0.5) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        混合检索：向量相似度检索 + 全文检索
			
 
				-        """
			
 
				-        return self.search_manager.hybrid_search(index_name, text_query, vector_field, vector, 
			
 
				-                                               size, from_, fields, text_weight, vector_weight)
			
 
				-    
			
 
				-    def knn_search(self, index_name: str, vector_field: str, vector: List[float], 
			
 
				-                  k: int = 10, filter_query: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        向量相似度检索（k-NN）
			
 
				-        """
			
 
				-        return self.search_manager.knn_search(index_name, vector_field, vector, k, filter_query)
			
 
				-    
			
 
				-    def close(self):
			
 
				-        """
			
 
				-        关闭 Elasticsearch 连接
			
 
				-        """
			
 
				-        self._es_conn.close()
			
--- a/utils/file/__pycache__/file_utils.cpython-312.pyc
+++ b/utils/file/__pycache__/file_utils.cpython-312.pyc
--- a/utils/file/__pycache__/image_util.cpython-312.pyc
+++ b/utils/file/__pycache__/image_util.cpython-312.pyc
--- a/utils/minio/file_utils.py
+++ b/utils/minio/file_utils.py
--- a/utils/minio/image_util.py
+++ b/utils/minio/image_util.py
@@ -2,15 +2,14 @@
 
				 """
			
 
				 图片处理工具类
			
 
				 """
			
 
				-
			
 
				 import os
			
 
				 import zipfile
			
 
				 import re
			
 
				-from typing import List, Dict, Any
			
 
				+from typing import List
			
 
				 from io import BytesIO
			
 
				 from PIL import Image
			
 
				-from utils.minio.minio_util import MinIOUtil
			
 
				-from utils.minio.file_utils import generate_unique_filename
			
 
				+from utils.file.minio.minio_util import MinIOUtil
			
 
				+from utils.file.file_utils import generate_unique_filename
			
 
				 
			
 
				 
			
 
				 class ImageUtil:
			
@@ -19,6 +18,32 @@ class ImageUtil:
 
				     def __init__(self):
			
 
				         """初始化图片处理工具类"""
			
 
				         self.minio_util = MinIOUtil()
			
 
				+
			
 
				+    # 将图片url转换为Image对象
			
 
				+    def _url_to_image(self, image_url: str) -> Image.Image:
			
 
				+        """
			
 
				+        将图片url转换为Image对象
			
 
				+        
			
 
				+        Args:
			
 
				+            image_url: 图片url
			
 
				+            
			
 
				+        Returns:
			
 
				+            Image.Image: 图片对象
			
 
				+        """
			
 
				+        import requests
			
 
				+
			
 
				+        # 处理image_url为image: Image.Image
			
 
				+        if isinstance(image_url, str):
			
 
				+            # 下载图片
			
 
				+            response = requests.get(image_url)
			
 
				+            response.raise_for_status()  # 检查HTTP状态码
			
 
				+    
			
 
				+            # 将响应内容转换为字节流
			
 
				+            image_bytes = BytesIO(response.content)
			
 
				+    
			
 
				+            # 创建Image对象
			
 
				+            image = Image.open(image_bytes)
			
 
				+            return image
			
 
				     
			
 
				     def process_image_zip(self, zip_file_path: str, book_name: str) -> List[str]:
			
 
				         """
			
--- a/utils/file/minio/__init__.py
+++ b/utils/file/minio/__init__.py
--- a/utils/file/minio/__pycache__/__init__.cpython-312.pyc
+++ b/utils/file/minio/__pycache__/__init__.cpython-312.pyc
--- a/utils/file/minio/__pycache__/file_utils.cpython-312.pyc
+++ b/utils/file/minio/__pycache__/file_utils.cpython-312.pyc
--- a/utils/file/minio/__pycache__/image_util.cpython-312.pyc
+++ b/utils/file/minio/__pycache__/image_util.cpython-312.pyc
--- a/utils/file/minio/__pycache__/minio_util.cpython-312.pyc
+++ b/utils/file/minio/__pycache__/minio_util.cpython-312.pyc
--- a/utils/file/minio/minio_util.py
+++ b/utils/file/minio/minio_util.py
@@ -2,7 +2,7 @@ from minio import Minio
 
				 from typing import BinaryIO
			
 
				 from datetime import timedelta
			
 
				 from conf.config import MinioConfig
			
 
				-from .file_utils import generate_unique_filename
			
 
				+from utils.file.file_utils import generate_unique_filename
			
 
				 
			
 
				 class MinIOUtil:
			
 
				     def __init__(self):
			
--- a/utils/infinity/__init__.py
+++ b/utils/infinity/__init__.py
@@ -11,33 +11,3 @@ __all__ = [
 
				     'get_client',
			
 
				     'close_client'
			
 
				 ]
			
 
				-
			
 
				-# 使用示例
			
 
				-"""
			
 
				-# 示例1：基本使用
			
 
				-client = InfinityClient()
			
 
				-databases = client.get_databases()
			
 
				-print(f"Databases: {databases}")
			
 
				-
			
 
				-# 示例2：使用连接池上下文
			
 
				-with client.get_connection() as conn:
			
 
				-    tables = conn.get_tables()
			
 
				-    print(f"Tables: {tables}")
			
 
				-
			
 
				-# 示例3：使用全局客户端
			
 
				-from utils.infinity import get_client
			
 
				-client = get_client()
			
 
				-databases = client.get_databases()
			
 
				-print(f"Databases from global client: {databases}")
			
 
				-
			
 
				-# 示例4：混合检索
			
 
				-result = client.hybrid_search(
			
 
				-    table_name="my_table",
			
 
				-    vector_field="vector",
			
 
				-    query_vector=[0.1, 0.2, 0.3],
			
 
				-    text_query="test",
			
 
				-    text_field="content",
			
 
				-    topn=5
			
 
				-)
			
 
				-print(f"Hybrid search result: {result}")
			
 
				-"""
			
--- a/utils/infinity/__pycache__/__init__.cpython-312.pyc
+++ b/utils/infinity/__pycache__/__init__.cpython-312.pyc
--- a/utils/infinity/__pycache__/client.cpython-312.pyc
+++ b/utils/infinity/__pycache__/client.cpython-312.pyc
--- a/utils/infinity/client.py
+++ b/utils/infinity/client.py
@@ -191,7 +191,7 @@ class InfinityClient:
 
				             # 获取Table对象
			
 
				             table = self._get_table(conn, table_name, database_name)
			
 
				             # 获取结果集
			
 
				-            return table.output(output_fields).match_text(query["field"], query["query"], query["topn"])
			
 
				+            return table.output(output_fields).match_text(query["match_field"], query["matching_text"], query["topn"])
			
 
				     
			
 
				     def hybrid_search(
			
 
				         self,
			
@@ -207,7 +207,7 @@ class InfinityClient:
 
				             # 获取结果集
			
 
				             return table.output(output_fields) \
			
 
				                 .match_dense(query["vector_field"], query["query_vector"], "float", "cosine", query["topn"]) \
			
 
				-                .match_text(query["field"], query["query"], query["topn"]) \
			
 
				+                .match_text(query["match_field"], query["matching_text"], query["topn"]) \
			
 
				                 .fusion("rrf", query["topn"])
			
 
				     
			
 
				     def vector_search(
			
@@ -239,7 +239,7 @@ _client_lock = threading.Lock()
 
				 
			
 
				 def get_client(
			
 
				     host: str = VectorDBConfig.get_infinity_host(),
			
 
				-    port: str = VectorDBConfig.get_infinity_port(),
			
 
				+    port: str = VectorDBConfig.get_infinity_sdk_port(),
			
 
				     database: str = VectorDBConfig.get_infinity_database(),
			
 
				     min_connections: int = 2,
			
 
				     max_connections: int = 10
			
--- a/utils/infinity_util/__init__.py
+++ b/utils/infinity_util/__init__.py
@@ -1,382 +0,0 @@
 
				-"""
			
 
				-Infinity向量数据库主类
			
 
				-基于官方Infinity Python SDK实现
			
 
				-"""
			
 
				-from typing import List, Dict, Any, Optional
			
 
				-import json
			
 
				-
			
 
				-class InfinityVectorDB:
			
 
				-    """
			
 
				-    Infinity向量数据库主类
			
 
				-    提供统一的接口，整合索引、文档和搜索功能
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self):
			
 
				-        """
			
 
				-        初始化Infinity向量数据库
			
 
				-        使用HTTP API实现，不依赖官方SDK
			
 
				-        """
			
 
				-        from conf.config import VectorDBConfig
			
 
				-        from utils.http_client import HTTPClient
			
 
				-        import base64
			
 
				-        
			
 
				-        # 获取配置
			
 
				-        self.host = VectorDBConfig.get_infinity_host()
			
 
				-        self.port = VectorDBConfig.get_infinity_port()
			
 
				-        self.user = VectorDBConfig.get_infinity_user()
			
 
				-        self.password = VectorDBConfig.get_infinity_password()
			
 
				-        self.database = VectorDBConfig.get_infinity_database()
			
 
				-        self.headers = {
			
 
				-                "Accept": "application/json",
			
 
				-                "Content-Type": "application/json"
			
 
				-            }
			
 
				-        
			
 
				-        # 生成Basic Auth令牌
			
 
				-        auth_str = f"{self.user}:{self.password}"
			
 
				-        auth_token = base64.b64encode(auth_str.encode()).decode()
			
 
				-        
			
 
				-        # 初始化HTTP客户端
			
 
				-        self.base_url = f"http://{self.host}:{self.port}"
			
 
				-        self.http_client = HTTPClient(
			
 
				-            base_url=self.base_url,
			
 
				-            api_key=auth_token,
			
 
				-            auth_type='basic'
			
 
				-        )
			
 
				-    
			
 
				-    def create_index(self, index_name: str, mappings: Dict[str, Any] = None) -> bool:
			
 
				-        """创建索引"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API创建表（对应索引）
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}"
			
 
				-            
			
 
				-            # 定义表字段
			
 
				-            with open("conf/infinity_mapping.json", "r", encoding="utf-8") as f:
			
 
				-                fields = json.load(f)
			
 
				-            
			
 
				-            data = {
			
 
				-                "create_option": "ignore_if_exists",
			
 
				-                "fields": fields
			
 
				-            }
			
 
				-        
			
 
				-            response = self.http_client.post(path, json_data=data, headers=self.headers)
			
 
				-            return response.get("error_code") == 0
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to create index: {str(e)}")
			
 
				-            return False
			
 
				-    
			
 
				-    def delete_index(self, index_name: str) -> bool:
			
 
				-        """删除索引"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API删除表（对应索引）
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}"
			
 
				-            
			
 
				-            data = {
			
 
				-                "drop_option": "ignore_if_not_exists"
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.delete(path, json_data=data, headers=self.headers)
			
 
				-            return response.get("error_code") == 0
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to delete index: {str(e)}")
			
 
				-            return False
			
 
				-    
			
 
				-    def index_exists(self, index_name: str) -> bool:
			
 
				-        """检查索引是否存在"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API获取表列表
			
 
				-            path = f"/databases/{self.database}/tables"
			
 
				-            response = self.http_client.get(path, headers=self.headers)
			
 
				-            
			
 
				-            if response.get("error_code") == 0:
			
 
				-                tables = response.get("tables", [])
			
 
				-                return index_name in tables
			
 
				-            return False
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to check index existence: {str(e)}")
			
 
				-            return False
			
 
				-    
			
 
				-    def insert_document(self, index_name: str, document: Dict[str, Any], id: str = None) -> bool:
			
 
				-        """插入单个文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API插入单行数据
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/docs"
			
 
				-            
			
 
				-            # 如果提供了id，将其添加到文档中
			
 
				-            if id:
			
 
				-                document["id"] = id
			
 
				-            
			
 
				-            data = [document]
			
 
				-            response = self.http_client.post(path, json_data=data, headers=self.headers)
			
 
				-            return response.get("error_code") == 0
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to insert document: {str(e)}")
			
 
				-            return False
			
 
				-    
			
 
				-    def bulk_insert(self, index_name: str, documents: List[Dict[str, Any]]) -> Dict[str, Any]:
			
 
				-        """批量插入文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API批量插入数据
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/docs"
			
 
				-            
			
 
				-            data = documents
			
 
				-            response = self.http_client.post(path, json_data=data, headers=self.headers)
			
 
				-            
			
 
				-            if response.get("error_code") == 0:
			
 
				-                return {
			
 
				-                    "success": True,
			
 
				-                    "inserted": len(documents)
			
 
				-                }
			
 
				-            else:
			
 
				-                return {
			
 
				-                    "success": False,
			
 
				-                    "error": response.get("error_msg", "Unknown error"),
			
 
				-                    "inserted": 0
			
 
				-                }
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to bulk insert documents: {str(e)}")
			
 
				-            return {
			
 
				-                "success": False,
			
 
				-                "error": str(e),
			
 
				-                "inserted": 0
			
 
				-            }
			
 
				-    
			
 
				-    def update_document(self, index_name: str, document_id: str, document: Dict[str, Any]) -> bool:
			
 
				-        """更新单个文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API更新行
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/rows"
			
 
				-            
			
 
				-            data = {
			
 
				-                "update_by": {
			
 
				-                    "column": "id",
			
 
				-                    "value": document_id
			
 
				-                },
			
 
				-                "update_data": document
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.put(path, json_data=data, headers=self.headers)
			
 
				-            return response.get("error_code") == 0
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to update document: {str(e)}")
			
 
				-            return False
			
 
				-    
			
 
				-    def delete_document(self, index_name: str, document_id: str) -> bool:
			
 
				-        """删除单个文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API删除行
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/rows"
			
 
				-            
			
 
				-            data = {
			
 
				-                "delete_by": {
			
 
				-                    "column": "id",
			
 
				-                    "value": document_id
			
 
				-                }
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.delete(path, json_data=data, headers=self.headers)
			
 
				-            return response.get("error_code") == 0
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to delete document: {str(e)}")
			
 
				-            return False
			
 
				-    
			
 
				-    def get_document(self, index_name: str, document_id: str) -> Optional[Dict[str, Any]]:
			
 
				-        """获取单个文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API查询单行数据
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/query"
			
 
				-            
			
 
				-            data = {
			
 
				-                "filter": {
			
 
				-                    "column": "id",
			
 
				-                    "operator": "=",
			
 
				-                    "value": document_id
			
 
				-                },
			
 
				-                "limit": 1
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.post(path, json_data=data, headers=self.headers)
			
 
				-            
			
 
				-            if response.get("error_code") == 0:
			
 
				-                rows = response.get("rows", [])
			
 
				-                if rows:
			
 
				-                    return rows[0]
			
 
				-            return None
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to get document: {str(e)}")
			
 
				-            return None
			
 
				-    
			
 
				-    def delete_by_query(self, index_name: str, query: Dict[str, Any]) -> Dict[str, Any]:
			
 
				-        """按查询条件删除文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API按条件删除行
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/rows"
			
 
				-            
			
 
				-            # 构建删除条件
			
 
				-            # 这里假设query是一个简单的字典，如{"dataset_id": "xxx"}
			
 
				-            filter_conditions = []
			
 
				-            for key, value in query.items():
			
 
				-                filter_conditions.append({
			
 
				-                    "column": key,
			
 
				-                    "operator": "=",
			
 
				-                    "value": value
			
 
				-                })
			
 
				-            
			
 
				-            data = {
			
 
				-                "delete_by": {
			
 
				-                    "and": filter_conditions
			
 
				-                }
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.delete(path, json_data=data, headers=self.headers)
			
 
				-            
			
 
				-            if response.get("error_code") == 0:
			
 
				-                return {"success": True}
			
 
				-            else:
			
 
				-                return {"success": False, "error": response.get("error_msg", "Unknown error")}
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to delete by query: {str(e)}")
			
 
				-            return {"success": False, "error": str(e)}
			
 
				-    
			
 
				-    def search(self, index_name: str, query: Dict[str, Any], size: int = 10) -> Dict[str, Any]:
			
 
				-        """搜索文档"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API查询数据
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/docs"
			
 
				-            
			
 
				-            data = {
			
 
				-                "filter": query,
			
 
				-                "limit": size
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.post(path, json_data=data, headers=self.headers)
			
 
				-            
			
 
				-            if response.get("error_code") == 0:
			
 
				-                rows = response.get("output", [])
			
 
				-                return {
			
 
				-                    "output": rows,
			
 
				-                }
			
 
				-            else:
			
 
				-                return {"hits": [], "total": 0, "error": response.get("error_msg", "Unknown error")}
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to search: {str(e)}")
			
 
				-            return {"output": [], "error": str(e)}
			
 
				-    
			
 
				-    def vector_search(self, index_name: str, vector_field: str, vector: List[float], size: int = 10, filter: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """向量检索"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API进行向量检索
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/docs"
			
 
				-            
			
 
				-            data = {
			
 
				-                "vector_field": vector_field,
			
 
				-                "vector": vector,
			
 
				-                "limit": size
			
 
				-            }
			
 
				-            
			
 
				-            if filter:
			
 
				-                data["filter"] = filter
			
 
				-            
			
 
				-            response = self.http_client.post(path, json_data=data, headers=self.headers)
			
 
				-            
			
 
				-            if response.get("error_code") == 0:
			
 
				-                rows = response.get("output", [])
			
 
				-                return {
			
 
				-                    "hits": rows,
			
 
				-                    "total": len(rows)
			
 
				-                }
			
 
				-            else:
			
 
				-                return {"hits": [], "total": 0, "error": response.get("error_msg", "Unknown error")}
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to vector search: {str(e)}")
			
 
				-            return {"hits": [], "total": 0, "error": str(e)}
			
 
				-    
			
 
				-    def hybrid_search(self, index_name: str, match_method: str, vector_field: str, query_vector: List[float], element_type: str,
			
 
				-                     metric_type: str = "cosine", topn: int = 3, rank_constant: int = 60,
			
 
				-                     text_query: str = "", text_field: str = "file_name"
			
 
				-                     ) -> Dict[str, Any]:
			
 
				-        """混合检索"""
			
 
				-        try:
			
 
				-            # 使用Infinity官方HTTP API进行混合检索
			
 
				-            path = f"/databases/{self.database}/tables/{index_name}/docs"
			
 
				-            
			
 
				-            # 构建搜索配置列表
			
 
				-            search_config = [
			
 
				-                {
			
 
				-                    "match_method": match_method,
			
 
				-                    "fields": vector_field,
			
 
				-                    "query_vector": query_vector,
			
 
				-                    "element_type": element_type,
			
 
				-                    "metric_type": metric_type,
			
 
				-                    "topn": topn,
			
 
				-                    "params": {
			
 
				-                        "ef": "10"
			
 
				-                    }
			
 
				-                }
			
 
				-            ]
			
 
				-            
			
 
				-            # 只有当text_query和text_field都不为空时，才添加文本搜索配置
			
 
				-            # if text_query and text_field:
			
 
				-            #     search_config.append(
			
 
				-            #         {
			
 
				-            #             "match_method": "text",
			
 
				-            #             "fields": text_field,
			
 
				-            #             "matching_text": text_query,
			
 
				-            #             "topn": 1,
			
 
				-            #             "params":
			
 
				-            #             {
			
 
				-            #                 "default_fields": text_field,
			
 
				-            #                 "operator": "or"
			
 
				-            #             }
			
 
				-            #         }
			
 
				-            #     )
			
 
				-            
			
 
				-            # 添加融合方法配置
			
 
				-            # if vector_field and vector and text_query and text_field:
			
 
				-            #     search_config.append(
			
 
				-            #         {
			
 
				-            #             "fusion_method": "rrf",
			
 
				-            #             "topn": topn,
			
 
				-            #             "params":{"rank_constant": rank_constant}
			
 
				-            #         }
			
 
				-            #     )
			
 
				-          
			
 
				-            data = {
			
 
				-                "output": [
			
 
				-                    "file_name",
			
 
				-                    "page_number",
			
 
				-                    "content",
			
 
				-                    "image_path",
			
 
				-                    "dataset_id",
			
 
				-                    "document_id",
			
 
				-                    "_similarity"
			
 
				-                ],
			
 
				-                "search": search_config
			
 
				-            }
			
 
				-            
			
 
				-            response = self.http_client.get_json(path, json_data=data, headers=self.headers)
			
 
				-            
			
 
				-            if response["error_code"] == 0:
			
 
				-                rows = response["output"]
			
 
				-                # 将列表的列表转换为字典列表
			
 
				-                output_fields = ["file_name", "page_number", "content", "image_path", "dataset_id", "document_id", "_similarity"]
			
 
				-                formatted_rows = []
			
 
				-                for row in rows:
			
 
				-                    # 创建字典，将每个字段名与对应的值匹配
			
 
				-                    formatted_row = {}
			
 
				-                    for i, field in enumerate(output_fields):
			
 
				-                        if i < len(row):
			
 
				-                            # 处理字段值，确保是字典类型
			
 
				-                            if isinstance(row[i], dict):
			
 
				-                                formatted_row.update(row[i])
			
 
				-                            else:
			
 
				-                                formatted_row[field] = row[i]
			
 
				-                    formatted_rows.append(formatted_row)
			
 
				-                return {
			
 
				-                    "output": formatted_rows,
			
 
				-                    "total": len(formatted_rows)
			
 
				-                }
			
 
				-            else:
			
 
				-                return {"output": [], "total": 0, "error": response["error_msg"]}
			
 
				-        except Exception as e:
			
 
				-            print(f"Failed to hybrid search: {str(e)}")
			
 
				-            return {"output": [], "total": 0, "error": str(e)}
			
--- a/utils/infinity_util/__pycache__/__init__.cpython-312.pyc
+++ b/utils/infinity_util/__pycache__/__init__.cpython-312.pyc
--- a/utils/infinity_util/__pycache__/base.cpython-312.pyc
+++ b/utils/infinity_util/__pycache__/base.cpython-312.pyc
--- a/utils/infinity_util/__pycache__/document.cpython-312.pyc
+++ b/utils/infinity_util/__pycache__/document.cpython-312.pyc
--- a/utils/infinity_util/__pycache__/index.cpython-312.pyc
+++ b/utils/infinity_util/__pycache__/index.cpython-312.pyc
--- a/utils/infinity_util/__pycache__/search.cpython-312.pyc
+++ b/utils/infinity_util/__pycache__/search.cpython-312.pyc
--- a/utils/vector_db.py
+++ b/utils/vector_db.py
@@ -1,168 +0,0 @@
 
				-"""
			
 
				-向量数据库工厂类
			
 
				-支持动态切换Elasticsearch和Infinity向量数据库
			
 
				-"""
			
 
				-from typing import Any, List, Dict, Optional
			
 
				-from conf.config import VectorDBConfig
			
 
				-from utils.es import ESConnection as ElasticsearchConnection
			
 
				-
			
 
				-
			
 
				-class VectorDBFactory:
			
 
				-    """
			
 
				-    向量数据库工厂类
			
 
				-    根据配置创建不同类型的向量数据库连接
			
 
				-    """
			
 
				-    
			
 
				-    @staticmethod
			
 
				-    def get_vector_db():
			
 
				-        """
			
 
				-        获取向量数据库实例
			
 
				-        
			
 
				-        Returns:
			
 
				-            VectorDBBase: 向量数据库实例
			
 
				-        """
			
 
				-        vector_db_type = VectorDBConfig.get_vector_db_type().lower()
			
 
				-        
			
 
				-        if vector_db_type == "es":
			
 
				-            return ElasticsearchVectorDB()
			
 
				-        elif vector_db_type == "infinity":
			
 
				-            return InfinityVectorDB()
			
 
				-        else:
			
 
				-            raise ValueError(f"不支持的向量数据库类型: {vector_db_type}")
			
 
				-
			
 
				-
			
 
				-class VectorDBBase:
			
 
				-    """
			
 
				-    向量数据库基类
			
 
				-    定义了向量数据库应该实现的接口
			
 
				-    """
			
 
				-    
			
 
				-    def create_index(self, index_name: str, mappings: Dict[str, Any] = None) -> bool:
			
 
				-        """创建索引"""
			
 
				-        raise NotImplementedError()
			
 
				-    
			
 
				-    def insert_document(self, index_name: str, document: Dict[str, Any], id: str = None) -> bool:
			
 
				-        """插入单个文档"""
			
 
				-        raise NotImplementedError()
			
 
				-    
			
 
				-    def bulk_insert(self, index_name: str, documents: List[Dict[str, Any]]) -> Dict[str, Any]:
			
 
				-        """批量插入文档"""
			
 
				-        raise NotImplementedError()
			
 
				-    
			
 
				-    def search(self, index_name: str, query: Dict[str, Any], size: int = 10) -> Dict[str, Any]:
			
 
				-        """搜索文档"""
			
 
				-        raise NotImplementedError()
			
 
				-    
			
 
				-    def vector_search(self, index_name: str, vector_field: str, vector: List[float], size: int = 10, filter: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """向量检索"""
			
 
				-        raise NotImplementedError()
			
 
				-    
			
 
				-    def hybrid_search(self, index_name: str, text_query: str, vector_field: str, vector: List[float], 
			
 
				-                     size: int = 10, text_weight: float = 0.5, vector_weight: float = 0.5) -> Dict[str, Any]:
			
 
				-        """混合检索"""
			
 
				-        raise NotImplementedError()
			
 
				-    
			
 
				-    def close(self):
			
 
				-        """关闭连接"""
			
 
				-        raise NotImplementedError()
			
 
				-
			
 
				-
			
 
				-class ElasticsearchVectorDB(VectorDBBase):
			
 
				-    """
			
 
				-    Elasticsearch向量数据库实现
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self):
			
 
				-        """初始化Elasticsearch向量数据库"""
			
 
				-        self.es_conn = ElasticsearchConnection()
			
 
				-        
			
 
				-    def create_index(self, index_name: str, mappings: Dict[str, Any] = None) -> bool:
			
 
				-        """创建索引"""
			
 
				-        from utils.es.index import IndexManager
			
 
				-        index_manager = IndexManager(self.es_conn)
			
 
				-        return index_manager.create_index(index_name, mappings)
			
 
				-    
			
 
				-    def insert_document(self, index_name: str, document: Dict[str, Any], id: str = None) -> bool:
			
 
				-        """插入单个文档"""
			
 
				-        from utils.es.document import DocumentManager
			
 
				-        doc_manager = DocumentManager(self.es_conn)
			
 
				-        return doc_manager.insert(index_name, document, id)
			
 
				-    
			
 
				-    def bulk_insert(self, index_name: str, documents: List[Dict[str, Any]]) -> Dict[str, Any]:
			
 
				-        """批量插入文档"""
			
 
				-        from services.utils.es.document import DocumentManager
			
 
				-        doc_manager = DocumentManager(self.es_conn)
			
 
				-        return doc_manager.bulk_insert(index_name, documents)
			
 
				-    
			
 
				-    def search(self, index_name: str, query: Dict[str, Any], size: int = 10) -> Dict[str, Any]:
			
 
				-        """搜索文档"""
			
 
				-        from utils.es.search import SearchManager
			
 
				-        search_manager = SearchManager(self.es_conn)
			
 
				-        return search_manager.search(index_name, query, size=size)
			
 
				-    
			
 
				-    def vector_search(self, index_name: str, vector_field: str, vector: List[float], size: int = 10, filter: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """向量检索"""
			
 
				-        from services.utils.es.search import SearchManager
			
 
				-        search_manager = SearchManager(self.es_conn)
			
 
				-        return search_manager.knn_search(index_name, vector_field, vector, size, filter)
			
 
				-    
			
 
				-    def hybrid_search(self, index_name: str, text_query: str, vector_field: str, vector: List[float], 
			
 
				-                     size: int = 10, text_weight: float = 0.5, vector_weight: float = 0.5) -> Dict[str, Any]:
			
 
				-        """混合检索"""
			
 
				-        from services.utils.es.search import SearchManager
			
 
				-        search_manager = SearchManager(self.es_conn)
			
 
				-        return search_manager.hybrid_search(index_name, text_query, vector_field, vector, size, text_weight=text_weight, vector_weight=vector_weight)
			
 
				-    
			
 
				-    def close(self):
			
 
				-        """关闭连接"""
			
 
				-        self.es_conn.close()
			
 
				-
			
 
				-
			
 
				-class InfinityVectorDB(VectorDBBase):
			
 
				-    """
			
 
				-    Infinity向量数据库实现
			
 
				-    支持infinity向量数据库的具体实现，包含PDF元数据入库
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self):
			
 
				-        """初始化Infinity向量数据库"""
			
 
				-        from utils.infinity_util import InfinityVectorDB as _InfinityVectorDB
			
 
				-        from conf.config import VectorDBConfig
			
 
				-        
			
 
				-        # 获取Infinity配置
			
 
				-        host = VectorDBConfig.get_infinity_host()
			
 
				-        port = VectorDBConfig.get_infinity_port()
			
 
				-        user = VectorDBConfig.get_infinity_user()
			
 
				-        password = VectorDBConfig.get_infinity_password()
			
 
				-        
			
 
				-        # 初始化新的InfinityVectorDB实例
			
 
				-        self._infinity_db = _InfinityVectorDB()
			
 
				-    
			
 
				-    def create_index(self, index_name: str, mappings: Dict[str, Any] = None) -> bool:
			
 
				-        """创建索引"""
			
 
				-        return self._infinity_db.create_index(index_name, mappings)
			
 
				-    
			
 
				-    def insert_document(self, index_name: str, document: Dict[str, Any], id: str = None) -> bool:
			
 
				-        """插入单个文档"""
			
 
				-        return self._infinity_db.insert_document(index_name, document, id)
			
 
				-    
			
 
				-    def bulk_insert(self, index_name: str, documents: List[Dict[str, Any]]) -> Dict[str, Any]:
			
 
				-        """批量插入文档"""
			
 
				-        return self._infinity_db.bulk_insert(index_name, documents)
			
 
				-    
			
 
				-    def search(self, index_name: str, query: Dict[str, Any], size: int = 10) -> Dict[str, Any]:
			
 
				-        """搜索文档"""
			
 
				-        return self._infinity_db.search(index_name, query, size)
			
 
				-    
			
 
				-    def vector_search(self, index_name: str, vector_field: str, vector: List[float], size: int = 10, filter: Dict[str, Any] = None) -> Dict[str, Any]:
			
 
				-        """向量检索"""
			
 
				-        return self._infinity_db.vector_search(index_name, vector_field, vector, size, filter)
			
 
				-    
			
 
				-    def hybrid_search(self, index_name: str, text_query: str, vector_field: str, vector: List[float], 
			
 
				-                     size: int = 10, text_weight: float = 0.5, vector_weight: float = 0.5) -> Dict[str, Any]:
			
 
				-        """混合检索"""
			
 
				-        return self._infinity_db.hybrid_search(index_name, text_query, vector_field, vector, size, text_weight, vector_weight)
			
 
				-    
			
 
				-    def close(self):
			
 
				-        """关闭连接"""
			
 
				-        self._infinity_db.close()
			
--- a/vector_search_result.md
+++ b/vector_search_result.md
@@ -1,555 +0,0 @@
 
				-## 向量检索结果
			
 
				-
			
 
				-### 检索结果 1
			
 
				-**file_name**: 出发！超级播种机.pdf
			
 
				-**page_number**: 9
			
 
				-**content**: {
			
 
				-    "page_meta": {
			
 
				-        "page_number": 1,
			
 
				-        "content_text": "“我们快点儿开始播种吧！”点点已经等不及了。“等等，我们都去拿工具，谁来照看种子呢？”甜甜问。“我！我来！”“粉宝，这项重要的任务就交给你啦！”点点说，“没有种子，就没有植物！”粉宝守着种子唱起了歌。“我们是超级播种机！我们让莉莉兰开满鲜花……”一缕微风吹过，粉宝紧张得屏住了呼吸……“嘿！别走……”",
			
 
				-        "overall_style": {
			
 
				-            "art_medium": "手绘水彩",
			
 
				-            "color_palette": ["薄荷绿", "泥土棕"],
			
 
				-            "lighting": "柔和侧光",
			
 
				-            "composition": "对角线构图"
			
 
				-        }
			
 
				-    },
			
 
				-    "elements": [
			
 
				-        {
			
 
				-            "element_name": "粉宝",
			
 
				-            "character_name": "粉宝",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "粉色分节身体，戴蓝色小帽，大眼睛",
			
 
				-                "action_emotion": "兴奋地喊“我！我来！”，紧张屏息",
			
 
				-                "color_detail": "珊瑚粉、天蓝色",
			
 
				-                "ability_tag": "自我认知"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "生活常识"],
			
 
				-                "object": ["动物", "植物", "种子"],
			
 
				-                "emotion": ["快乐", "紧张"]
			
 
				-            },
			
 
				-            "ability_tags": ["语言表达", "自我认知", "自然观察"],
			
 
				-            "description": "粉宝穿着粉色分节身体，戴蓝色小帽，兴奋喊话，继而紧张屏息守护种子。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "点点",
			
 
				-            "character_name": "点点",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "戴护目镜的红色瓢虫，黑褐色斑点",
			
 
				-                "action_emotion": "急切地催促播种",
			
 
				-                "color_detail": "橙红、深褐",
			
 
				-                "ability_tag": "语言表达"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "生活常识"],
			
 
				-                "object": ["动物", "种子"],
			
 
				-                "emotion": ["急切"]
			
 
				-            },
			
 
				-            "ability_tags": ["语言表达"],
			
 
				-            "description": "戴护目镜的红色瓢虫点点，急切催促大家播种，加速任务进程。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "甜甜",
			
 
				-            "character_name": "甜甜",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "蓝色甲壳，棕色螺旋贝壳",
			
 
				-                "action_emotion": "担忧地询问",
			
 
				-                "color_detail": "钴蓝、土黄",
			
 
				-                "ability_tag": "社会交往"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "生活常识"],
			
 
				-                "object": ["动物", "种子"],
			
 
				-                "emotion": ["担忧"]
			
 
				-            },
			
 
				-            "ability_tags": ["社会交往"],
			
 
				-            "description": "蓝色甲壳蜗牛甜甜，担忧询问谁来守护种子，体现责任感。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蜜蜂",
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "黄黑条纹，透明翅膀",
			
 
				-                "action_emotion": "好奇地观察",
			
 
				-                "color_detail": "亮黄、深褐",
			
 
				-                "ability_tag": "自然观察"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["动物", "植物"],
			
 
				-                "emotion": ["好奇"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "黄黑条纹蜜蜂，透明翅膀，好奇观察种子和同伴们的活动。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蒲公英",
			
 
				-            "character_name": "",
			
 
				-            "category": "道具",
			
 
				-            "spatial_layer": "前景/中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "白色绒球，纤细毛细",
			
 
				-                "action_emotion": "随风飘散",
			
 
				-                "color_detail": "纯白",
			
 
				-                "ability_tag": "自然观察"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["植物"],
			
 
				-                "emotion": ["平静"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "白色蒲公英绒球，纤细毛细随风飘散，象征种子传播和自然循环。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "土壤",
			
 
				-            "character_name": "",
			
 
				-            "category": "环境",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "棕色颗粒，点缀小石子",
			
 
				-                "action_emotion": "静默的承载",
			
 
				-                "color_detail": "深土棕",
			
 
				-                "ability_tag": "自然观察"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["植物", "土壤"],
			
 
				-                "emotion": ["平静"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "棕色颗粒土壤，点缀着小石子，静默承载着生命的种子和根系。"
			
 
				-        }
			
 
				-    ]
			
 
				-}
			
 
				-**image_path**: http://192.168.16.134:9000/bookpage/4dd1f389-4f61-478a-a4c2-a8c18c6f71c9.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074321Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=7ea572f9b4e8d83061ac0475f5f9593cfca1abc26a5e60f56cebd3dcf868d6d6
			
 
				-**dataset_id**: a0f1aa03ed2c11f08b8f0242c0a85002
			
 
				-**document_id**: d629ed98ed2e11f09ac30242c0a85002
			
 
				-**SIMILARITY**: 0.6338874697685242
			
 
				-
			
 
				-### 检索结果 2
			
 
				-**file_name**: 出发！超级播种机.pdf
			
 
				-**page_number**: 5
			
 
				-**content**: {
			
 
				-    "page_meta": {
			
 
				-        "page_number": 1,
			
 
				-        "content_text": "炎热的夏天就要过去了，虫虫们在花园里散步。突然，粉宝咯咯地笑了起来。‘什么事这么好笑？’‘哎呀，有毛茸茸的家伙在挠我的肚子！’",
			
 
				-        "overall_style": {
			
 
				-            "art_medium": "手绘水彩",
			
 
				-            "color_palette": ["薄荷绿", "暖橙色"],
			
 
				-            "lighting": "柔和侧光",
			
 
				-            "composition": "大远景"
			
 
				-        }
			
 
				-    },
			
 
				-    "elements": [
			
 
				-        {
			
 
				-            "element_name": "粉红色毛虫",
			
 
				-            "character_name": "粉宝",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "粉橙色分节身体，头顶蓝色小帽，闭眼微笑",
			
 
				-                "action_emotion": "蜷缩身体大笑，被蒲公英绒毛轻触",
			
 
				-                "color_detail": "粉橙色分节身体，头顶蓝色小帽",
			
 
				-                "ability_tag": "情绪管理"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["昆虫", "植物"],
			
 
				-                "emotion": ["快乐"]
			
 
				-            },
			
 
				-            "ability_tags": ["情绪管理", "自然观察"],
			
 
				-            "description": "粉橙色毛虫被蒲公英绒毛轻触，闭眼大笑，展现快乐情绪。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "戴眼镜瓢虫",
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "红黑斑点外壳，戴圆框眼镜",
			
 
				-                "action_emotion": "飞舞询问，表情关切",
			
 
				-                "color_detail": "红黑斑点外壳，透明镜片",
			
 
				-                "ability_tag": "社会交往"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["昆虫"],
			
 
				-                "emotion": ["好奇"]
			
 
				-            },
			
 
				-            "ability_tags": ["社会交往", "语言表达"],
			
 
				-            "description": "戴眼镜的瓢虫飞舞询问，表情关切，体现好奇与社交互动。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蒲公英",
			
 
				-            "character_name": "",
			
 
				-            "category": "道具",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "白色绒毛球，棕色花托",
			
 
				-                "action_emotion": "绒毛飘散，轻触毛虫",
			
 
				-                "color_detail": "白色绒毛，棕色花托",
			
 
				-                "ability_tag": ""
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["植物"],
			
 
				-                "emotion": ["轻松"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "白色蒲公英绒毛飘散，轻触毛虫，促进自然观察。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蝴蝶",
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "橙黑翅膀，黑色触角",
			
 
				-                "action_emotion": "飞行姿态优雅",
			
 
				-                "color_detail": "橙黑翅膀，黑色触角",
			
 
				-                "ability_tag": ""
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["昆虫"],
			
 
				-                "emotion": ["平静"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "橙黑翅膀的蝴蝶优雅飞行，展现自然之美的观察点。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蜗牛",
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "背景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "蓝色身体，螺旋红壳",
			
 
				-                "action_emotion": "静止观察",
			
 
				-                "color_detail": "蓝色身体，红褐色螺旋壳",
			
 
				-                "ability_tag": ""
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["昆虫"],
			
 
				-                "emotion": ["安静"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "蓝色身体配红壳的蜗牛静止观察，促进安静的自然观察。"
			
 
				-        }
			
 
				-    ]
			
 
				-}
			
 
				-**image_path**: http://192.168.16.134:9000/bookpage/21a250a9-cdc0-4c18-a94e-3a82eba32720.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074314Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=a89a8f58335b6b3a6f2f6a399c17b387026bfa988ab5b23ee36abfca970e1718
			
 
				-**dataset_id**: a0f1aa03ed2c11f08b8f0242c0a85002
			
 
				-**document_id**: d629ed98ed2e11f09ac30242c0a85002
			
 
				-**SIMILARITY**: 0.6216001510620117
			
 
				-
			
 
				-### 检索结果 3
			
 
				-**file_name**: 出发！超级播种机.pdf
			
 
				-**page_number**: 11
			
 
				-**content**: ```json
			
 
				-{
			
 
				-    "page_meta": {
			
 
				-        "page_number": 1,
			
 
				-        "content_text": "粉宝追啊追啊……但一个都没追回来。过了一会儿，虫虫们回来了。“粉宝，这是怎么了？”“我们的种子去哪儿啦？”“对不起！”“都怪我！是我没照看好种子，让风吹走了。没有种子，我们就不能播种了。”粉宝难过极了。",
			
 
				-        "overall_style": {
			
 
				-            "art_medium": "手绘水彩",
			
 
				-            "color_palette": ["柔粉", "橄榄绿"],
			
 
				-            "lighting": "柔和自然光",
			
 
				-            "composition": "分镜叙事构图"
			
 
				-        }
			
 
				-    },
			
 
				-    "elements": [
			
 
				-        {
			
 
				-            "element_name": "粉宝",
			
 
				-            "character_name": "粉宝",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "粉橙色条纹身体，戴蓝色小帽",
			
 
				-                "action_emotion": "追捕种子，最终沮丧低头",
			
 
				-                "color_detail": "粉橙色条纹，蓝色小帽",
			
 
				-                "ability_tag": "自我认知"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "生活常识"],
			
 
				-                "object": ["动物", "种子"],
			
 
				-                "emotion": ["勇敢", "难过"]
			
 
				-            },
			
 
				-            "ability_tags": ["自我认知", "情绪管理"],
			
 
				-            "description": "粉宝戴蓝帽，粉橙条纹，追捕种子时勇敢，失败后难过，体现自我认知与情绪管理。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "小蜜蜂",
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "黄黑条纹，透明翅膀",
			
 
				-                "action_emotion": "关切俯视，询问情况",
			
 
				-                "color_detail": "亮黄色，黑色条纹",
			
 
				-                "ability_tag": "社会交往"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "生活常识"],
			
 
				-                "object": ["动物"],
			
 
				-                "emotion": ["关心"]
			
 
				-            },
			
 
				-            "ability_tags": ["社会交往"],
			
 
				-            "description": "黄黑条纹小蜜蜂，透明翅膀，关切俯视粉宝，展现关心与社会交往能力。"
			
 
				-        }
			
 
				-    ]
			
 
				-}
			
 
				-```
			
 
				-**image_path**: http://192.168.16.134:9000/bookpage/1f3c64a3-ba0e-4fe9-823a-f3f5b443da0b.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074324Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=711c6dca918128b450f86e93c9ac294a3d7a819b580f8076ed8099f79e50df8e
			
 
				-**dataset_id**: a0f1aa03ed2c11f08b8f0242c0a85002
			
 
				-**document_id**: d629ed98ed2e11f09ac30242c0a85002
			
 
				-**SIMILARITY**: 0.616936981678009
			
 
				-
			
 
				-### 检索结果 4
			
 
				-**file_name**: 出发！超级播种机.pdf
			
 
				-**page_number**: 8
			
 
				-**content**: ```json
			
 
				-{
			
 
				-    "page_meta": {
			
 
				-        "page_number": 1,
			
 
				-        "content_text": "粉宝也努力想要抓住种子，但是……他总是抓不住。看，我们收集了这么多种子！卷卷开心极了。粉宝有点儿难过：我一个也没找到。瞧瞧你尾巴上是什么？粉宝，尾巴上的也算，你也找到啦！闪闪大声鼓励他。",
			
 
				-        "overall_style": {
			
 
				-            "art_medium": "手绘水彩",
			
 
				-            "color_palette": ["薄荷绿", "暖橙色"],
			
 
				-            "lighting": "柔和晨光",
			
 
				-            "composition": "分镜式构图"
			
 
				-        }
			
 
				-    },
			
 
				-    "elements": [
			
 
				-        {
			
 
				-            "element_name": "粉宝",
			
 
				-            "character_name": "粉宝",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "粉红色分节身体，头顶小蓝帽，圆眼睛",
			
 
				-                "action_emotion": "伸展身体试图抓种子，后显沮丧",
			
 
				-                "color_detail": "珊瑚粉，橙色条纹",
			
 
				-                "ability_tag": "自我认知"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "社交"],
			
 
				-                "object": ["动物", "植物"],
			
 
				-                "emotion": ["快乐", "难过"]
			
 
				-            },
			
 
				-            "ability_tags": ["自我认知", "情绪管理"],
			
 
				-            "description": "粉色毛毛虫戴蓝帽，努力抓种子却失败，表情从专注转为沮丧，体现挫折感与自我认知。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "卷卷",
			
 
				-            "character_name": "卷卷",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "蓝色蜗牛壳，蓝身体，红色眼睛",
			
 
				-                "action_emotion": "开心展示收集的种子",
			
 
				-                "color_detail": "深蓝与橙红渐变壳",
			
 
				-                "ability_tag": "社会交往"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "社交"],
			
 
				-                "object": ["动物", "植物"],
			
 
				-                "emotion": ["开心", "友善"]
			
 
				-            },
			
 
				-            "ability_tags": ["社会交往", "情绪管理"],
			
 
				-            "description": "蓝色蜗牛壳，兴奋展示收获，放大表达快乐情绪，促进角色间的社交互动。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蜜蜂",
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "黄黑相间条纹，透明翅膀",
			
 
				-                "action_emotion": "助阵鼓励，专注好奇",
			
 
				-                "color_detail": "亮黄色与黑色块",
			
 
				-                "ability_tag": "社会交往"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "社交"],
			
 
				-                "object": ["动物", "植物"],
			
 
				-                "emotion": ["好奇", "友善"]
			
 
				-            },
			
 
				-            "ability_tags": ["社会交往", "情绪管理"],
			
 
				-            "description": "黄黑条纹蜜蜂，透明翅膀，眼神专注，微表情传达友善与鼓励，营造积极社交氛围。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蒲公英种子",
			
 
				-            "character_name": "",
			
 
				-            "category": "道具",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "白色绒球状，轻盈飘散",
			
 
				-                "action_emotion": "自然元素，暗示失败与转机",
			
 
				-                "color_detail": "纯白色，轻盈如云",
			
 
				-                "ability_tag": "自然观察"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["植物"],
			
 
				-                "emotion": ["惊喜", "希望"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察", "逻辑思维"],
			
 
				-            "description": "白色绒球状蒲公英种子，轻盈飘散，暗示自然界的微妙变化，引导孩子观察细节。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "草地与土壤",
			
 
				-            "character_name": "",
			
 
				-            "category": "场景",
			
 
				-            "spatial_layer": "背景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "绿色草叶，棕色土壤，木头小径",
			
 
				-                "action_emotion": "宁静自然环境",
			
 
				-                "color_detail": "鲜绿与深棕",
			
 
				-                "ability_tag": "自然观察"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["植物", "土壤"],
			
 
				-                "emotion": ["安静"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "柔软绿草与深棕土壤构成自然背景，营造安静观察与探索的氛围。"
			
 
				-        }
			
 
				-    ]
			
 
				-}
			
 
				-```
			
 
				-**image_path**: http://192.168.16.134:9000/bookpage/16957b97-75f3-49d9-9ad9-56ceef63dc0e.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074319Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=9e89506af7a73d8551e0069dc9d7b9f21c0896784d6b44b17c59bdf1bb61f05a
			
 
				-**dataset_id**: a0f1aa03ed2c11f08b8f0242c0a85002
			
 
				-**document_id**: d629ed98ed2e11f09ac30242c0a85002
			
 
				-**SIMILARITY**: 0.6116334795951843
			
 
				-
			
 
				-### 检索结果 5
			
 
				-**file_name**: 出发！超级播种机.pdf
			
 
				-**page_number**: 12
			
 
				-**content**: ```json
			
 
				-{
			
 
				-    "page_meta": {
			
 
				-        "page_number": 1,
			
 
				-        "content_text": "“不是你的错，粉宝。”美美飞了过来，“这些种子本来就是要飞的，所以它们才会有降落伞和翅膀。”\n“当种子落到土壤里，它们会自己生根发芽。大自然才是真正的超级播种机。”\n点点也拿出一些种子，安慰虫虫们：“看！这些种子就没有翅膀，我们还有机会。”",
			
 
				-        "overall_style": {
			
 
				-            "art_medium": "手绘水彩",
			
 
				-            "color_palette": ["薄荷绿", "暖橙色"],
			
 
				-            "lighting": "柔和顶光",
			
 
				-            "composition": "中心聚焦构图"
			
 
				-        }
			
 
				-    },
			
 
				-    "elements": [
			
 
				-        {
			
 
				-            "element_name": "蝴蝶美美",
			
 
				-            "character_name": "美美",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "蓝灰身体，橙色带白点翅膀",
			
 
				-                "action_emotion": "飞向同伴，温柔安慰",
			
 
				-                "color_detail": "橙色翅膀带深棕斑点",
			
 
				-                "ability_tag": "情绪管理"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "社交"],
			
 
				-                "object": ["昆虫", "植物"],
			
 
				-                "emotion": ["友爱", "安静"]
			
 
				-            },
			
 
				-            "ability_tags": ["情绪管理", "语言表达"],
			
 
				-            "description": "蓝灰身子橙翼蝴蝶温柔飞来安慰伙伴，教会孩子理解自然规律。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "毛毛虫粉宝",
			
 
				-            "character_name": "粉宝",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "粉红色条纹身躯，戴深蓝小帽",
			
 
				-                "action_emotion": "蜷缩在土中，略带委屈",
			
 
				-                "color_detail": "粉红条纹带细微渐变",
			
 
				-                "ability_tag": "自我认知"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "生活常识"],
			
 
				-                "object": ["昆虫", "土壤"],
			
 
				-                "emotion": ["委屈", "好奇"]
			
 
				-            },
			
 
				-            "ability_tags": ["自我认知", "自然观察"],
			
 
				-            "description": "戴小帽的粉红毛毛虫蜷缩在土里，正在学习面对失败。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "蜜蜂", 
			
 
				-            "character_name": "",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "中景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "黄黑相间条纹，透明蓝翅",
			
 
				-                "action_emotion": "温柔安抚毛毛虫",
			
 
				-                "color_detail": "黄色带黑色条纹",
			
 
				-                "ability_tag": "社会交往"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "社交"],
			
 
				-                "object": ["昆虫"],
			
 
				-                "emotion": ["友爱", "鼓励"]
			
 
				-            },
			
 
				-            "ability_tags": ["社会交往"],
			
 
				-            "description": "黄黑条纹蜜蜂温柔互动，传递自然友善的温暖氛围。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "点点",
			
 
				-            "character_name": "点点",
			
 
				-            "category": "角色",
			
 
				-            "spatial_layer": "前景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "棕色斗笠，大圆眼镜",
			
 
				-                "action_emotion": "展示种子，自信鼓励",
			
 
				-                "color_detail": "棕斗笠配深棕斑点",
			
 
				-                "ability_tag": "逻辑思维"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然", "科学科普"],
			
 
				-                "object": ["种子", "工具"],
			
 
				-                "emotion": ["乐观", "惊喜"]
			
 
				-            },
			
 
				-            "ability_tags": ["逻辑思维", "自然观察"],
			
 
				-            "description": "戴斗笠戴眼镜的昆虫自信展示种子，引导孩子探索自然规律。"
			
 
				-        },
			
 
				-        {
			
 
				-            "element_name": "环境",
			
 
				-            "character_name": "",
			
 
				-            "category": "场景",
			
 
				-            "spatial_layer": "背景",
			
 
				-            "visual_attributes": {
			
 
				-                "appearance": "绿叶草丛，木质树干",
			
 
				-                "action_emotion": "宁静自然",
			
 
				-                "color_detail": "翠绿色与棕色土壤",
			
 
				-                "ability_tag": "自然观察"
			
 
				-            },
			
 
				-            "content_tags": {
			
 
				-                "theme": ["自然"],
			
 
				-                "object": ["植物", "土壤"],
			
 
				-                "emotion": ["宁静", "好奇"]
			
 
				-            },
			
 
				-            "ability_tags": ["自然观察"],
			
 
				-            "description": "绿叶环绕的宁静泥土环境，细节丰富，激发孩子探索兴趣。"
			
 
				-        }
			
 
				-    ]
			
 
				-}
			
 
				-```
			
 
				-**image_path**: http://192.168.16.134:9000/bookpage/9bf88a20-d970-4831-950c-c17768af8c82.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074326Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=5fbb5efa41091c39669a2a801081dbd13735d82c9921d9302f84ee077ce10591
			
 
				-**dataset_id**: a0f1aa03ed2c11f08b8f0242c0a85002
			
 
				-**document_id**: d629ed98ed2e11f09ac30242c0a85002
			
 
				-**SIMILARITY**: 0.6091482043266296
			
 
				-
			
--- a/workflow/image_parsing_workflow.py
+++ b/workflow/image_parsing_workflow.py
@@ -17,10 +17,10 @@ from typing import List, Dict, Any, Annotated
 
				 from pydantic import BaseModel, Field, ConfigDict
			
 
				 from model.qwen_vl import QWenVLParser
			
 
				 from utils.ragflow.ragflow_service import RAGFlowService
			
 
				-from utils.vector_db import VectorDBFactory
			
 
				 from model.multimodal_embedding import Embedding
			
 
				 from utils.minio.image_util import image_util
			
 
				 from conf.config import ModelConfig
			
 
				+from utils.infinity import get_client
			
 
				 
			
 
				 # 定义工作流状态类
			
 
				 class ImageParsingState(BaseModel):
			
@@ -30,7 +30,6 @@ class ImageParsingState(BaseModel):
 
				     book_name: str = Field(..., description="书名")
			
 
				     dataset_id: str = Field(..., description="数据集ID")
			
 
				     ragflow_service: RAGFlowService = Field(default_factory=RAGFlowService, description="RAGFLOW服务")
			
 
				-    vector_db: Any = Field(default_factory=VectorDBFactory.get_vector_db, description="向量数据库实例")
			
 
				     embedding_model: Embedding = Field(default_factory=Embedding, description="多模态嵌入模型实例")
			
 
				     document_id: str = Field(default="", description="文档ID")
			
 
				     split_images: List[Dict[str, Any]] = Field(default_factory=list, description="拆分后的图片列表，包含图片URL和页码信息")
			
@@ -264,7 +263,8 @@ class ImageParsingWorkflow:
 
				         # 批量入库
			
 
				         if documents_to_store:
			
 
				             print(f"开始入库，共 {len(documents_to_store)} 个文档")
			
 
				-            result = state.vector_db.bulk_insert(index_name, documents_to_store)
			
 
				+            infinity_client = get_client()
			
 
				+            result = infinity_client.insert(index_name, documents_to_store)
			
 
				             print(f"入库结果: {result}")
			
 
				         
			
 
				         return {
			
--- a/workflow/workflow.py
+++ b/workflow/workflow.py
@@ -12,10 +12,9 @@ from pydantic import BaseModel, Field, ConfigDict
 
				 from parser.pdf_parser.pdf_splitter import PDFSplitter
			
 
				 from model.qwen_vl import QWenVLParser
			
 
				 from utils.ragflow.ragflow_service import RAGFlowService
			
 
				-from utils.vector_db import VectorDBFactory
			
 
				 from model.multimodal_embedding import Embedding
			
 
				 from conf.config import ModelConfig, VectorDBConfig
			
 
				-from utils.minio.image_util import ImageUtil
			
 
				+from utils.infinity import get_client
			
 
				 
			
 
				 # 定义工作流状态类
			
 
				 class PDFParsingState(BaseModel):
			
@@ -24,7 +23,6 @@ class PDFParsingState(BaseModel):
 
				     pdf_path: str = Field(..., description="PDF文件路径")
			
 
				     dataset_id: str = Field(..., description="数据集ID")
			
 
				     ragflow_service: RAGFlowService = Field(default_factory=RAGFlowService, description="RAGFLOW服务")
			
 
				-    vector_db: Any = Field(default_factory=VectorDBFactory.get_vector_db, description="向量数据库实例")
			
 
				     embedding_model: Embedding = Field(default_factory=Embedding, description="多模态嵌入模型实例")
			
 
				     document_id: str = Field(default="", description="上传后的文档ID")
			
 
				     split_pages: List[Dict[str, Any]] = Field(default_factory=list, description="拆分后的页面列表")
			
@@ -332,7 +330,8 @@ class PDFParsingWorkflow:
 
				         # 批量入库
			
 
				         if documents_to_store:
			
 
				             print(f"开始入库，共 {len(documents_to_store)} 个文档")
			
 
				-            result = state.vector_db.bulk_insert(index_name, documents_to_store)
			
 
				+            infinity_client = get_client()
			
 
				+            result = infinity_client.insert(index_name, documents_to_store)
			
 
				             print(f"入库结果: {result}")
			
 
				         
			
 
				         return {