hybrid_search_mcp.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. #!/usr/bin/env python3
  2. """
  3. 混合检索MCP服务
  4. 使用fastmcp框架实现,提供图片解析后的向量化入库和混合检索功能
  5. """
  6. import sys
  7. import os
  8. import requests
  9. from io import BytesIO
  10. from typing import List, Dict, Any
  11. from fastmcp import FastMCP
  12. # 添加项目根目录到Python路径
  13. sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  14. from PIL import Image
  15. from utils.infinity_util import InfinityVectorDB
  16. from model.multimodal_embedding import Embedding
  17. from conf.settings import model_settings, ragflow_settings, vector_db_settings
  18. # 初始化fastmcp应用
  19. mcp = FastMCP("Multi_Vector_Search")
  20. # 初始化向量数据库
  21. vector_db = InfinityVectorDB()
  22. # 初始化多模态嵌入模型
  23. embedding_model = Embedding(
  24. model_name=model_settings.multimodal_embedding_model_name,
  25. api_key=model_settings.dashscope_api_key
  26. )
  27. @mcp.tool(name="hybrid_search")
  28. def hybrid_search(request: Dict[str, Any]) -> Dict[str, Any]:
  29. """
  30. 混合检索API
  31. 使用文本查询和向量查询进行混合检索
  32. """
  33. try:
  34. # 解析请求参数
  35. text_query = request["text_query"]
  36. image_url = request["image"]
  37. topn = request.get("topn", 2)
  38. print(f"开始混合检索,数据库: {vector_db_settings.infinity_database}, 知识库id: {ragflow_settings.dataset_id}, 文本查询: {text_query}, 返回数量: {topn}")
  39. # 构建索引名称
  40. index_name = f"pdf_documents_{ragflow_settings.dataset_id}"
  41. print(f"开始生成多模态嵌入,文本长度: {len(text_query)}")
  42. # 处理image_url为image: Image.Image
  43. if isinstance(image_url, str):
  44. # 下载图片
  45. response = requests.get(image_url)
  46. response.raise_for_status() # 检查HTTP状态码
  47. # 将响应内容转换为字节流
  48. image_bytes = BytesIO(response.content)
  49. # 创建Image对象
  50. image = Image.open(image_bytes)
  51. # 生成多模态嵌入向量
  52. embedding = embedding_model.get_multimodal_embedding(text_query, image)
  53. print(f"多模态嵌入生成完成,向量长度: {len(embedding)}")
  54. # 执行混合检索
  55. result = vector_db.hybrid_search(
  56. index_name=index_name,
  57. match_method="dense",
  58. vector_field="dense_vector_1024",
  59. query_vector=embedding,
  60. element_type="float",
  61. metric_type="cosine",
  62. topn=topn,
  63. text_query=text_query,
  64. text_field="content"
  65. )
  66. print(f"混合检索完成,总命中数: {result.get('total', 0)}")
  67. # 返回成功响应
  68. return {
  69. "success": True,
  70. "message": "混合检索成功",
  71. "output": result.get("output", []),
  72. "total": result.get("total", topn)
  73. }
  74. except Exception as e:
  75. print(f"混合检索失败: {str(e)}")
  76. return {
  77. "success": False,
  78. "message": str(e)
  79. }
  80. if __name__ == "__main__":
  81. mcp.run(transport="sse", host="0.0.0.0", port=18000)
  82. # 启动HTTP服务器,使用uvicorn运行FastAPI应用
  83. # import uvicorn
  84. # uvicorn.run(mcp.http_app, host="0.0.0.0", port=18000, transport="stdio")