hybrid_search_http.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. #!/usr/bin/env python3
  2. """
  3. 混合检索HTTP服务
  4. 使用FastAPI框架实现,提供混合检索的HTTP POST接口
  5. """
  6. import sys
  7. import os
  8. import requests
  9. from io import BytesIO
  10. from typing import List, Dict, Any
  11. from fastapi import FastAPI, HTTPException, Body
  12. from pydantic import BaseModel
  13. from PIL import Image
  14. # 添加项目根目录到Python路径
  15. sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  16. from utils.infinity_util import InfinityVectorDB
  17. from model.multimodal_embedding import Embedding
  18. from conf.config import ModelConfig, VectorDBConfig
  19. # 初始化FastAPI应用
  20. app = FastAPI(
  21. title="混合检索HTTP服务",
  22. description="提供混合检索的HTTP POST接口",
  23. version="1.0.0"
  24. )
  25. # 初始化向量数据库
  26. vector_db = InfinityVectorDB()
  27. # 初始化多模态嵌入模型
  28. embedding_model = Embedding(
  29. model_name=ModelConfig.get_multimodal_embedding_model_name(),
  30. api_key=ModelConfig.get_dashscope_api_key()
  31. )
  32. # 定义请求模型
  33. class HybridSearchRequest(BaseModel):
  34. """混合检索请求模型"""
  35. text_query: str
  36. image: str
  37. topn: int = 2
  38. # 定义响应模型
  39. class HybridSearchResponse(BaseModel):
  40. """混合检索响应模型"""
  41. success: bool
  42. message: str
  43. output: List[Dict[str, Any]] = []
  44. total: int = 0
  45. @app.post("/hybrid_search", response_model=HybridSearchResponse)
  46. def hybrid_search(request: HybridSearchRequest = Body(...)):
  47. """
  48. 混合检索API
  49. 使用文本查询和向量查询进行混合检索
  50. 请求参数:
  51. - text_query: 文本查询
  52. - image: 图片URL
  53. - topn: 返回结果数量,默认2
  54. 返回结果:
  55. - success: 是否成功
  56. - message: 结果消息
  57. - output: 检索结果列表
  58. - total: 总命中数
  59. """
  60. try:
  61. # 解析请求参数
  62. text_query = request.text_query
  63. image_url = request.image
  64. topn = request.topn
  65. print(f"开始混合检索,数据库: {VectorDBConfig.get_infinity_database}, 知识库id: {ModelConfig.get_dataset_id()}, 文本查询: {text_query}, 返回数量: {topn}")
  66. # 构建索引名称
  67. index_name = f"{VectorDBConfig.get_infinity_table_name()}"
  68. print(f"开始生成多模态嵌入,文本长度: {len(text_query)}")
  69. # 处理image_url为image: Image.Image
  70. if isinstance(image_url, str):
  71. # 下载图片
  72. response = requests.get(image_url)
  73. response.raise_for_status() # 检查HTTP状态码
  74. # 将响应内容转换为字节流
  75. image_bytes = BytesIO(response.content)
  76. # 创建Image对象
  77. image = Image.open(image_bytes)
  78. # 生成多模态嵌入向量
  79. embedding = embedding_model.get_multimodal_embedding(text_query, image)
  80. print(f"多模态嵌入生成完成,向量长度: {len(embedding)}")
  81. # 执行混合检索
  82. result = vector_db.hybrid_search(
  83. index_name=index_name,
  84. match_method="dense",
  85. vector_field="dense_vector_1024",
  86. query_vector=embedding,
  87. element_type="float",
  88. metric_type="cosine",
  89. topn=topn,
  90. text_query=text_query,
  91. text_field="content"
  92. )
  93. print(f"混合检索完成,总命中数: {result.get('total', 0)}")
  94. # 返回成功响应
  95. return HybridSearchResponse(
  96. success=True,
  97. message="混合检索成功",
  98. output=result.get("output", []),
  99. total=result.get("total", topn)
  100. )
  101. except Exception as e:
  102. print(f"混合检索失败: {str(e)}")
  103. raise HTTPException(status_code=500, detail=str(e))
  104. @app.get("/health")
  105. def health_check():
  106. """健康检查接口"""
  107. return {
  108. "status": "ok",
  109. "message": "混合检索HTTP服务正常运行"
  110. }
  111. if __name__ == "__main__":
  112. import uvicorn
  113. uvicorn.run(app, host="0.0.0.0", port=18001)