from typing import Any, Dict, List, Optional

from conf.settings import vector_db_settings
from utils.infinity import InfinityClient
from utils.file.image_util import image_util
from model.multimodal_embedding import get_embedding_model
from utils.infinity.result_util import convert_to_basic_types


class InfinitySearchService:
    """Run plain, vector and hybrid searches against one Infinity table.

    Each public method forwards the query to the matching method of the
    injected ``InfinityClient`` and passes the result through
    ``convert_to_basic_types`` so it can be serialized.
    """

    def __init__(
        self,
        infinity_client: InfinityClient,
        vector_field: Optional[str] = None,
        match_field: Optional[str] = None,
        match_type: Optional[str] = None,
        table_name: Optional[str] = None,
    ):
        """Store the client and resolve the search defaults.

        Args:
            infinity_client: Client used to execute every search.
            vector_field: Vector column name; defaults to
                ``"dense_vector_1024"``.
            match_field: Text column used by hybrid search; defaults to
                ``"content"``.
            match_type: Defaults to ``"cosine"``. It is only stored here;
                no method in this class reads it.
            table_name: Table to query; defaults to
                ``vector_db_settings.infinity_table_name``.
        """
        self.infinity_client = infinity_client
        # Columns requested from the table for every search.
        self.output_fields = [
            "file_name",
            "page_number",
            "content",
            "image_path",
            "dataset_id",
            "document_id",
        ]
        # `or` means an empty string also falls back to the default.
        self.vector_field = vector_field or "dense_vector_1024"
        self.match_field = match_field or "content"
        self.match_type = match_type or "cosine"
        self.table_name = table_name or vector_db_settings.infinity_table_name

    @staticmethod
    def _to_basic_result(result: Any) -> Dict[str, Any]:
        """Turn a client result into basic types for serialization."""
        # to_result() may still contain complex types; convert recursively.
        return convert_to_basic_types(result.to_result())

    def _with_query_vector(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of the query extended with the embedding fields.

        Reads ``image_url`` and ``matching_text`` from ``search_query``,
        embeds them with the multimodal embedding model, and adds
        ``vector_field`` and ``query_vector`` to a shallow copy so the
        caller's dict is left untouched.
        """
        # 1. Resolve image_url into an image object.
        image = image_util._url_to_image(search_query["image_url"])
        # 2. Embed the text together with the image.
        query_vector = get_embedding_model().get_multimodal_embedding(
            search_query["matching_text"], image
        )
        enriched = dict(search_query)
        enriched["vector_field"] = self.vector_field
        enriched["query_vector"] = query_vector
        return enriched

    def search(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a search against the Infinity database.

        Args:
            search_query: Search parameters, passed to the client as-is.

        Returns:
            The search result converted to basic types for serialization.

        Raises:
            Exception: If any step fails; the original error is chained
                as ``__cause__``.
        """
        try:
            result = self.infinity_client.search(
                self.table_name, self.output_fields, search_query
            )
            return self._to_basic_result(result)
        except Exception as e:
            raise Exception(f"搜索失败: {str(e)}") from e

    def vector_search(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a vector search against the Infinity database.

        Args:
            search_query: Vector search parameters. Must contain
                ``image_url`` and ``matching_text``. The dict is not
                modified; the embedding fields go into a copy.

        Returns:
            The vector search result converted to basic types for
            serialization.

        Raises:
            Exception: If any step fails (including a missing key); the
                original error is chained as ``__cause__``.
        """
        try:
            query = self._with_query_vector(search_query)
            result = self.infinity_client.vector_search(
                self.table_name, self.output_fields, query
            )
            return self._to_basic_result(result)
        except Exception as e:
            raise Exception(f"向量检索失败: {str(e)}") from e

    def hybrid_search(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a hybrid search against the Infinity database.

        Args:
            search_query: Hybrid search parameters. Must contain
                ``image_url`` and ``matching_text``. The dict is not
                modified; the embedding fields and ``match_field`` go
                into a copy.

        Returns:
            The hybrid search result converted to basic types for
            serialization.

        Raises:
            Exception: If any step fails (including a missing key); the
                original error is chained as ``__cause__``.
        """
        try:
            query = self._with_query_vector(search_query)
            query["match_field"] = self.match_field
            result = self.infinity_client.hybrid_search(
                self.table_name, self.output_fields, query
            )
            return self._to_basic_result(result)
        except Exception as e:
            raise Exception(f"混合检索失败: {str(e)}") from e