from typing import Dict, Any, List, Optional

from conf.config import VectorDBConfig
from utils.infinity import InfinityClient
from utils.file.image_util import image_util
from model.multimodal_embedding import get_embedding_model


def convert_to_basic_types(obj: Any) -> Any:
    """Recursively convert ``obj`` into plain built-in types.

    The intent (per the original author) is to make the value serializable
    by Pydantic.

    Special case: after its values have been converted, a non-empty dict
    whose values are *all* lists of the same non-zero length is pivoted from
    a column layout into a list of row dicts, e.g.
    ``{"a": [1, 2], "b": [3, 4]}`` -> ``[{"a": 1, "b": 3}, {"a": 2, "b": 4}]``.
    Any other dict is returned as a dict with converted values.

    Args:
        obj: The object to convert.

    Returns:
        ``None``/``str``/``int``/``float``/``bool`` unchanged; lists and
        tuples as lists of converted items; dicts as described above; any
        other object as ``dict(obj)`` when that works, else ``str(obj)``.
    """
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj

    if isinstance(obj, dict):
        # Convert the values first so nested tuples already count as lists
        # when the column/row check below runs.
        converted = {key: convert_to_basic_types(value) for key, value in obj.items()}
        columns = list(converted.values())
        if columns and all(isinstance(column, list) for column in columns):
            lengths = {len(column) for column in columns}
            # Pivot only when every column has the same, non-zero length.
            if len(lengths) == 1 and 0 not in lengths:
                keys = list(converted)
                # Columns are equally long here, so zip() drops nothing.
                return [dict(zip(keys, row)) for row in zip(*columns)]
        return converted

    if isinstance(obj, (list, tuple)):
        return [convert_to_basic_types(item) for item in obj]

    # Unknown type: best effort -- mapping-like objects become dicts and
    # everything else is stringified. ``Exception`` (rather than a bare
    # ``except``) keeps KeyboardInterrupt/SystemExit from being swallowed.
    try:
        return dict(obj)
    except Exception:
        return str(obj)


class InfinitySearchService:
    """Run searches through an ``InfinityClient`` and return plain-type results.

    Every search method passes the configured table name and a fixed list of
    output fields to the client, calls ``to_result()`` on what the client
    returns and feeds that through ``convert_to_basic_types``.
    """

    def __init__(
        self,
        infinity_client: InfinityClient,
        vector_field: Optional[str] = None,
        match_field: Optional[str] = None,
        match_type: Optional[str] = None,
        table_name: Optional[str] = None,
    ):
        """Store the client and resolve the search defaults.

        Args:
            infinity_client: Client used to execute the searches.
            vector_field: Vector column name; defaults to
                ``"dense_vector_1024"`` when falsy.
            match_field: Text-match column name; defaults to ``"content"``
                when falsy.
            match_type: Defaults to ``"cosine"`` when falsy. It is only
                stored here; no method in this class reads it.
            table_name: Table to query; defaults to
                ``VectorDBConfig.get_infinity_table_name()`` when falsy.
        """
        self.infinity_client = infinity_client
        # Fields requested from the client for every hit.
        self.output_fields = [
            "file_name",
            "page_number",
            "content",
            "image_path",
            "dataset_id",
            "document_id",
        ]
        self.vector_field = vector_field or "dense_vector_1024"
        self.match_field = match_field or "content"
        self.match_type = match_type or "cosine"
        self.table_name = table_name or VectorDBConfig.get_infinity_table_name()

    @staticmethod
    def _to_basic_result(result: Any) -> Any:
        """Call ``result.to_result()`` and convert the outcome to plain types."""
        return convert_to_basic_types(result.to_result())

    def _with_query_vector(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
        """Return a shallow copy of ``search_query`` extended with the query embedding.

        Reads ``search_query["image_url"]`` and ``search_query["matching_text"]``
        (``KeyError`` if either is missing) and adds ``"vector_field"`` and
        ``"query_vector"`` to the copy. The caller's dict is left untouched so
        the embedding does not leak back into request objects.
        """
        # Turn the image URL into an image object for the embedding model
        # (presumably a PIL image -- confirm against image_util).
        image = image_util._url_to_image(search_query["image_url"])
        # Embed the text together with the image.
        query_vector = get_embedding_model().get_multimodal_embedding(
            search_query["matching_text"], image
        )
        query = dict(search_query)
        query["vector_field"] = self.vector_field
        query["query_vector"] = query_vector
        return query

    def search(self, search_query: Dict[str, Any]) -> Any:
        """Run a plain search against the configured table.

        Args:
            search_query: Query parameters, passed to the client unchanged.

        Returns:
            The client's result converted to plain types. This may be a list
            of row dicts rather than a dict, because
            ``convert_to_basic_types`` pivots equal-length column dicts.

        Raises:
            Exception: Wraps any failure, chained to the original error.
        """
        try:
            result = self.infinity_client.search(
                self.table_name, self.output_fields, search_query
            )
            return self._to_basic_result(result)
        except Exception as e:
            raise Exception(f"搜索失败: {e}") from e

    def vector_search(self, search_query: Dict[str, Any]) -> Any:
        """Run a vector search using a multimodal embedding of the query.

        Args:
            search_query: Query parameters; must contain ``"image_url"`` and
                ``"matching_text"``. The dict is not modified; a copy carrying
                ``"vector_field"`` and ``"query_vector"`` is sent to the client.

        Returns:
            The client's result converted to plain types (see ``search``).

        Raises:
            Exception: Wraps any failure (including missing keys), chained to
                the original error.
        """
        try:
            query = self._with_query_vector(search_query)
            result = self.infinity_client.vector_search(
                self.table_name, self.output_fields, query
            )
            return self._to_basic_result(result)
        except Exception as e:
            raise Exception(f"向量检索失败: {e}") from e

    def hybrid_search(self, search_query: Dict[str, Any]) -> Any:
        """Run a hybrid search (query vector plus a text match field).

        Args:
            search_query: Query parameters; must contain ``"image_url"`` and
                ``"matching_text"``. The dict is not modified; a copy carrying
                ``"vector_field"``, ``"query_vector"`` and ``"match_field"`` is
                sent to the client.

        Returns:
            The client's result converted to plain types (see ``search``).

        Raises:
            Exception: Wraps any failure (including missing keys), chained to
                the original error.
        """
        try:
            query = self._with_query_vector(search_query)
            query["match_field"] = self.match_field
            result = self.infinity_client.hybrid_search(
                self.table_name, self.output_fields, query
            )
            return self._to_basic_result(result)
        except Exception as e:
            raise Exception(f"混合检索失败: {e}") from e