# infinity_search_service.py
from typing import Any, Dict, List, Optional

from src.conf.settings import vector_db_settings
from src.model.multimodal_embedding import get_embedding_model
from src.utils.file.image_util import image_util
from src.utils.infinity import InfinityClient
from src.utils.infinity.result_util import convert_to_basic_types
  7. class InfinitySearchService:
  8. def __init__(self, infinity_client: InfinityClient, vector_field: str = None, match_field: str = None, match_type: str = None, table_name: str = None):
  9. self.infinity_client = infinity_client
  10. # 输出字段
  11. self.output_fields = [
  12. "file_name",
  13. "page_number",
  14. "content",
  15. "image_path",
  16. "dataset_id",
  17. "document_id"
  18. ]
  19. self.vector_field = vector_field or "dense_vector_1024"
  20. self.match_field = match_field or "content"
  21. self.match_type = match_type or "cosine"
  22. self.table_name = table_name or vector_db_settings.infinity_table_name
  23. def search(self, search_query: Dict[str, Any]) -> Dict[str, Any]:
  24. """
  25. 执行Infinity数据库搜索
  26. Args:
  27. search_query: 搜索查询参数
  28. Returns:
  29. 搜索结果,转换为基本类型以便序列化
  30. """
  31. try:
  32. # 执行搜索
  33. result = self.infinity_client.search(self.table_name, self.output_fields, search_query)
  34. # 将结果转换为基本类型,处理可能的复杂类型
  35. result_dict = result.to_result()
  36. # 递归转换所有复杂类型为基本类型
  37. return convert_to_basic_types(result_dict)
  38. except Exception as e:
  39. raise Exception(f"搜索失败: {str(e)}")
  40. def vector_search(self, search_query: Dict[str, Any]):
  41. """
  42. 执行Infinity数据库向量检索
  43. Args:
  44. search_query: 向量检索查询参数
  45. Returns:
  46. 向量检索结果,转换为基本类型以便序列化
  47. """
  48. try:
  49. # 1.处理image_url为image: Image.Image
  50. image = image_util._url_to_image(search_query["image_url"])
  51. # 2.将图片进行向量化
  52. query_vector = get_embedding_model().get_multimodal_embedding(search_query["matching_text"], image)
  53. search_query["vector_field"] = self.vector_field
  54. search_query["query_vector"] = query_vector
  55. # 执行向量检索
  56. result = self.infinity_client.vector_search(self.table_name, self.output_fields, search_query)
  57. # 将结果转换为基本类型,处理可能的复杂类型
  58. result_dict = result.to_result()
  59. # 递归转换所有复杂类型为基本类型
  60. return convert_to_basic_types(result_dict)
  61. except Exception as e:
  62. raise Exception(f"向量检索失败: {str(e)}")
  63. def hybrid_search(self, search_query: Dict[str, Any]):
  64. """
  65. 执行Infinity数据库混合检索
  66. Args:
  67. search_query: 混合检索查询参数
  68. Returns:
  69. 混合检索结果,转换为基本类型以便序列化
  70. """
  71. try:
  72. # 1.处理image_url为image: Image.Image
  73. image = image_util._url_to_image(search_query["image_url"])
  74. # 2.将图片进行向量化
  75. query_vector = get_embedding_model().get_multimodal_embedding(search_query["matching_text"], image)
  76. search_query["vector_field"] = self.vector_field
  77. search_query["query_vector"] = query_vector
  78. search_query["match_field"] = self.match_field
  79. # 执行混合检索
  80. result = self.infinity_client.hybrid_search(self.table_name, self.output_fields, search_query)
  81. # 将结果转换为基本类型,处理可能的复杂类型
  82. result_dict = result.to_result()
  83. # 递归转换所有复杂类型为基本类型
  84. return convert_to_basic_types(result_dict)
  85. except Exception as e:
  86. raise Exception(f"混合检索失败: {str(e)}")