| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187 |
"""
Search manager for the Infinity vector database.
"""
import logging
from typing import List, Dict, Any, Optional

from services.utils.infinity.base import InfinityConnection
class InfinitySearchManager:
    """Search manager for the Infinity vector database.

    Sends search requests through an Infinity connection and reshapes each
    response into ``{"hits": {"total": ..., "hits": [{"_source": doc}, ...]}}``.

    Every search method is best-effort: if the request raises, or the
    response contains an ``"error"`` key, the failure is logged and an empty
    result (``total`` 0, no hits) is returned instead of an exception.
    """

    _logger = logging.getLogger(__name__)

    def __init__(self, infinity_connection: Optional["InfinityConnection"] = None):
        """Initialise the search manager.

        Args:
            infinity_connection: Connection used to issue requests. When
                omitted, a new ``InfinityConnection`` is created.
        """
        # Compare against None explicitly so a caller-supplied connection is
        # never discarded just because it happens to evaluate as falsy.
        if infinity_connection is None:
            infinity_connection = InfinityConnection()
        self.infinity_conn = infinity_connection

    @staticmethod
    def _empty_result() -> Dict[str, Any]:
        """Return a fresh empty result so callers may mutate it safely."""
        return {"hits": {"total": 0, "hits": []}}

    @staticmethod
    def _format_response(response: Dict[str, Any]) -> Dict[str, Any]:
        """Wrap each document of a successful response under ``_source``."""
        # A missing or null ``documents`` entry is treated as "no hits".
        documents = response.get("documents") or []
        return {
            "hits": {
                "total": response.get("total", 0),
                "hits": [{"_source": doc} for doc in documents],
            }
        }

    def _execute(self, index_name: str, payload: Dict[str, Any],
                 failure_message: str) -> Dict[str, Any]:
        """POST ``payload`` to the collection's search endpoint.

        Args:
            index_name: Name of the collection to search.
            payload: Request body passed to the connection.
            failure_message: Prefix used when logging a failure.

        Returns:
            Dict: The formatted result, or an empty result on any failure.
        """
        try:
            path = f"/api/collections/{index_name}/search"
            response = self.infinity_conn._make_request("POST", path, payload)
            if "error" in response:
                self._logger.warning("%s: %s", failure_message, response["error"])
                return self._empty_result()
            return self._format_response(response)
        except Exception as e:
            # Deliberately broad: searches are best-effort and must not raise.
            self._logger.exception("%s: %s", failure_message, e)
            return self._empty_result()

    def search(self, index_name: str, query: Dict[str, Any], size: int = 10) -> Dict[str, Any]:
        """Run a full-text search.

        Args:
            index_name: Name of the collection to search.
            query: Query clause sent as the ``query`` field of the request.
            size: Maximum number of results, sent as ``limit``.

        Returns:
            Dict: Search result; empty when the request fails.
        """
        payload = {"query": query, "limit": size}
        return self._execute(index_name, payload, "Infinity搜索失败")

    def vector_search(self, index_name: str, vector_field: str, vector: List[float],
                      size: int = 10, filter: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Run a vector search.

        Args:
            index_name: Name of the collection to search.
            vector_field: Name of the vector field to search against.
            vector: Query vector.
            size: Maximum number of results, sent as the vector ``limit``.
            filter: Optional filter clause; omitted from the request when
                ``None`` or empty.

        Returns:
            Dict: Search result; empty when the request fails.
        """
        payload: Dict[str, Any] = {
            "vector": {
                "field": vector_field,
                "query": vector,
                "limit": size,
            }
        }
        if filter:
            payload["filter"] = filter
        return self._execute(index_name, payload, "Infinity向量检索失败")

    def hybrid_search(self, index_name: str, text_query: str, vector_field: str, vector: List[float],
                      size: int = 10, text_weight: float = 0.5, vector_weight: float = 0.5,
                      text_fields: Optional[List[str]] = None) -> Dict[str, Any]:
        """Run a hybrid search combining text and vector retrieval.

        Args:
            index_name: Name of the collection to search.
            text_query: Text to search for.
            vector_field: Name of the vector field to search against.
            vector: Query vector.
            size: Maximum number of results, sent as ``limit``.
            text_weight: Weight given to the text part of the query.
            vector_weight: Weight given to the vector part of the query.
            text_fields: Fields the text query is matched against; defaults
                to ``["text"]``.

        Returns:
            Dict: Search result; empty when the request fails.
        """
        fields = ["text"] if text_fields is None else list(text_fields)
        payload = {
            "hybrid": {
                "text": {
                    "query": text_query,
                    "fields": fields,
                    "weight": text_weight,
                },
                "vector": {
                    "field": vector_field,
                    "query": vector,
                    "weight": vector_weight,
                },
                "limit": size,
            }
        }
        return self._execute(index_name, payload, "Infinity混合检索失败")

    def match_search(self, index_name: str, field: str, value: str, size: int = 10) -> Dict[str, Any]:
        """Search for documents whose ``field`` matches ``value``.

        Args:
            index_name: Name of the collection to search.
            field: Name of the field to match on.
            value: Value to match.
            size: Maximum number of results.

        Returns:
            Dict: Search result; empty when the request fails.
        """
        return self.search(index_name, {"match": {field: value}}, size=size)

    def match_all(self, index_name: str, size: int = 10) -> Dict[str, Any]:
        """Return documents using a ``match_all`` query.

        Args:
            index_name: Name of the collection to search.
            size: Maximum number of results.

        Returns:
            Dict: Search result; empty when the request fails.
        """
        return self.search(index_name, {"match_all": {}}, size=size)
|