|
@@ -1,4 +1,4 @@
|
|
|
-import json
|
|
|
|
|
|
|
+import re
|
|
|
from src.conf.settings import vector_db_settings
|
|
from src.conf.settings import vector_db_settings
|
|
|
from src.utils.infinity import InfinityClient
|
|
from src.utils.infinity import InfinityClient
|
|
|
from src.utils.file.image_util import image_util
|
|
from src.utils.file.image_util import image_util
|
|
@@ -8,18 +8,19 @@ from src.api.dataset.models.dify_models import RetrievalRequest
|
|
|
from src.conf.settings import vector_db_settings
|
|
from src.conf.settings import vector_db_settings
|
|
|
|
|
|
|
|
class DifyKnowledgeService:
|
|
class DifyKnowledgeService:
|
|
|
- def __init__(self, infinity_client: InfinityClient, vector_field: str = None, match_field: str = None, match_type: str = None, table_name: str = None):
|
|
|
|
|
|
|
+ def __init__(self, infinity_client: InfinityClient, vector_field: str = None, match_field: str = None,
|
|
|
|
|
+ match_type: str = None, table_name: str = None):
|
|
|
self.infinity_client = infinity_client
|
|
self.infinity_client = infinity_client
|
|
|
# 输出字段
|
|
# 输出字段
|
|
|
self.output_fields = [
|
|
self.output_fields = [
|
|
|
- "file_name",
|
|
|
|
|
- "page_number",
|
|
|
|
|
- "content",
|
|
|
|
|
- "image_path",
|
|
|
|
|
- "dataset_id",
|
|
|
|
|
- "document_id",
|
|
|
|
|
- "_similarity"
|
|
|
|
|
- ]
|
|
|
|
|
|
|
+ "file_name",
|
|
|
|
|
+ "page_number",
|
|
|
|
|
+ "content",
|
|
|
|
|
+ "image_path",
|
|
|
|
|
+ "dataset_id",
|
|
|
|
|
+ "document_id",
|
|
|
|
|
+ "_similarity"
|
|
|
|
|
+ ]
|
|
|
self.vector_field = vector_field or "dense_vector_1024"
|
|
self.vector_field = vector_field or "dense_vector_1024"
|
|
|
self.match_field = match_field or "content"
|
|
self.match_field = match_field or "content"
|
|
|
self.match_type = match_type or "cosine"
|
|
self.match_type = match_type or "cosine"
|
|
@@ -28,10 +29,10 @@ class DifyKnowledgeService:
|
|
|
def dify_database_search(self, request: RetrievalRequest):
|
|
def dify_database_search(self, request: RetrievalRequest):
|
|
|
"""
|
|
"""
|
|
|
执行Dify数据库搜索
|
|
执行Dify数据库搜索
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
Args:
|
|
Args:
|
|
|
retrievalRequest: 搜索查询参数
|
|
retrievalRequest: 搜索查询参数
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
Returns:
|
|
Returns:
|
|
|
搜索结果,转换为基本类型以便序列化
|
|
搜索结果,转换为基本类型以便序列化
|
|
|
"""
|
|
"""
|
|
@@ -42,14 +43,29 @@ class DifyKnowledgeService:
|
|
|
else:
|
|
else:
|
|
|
# 抛出异常
|
|
# 抛出异常
|
|
|
raise Exception("knowledge_id不能为空")
|
|
raise Exception("knowledge_id不能为空")
|
|
|
- # 获取检索参数,并解析为json
|
|
|
|
|
- try:
|
|
|
|
|
- query = json.loads(request.query)
|
|
|
|
|
- # 检查query是否包含match_image或match_text
|
|
|
|
|
- if "match_image" in query or "matching_text" in query:
|
|
|
|
|
- input_image = query.get("match_image")
|
|
|
|
|
- input_text = query.get("matching_text")
|
|
|
|
|
- except json.JSONDecodeError:
|
|
|
|
|
|
|
+ # 解析格式如: matching_text:点点,match_image:http://xxx 或 matching_text:点点,match_image:http://xxx
|
|
|
|
|
+ # 支持中文和英文的逗号、冒号
|
|
|
|
|
+ input_image = None
|
|
|
|
|
+ input_text = None
|
|
|
|
|
+ query_str = request.query
|
|
|
|
|
+ # 将中文逗号替换为英文逗号,用于分割
|
|
|
|
|
+ query_str_normalized = re.sub(r'[,]', ',', query_str)
|
|
|
|
|
+ # 按逗号分割为多个键值对
|
|
|
|
|
+ pairs = query_str_normalized.split(',')
|
|
|
|
|
+ for pair in pairs:
|
|
|
|
|
+ # 将中文冒号替换为英文冒号,用于分割键值
|
|
|
|
|
+ pair_normalized = re.sub(r'[:]', ':', pair, count=1)
|
|
|
|
|
+ if ':' in pair_normalized:
|
|
|
|
|
+ # 只分割第一个冒号,避免URL中的冒号被分割
|
|
|
|
|
+ key, value = pair_normalized.split(':', 1)
|
|
|
|
|
+ key = key.strip()
|
|
|
|
|
+ value = value.strip()
|
|
|
|
|
+ if key == 'match_image':
|
|
|
|
|
+ input_image = value
|
|
|
|
|
+ elif key == 'matching_text':
|
|
|
|
|
+ input_text = value
|
|
|
|
|
+ # 如果没有解析出任何参数,将整个query作为input_text
|
|
|
|
|
+ if input_image is None and input_text is None:
|
|
|
input_text = request.query
|
|
input_text = request.query
|
|
|
|
|
|
|
|
retrieval_setting = request.retrieval_setting
|
|
retrieval_setting = request.retrieval_setting
|
|
@@ -69,7 +85,7 @@ class DifyKnowledgeService:
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
# 执行搜索
|
|
# 执行搜索
|
|
|
- result = self.infinity_client.vector_search(table_name, self.output_fields, search_query)
|
|
|
|
|
|
|
+ result = self.infinity_client.vector_search(table_name, self.output_fields, search_query)
|
|
|
# 将结果转换为基本类型,处理可能的复杂类型
|
|
# 将结果转换为基本类型,处理可能的复杂类型
|
|
|
result_dict = result.to_result()
|
|
result_dict = result.to_result()
|
|
|
# 递归转换所有复杂类型为基本类型
|
|
# 递归转换所有复杂类型为基本类型
|