| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- from typing import Dict, Any, List
- from langchain_core.documents import Document
def convert_to_basic_types(obj: Any) -> Any:
    """Recursively convert ``obj`` into plain types that Pydantic can serialize.

    ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned unchanged.
    Lists and tuples become lists with every element converted. Dict values
    are converted; dict keys are left as they are.

    Special case: a non-empty dict whose converted values are all lists of the
    same non-zero length is treated as column-oriented data and pivoted into a
    list of row dicts, e.g.
    ``{"a": [1, 2], "b": [3, 4]} -> [{"a": 1, "b": 3}, {"a": 2, "b": 4}]``.

    Any other object is converted with ``dict(obj)`` when that succeeds (the
    resulting dict is then converted recursively, so nested values are also
    reduced to basic types) and with ``str(obj)`` otherwise.

    Args:
        obj: The object to convert.

    Returns:
        The converted structure built from basic types.
    """
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj

    if isinstance(obj, dict):
        # Convert the values first so the column check sees tuples as lists.
        converted = {key: convert_to_basic_types(value) for key, value in obj.items()}
        columns = list(converted.values())
        if columns and all(isinstance(column, list) for column in columns):
            row_count = len(columns[0])
            if row_count > 0 and all(len(column) == row_count for column in columns):
                # All columns have the same length, so zip() drops nothing.
                keys = list(converted)
                return [dict(zip(keys, row)) for row in zip(*columns)]
        return converted

    if isinstance(obj, (list, tuple)):
        return [convert_to_basic_types(item) for item in obj]

    # Unknown type: prefer a mapping view, fall back to the string form.
    # dict() on an arbitrary object can raise almost anything, hence the broad
    # Exception; BaseException (KeyboardInterrupt, SystemExit, ...) is
    # deliberately allowed to propagate.
    try:
        as_dict = dict(obj)
    except Exception:
        return str(obj)
    return convert_to_basic_types(as_dict)
def convert_to_langchain_docs(obj: Any) -> List[Document]:
    """Convert an Infinity search result into LangChain ``Document`` objects.

    ``obj`` is first normalized with ``convert_to_basic_types``; the first
    element of the normalized result is then read as the list of row dicts.
    NOTE(review): this presumably matches the tuple returned by Infinity's
    ``to_result()``, whose first element holds the data - confirm with callers.

    Args:
        obj: The search result to convert.

    Returns:
        One ``Document`` per row, with the row's ``"content"`` value as
        ``page_content`` and every other field as metadata. An empty list is
        returned when the normalized result is empty or has no rows, matching
        the behavior of ``convert_to_json``.

    Raises:
        KeyError: If a row has no ``"content"`` field.
    """
    res = convert_to_basic_types(obj=obj)

    # Guard against None / empty results instead of failing on res[0].
    rows = res[0] if isinstance(res, list) and res else None
    if not rows:
        return []

    return [
        Document(
            page_content=item["content"],
            metadata={k: v for k, v in item.items() if k != "content"},
        )
        for item in rows
    ]
def convert_to_json(obj: Any, content_field: str = "content") -> List[Dict[str, Any]]:
    """Convert an Infinity search result into a JSON-serializable list.

    ``obj`` is first normalized with ``convert_to_basic_types``; the first
    element of the normalized result is then iterated as the rows.

    Args:
        obj: The object to convert (an Infinity search result).
        content_field: Name of the row field holding the content; defaults
            to ``"content"``.

    Returns:
        A list with one dict per row, each containing:
            - ``"content"``: the value of ``content_field`` (``""`` when the
              row is a dict without that field). A row that is not a dict is
              used as the content itself, converted with ``str()`` unless it
              already is a string.
            - ``"metadata"``: the remaining fields of a dict row, or ``{}``
              for non-dict rows.
        An empty list is returned when the normalized result is not a
        non-empty list or when its first element is empty or ``None``.

    Example:
        >>> result = infinity_client.vector_search(...)
        >>> json_data = convert_to_json(result.to_result())
        >>> # [{"content": "...", "metadata": {"field1": "...", ...}}, ...]
    """
    res = convert_to_basic_types(obj=obj)

    # Only a non-empty list has a usable first element; a falsy first element
    # (None, [], {}) means there is nothing to emit.
    rows = res[0] if isinstance(res, list) and res else None
    if not rows:
        return []

    json_list = []
    for item in rows:
        if isinstance(item, dict):
            content = item.get(content_field, "")
            metadata = {k: v for k, v in item.items() if k != content_field}
        else:
            # A string row is the content itself; anything else is stringified.
            content = item if isinstance(item, str) else str(item)
            metadata = {}
        json_list.append({"content": content, "metadata": metadata})

    return json_list
|