result_util.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. from typing import Dict, Any, List
  2. from langchain_core.documents import Document
  3. def convert_to_basic_types(obj: Any) -> Any:
  4. """
  5. 递归将对象转换为基本类型,以便Pydantic能够序列化
  6. 特殊处理:当字典中的子项包含相同长度的数组时,将其转换为数组对象结构
  7. 例如:{"a": [1,2], "b": [3,4]} -> [{"a":1, "b":3}, {"a":2, "b":4}]
  8. Args:
  9. obj: 要转换的对象
  10. Returns:
  11. 转换后的基本类型对象
  12. """
  13. if obj is None:
  14. return None
  15. elif isinstance(obj, (str, int, float, bool)):
  16. return obj
  17. elif isinstance(obj, dict):
  18. # 先递归转换所有值
  19. converted = {k: convert_to_basic_types(v) for k, v in obj.items()}
  20. # 检查是否需要转换为数组对象结构
  21. # 条件:所有值都是列表,且长度一致,且长度大于0
  22. values = list(converted.values())
  23. if all(isinstance(v, list) for v in values):
  24. lengths = [len(v) for v in values]
  25. if len(set(lengths)) == 1 and lengths[0] > 0:
  26. # 转换为数组对象结构
  27. result = []
  28. keys = list(converted.keys())
  29. for i in range(lengths[0]):
  30. item = {}
  31. for key in keys:
  32. # 处理数组中可能存在的None值
  33. if i < len(converted[key]):
  34. item[key] = converted[key][i]
  35. else:
  36. item[key] = None
  37. result.append(item)
  38. return result
  39. return converted
  40. elif isinstance(obj, (list, tuple)):
  41. return [convert_to_basic_types(item) for item in obj]
  42. else:
  43. # 对于其他类型,尝试将其转换为字符串或字典
  44. try:
  45. return dict(obj)
  46. except:
  47. return str(obj)
  48. def convert_to_langchain_docs(obj: Any) -> List[Document]:
  49. """
  50. 将Infinity搜索结果转换为LangChain的Document格式
  51. Args:
  52. obj: 要转换的对象
  53. Returns:
  54. 转换后的Document列表
  55. """
  56. res = convert_to_basic_types(obj=obj)
  57. # 将数据转换为 LangChain 的 Document 格式
  58. candidate_docs = [
  59. Document(page_content=item["content"],
  60. metadata={k: v for k, v in item.items() if k != "content"})
  61. for item in res[0]
  62. ]
  63. return candidate_docs