result_util.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. from typing import Dict, Any, List
  2. from langchain_core.documents import Document
  3. def convert_to_basic_types(obj: Any) -> Any:
  4. """
  5. 递归将对象转换为基本类型,以便Pydantic能够序列化
  6. 特殊处理:当字典中的子项包含相同长度的数组时,将其转换为数组对象结构
  7. 例如:{"a": [1,2], "b": [3,4]} -> [{"a":1, "b":3}, {"a":2, "b":4}]
  8. Args:
  9. obj: 要转换的对象
  10. Returns:
  11. 转换后的基本类型对象
  12. """
  13. if obj is None:
  14. return None
  15. elif isinstance(obj, (str, int, float, bool)):
  16. return obj
  17. elif isinstance(obj, dict):
  18. # 先递归转换所有值
  19. converted = {k: convert_to_basic_types(v) for k, v in obj.items()}
  20. # 检查是否需要转换为数组对象结构
  21. # 条件:所有值都是列表,且长度一致,且长度大于0
  22. values = list(converted.values())
  23. if all(isinstance(v, list) for v in values):
  24. lengths = [len(v) for v in values]
  25. if len(set(lengths)) == 1 and lengths[0] > 0:
  26. # 转换为数组对象结构
  27. result = []
  28. keys = list(converted.keys())
  29. for i in range(lengths[0]):
  30. item = {}
  31. for key in keys:
  32. # 处理数组中可能存在的None值
  33. if i < len(converted[key]):
  34. item[key] = converted[key][i]
  35. else:
  36. item[key] = None
  37. result.append(item)
  38. return result
  39. return converted
  40. elif isinstance(obj, (list, tuple)):
  41. return [convert_to_basic_types(item) for item in obj]
  42. else:
  43. # 对于其他类型,尝试将其转换为字符串或字典
  44. try:
  45. return dict(obj)
  46. except:
  47. return str(obj)
  48. def convert_to_langchain_docs(obj: Any) -> List[Document]:
  49. """
  50. 将Infinity搜索结果转换为LangChain的Document格式
  51. Args:
  52. obj: 要转换的对象
  53. Returns:
  54. 转换后的Document列表
  55. """
  56. res = convert_to_basic_types(obj=obj)
  57. # 将数据转换为 LangChain 的 Document 格式
  58. candidate_docs = [
  59. Document(page_content=item["content"],
  60. metadata={k: v for k, v in item.items() if k != "content"})
  61. for item in res[0]
  62. ]
  63. return candidate_docs
  64. def convert_to_json(obj: Any, content_field: str = "content") -> List[Dict[str, Any]]:
  65. """
  66. 将Infinity搜索结果转换为JSON可序列化的列表格式
  67. Args:
  68. obj: 要转换的对象(Infinity搜索结果)
  69. content_field: 内容字段名称,默认为"content"
  70. Returns:
  71. 转换后的JSON可序列化列表,每个元素包含:
  72. - content: 内容字段
  73. - metadata: 其他字段组成的元数据字典
  74. Example:
  75. >>> result = infinity_client.vector_search(...)
  76. >>> json_data = convert_to_json(result.to_result())
  77. >>> # [{"content": "...", "metadata": {"field1": "...", ...}}, ...]
  78. """
  79. res = convert_to_basic_types(obj=obj)
  80. # 处理结果为空的情况
  81. if not res or len(res) == 0 or len(res[0]) == 0:
  82. return []
  83. # 转换为JSON格式
  84. json_list = []
  85. for item in res[0]:
  86. # 处理 item 可能是字符串或字典的情况
  87. if isinstance(item, dict):
  88. json_item = {
  89. "content": item.get(content_field, ""),
  90. "metadata": {k: v for k, v in item.items() if k != content_field}
  91. }
  92. elif isinstance(item, str):
  93. # 如果 item 是字符串,直接作为 content
  94. json_item = {
  95. "content": item,
  96. "metadata": {}
  97. }
  98. else:
  99. # 其他类型转为字符串
  100. json_item = {
  101. "content": str(item),
  102. "metadata": {}
  103. }
  104. json_list.append(json_item)
  105. return json_list