from typing import Dict, Any, List

from langchain_core.documents import Document


def convert_to_basic_types(obj: Any) -> Any:
    """Recursively convert ``obj`` into basic types so Pydantic can serialize it.

    Special case: a dict whose (already converted) values are all lists of
    the same non-zero length is pivoted from a column layout into a list of
    row dicts, e.g.::

        {"a": [1, 2], "b": [3, 4]} -> [{"a": 1, "b": 3}, {"a": 2, "b": 4}]

    A dict with empty or ragged columns, or with any non-list value, is
    returned as a dict (with its values converted).

    Args:
        obj: The object to convert.

    Returns:
        ``None``/``str``/``int``/``float``/``bool`` unchanged; dicts as
        described above; lists and tuples as lists of converted items; any
        other object as ``dict(obj)`` when that works, else ``str(obj)``.
    """
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj

    if isinstance(obj, dict):
        # Convert the values first so nested structures are already in
        # their final form when the column-layout check runs.
        converted = {k: convert_to_basic_types(v) for k, v in obj.items()}

        columns = list(converted.values())
        if all(isinstance(col, list) for col in columns):
            lengths = {len(col) for col in columns}
            # Pivot only when every column has the same, non-zero length.
            # (An empty dict yields an empty ``lengths`` set and is skipped.)
            if len(lengths) == 1 and 0 not in lengths:
                keys = list(converted)
                # Columns are equally long, so zip() drops nothing.
                return [dict(zip(keys, row)) for row in zip(*columns)]
        return converted

    if isinstance(obj, (list, tuple)):
        return [convert_to_basic_types(item) for item in obj]

    # Any other type: best effort -- try a mapping view, then fall back to
    # the string form. ``Exception`` (not a bare ``except``) so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        return dict(obj)
    except Exception:
        return str(obj)


def convert_to_langchain_docs(obj: Any) -> List[Document]:
    """Convert an Infinity search result into LangChain ``Document`` objects.

    ``obj`` is normalized with ``convert_to_basic_types`` and the first
    element of the result is read as the list of row dicts. Each row must
    provide the keys ``content``, ``docnm``, ``tag_kwd``, ``kb_id`` and
    ``doc_id``.

    Args:
        obj: The raw search result to convert.

    Returns:
        One ``Document`` per row, with ``content`` as the page content and
        the remaining four fields as metadata. An empty list is returned
        when the result is ``None``, an empty sequence, or a column mapping
        that holds no rows.

    Raises:
        TypeError: If the first element is a mapping that is not a set of
            empty columns, i.e. it was not pivoted into rows.
        KeyError: If a row lacks one of the required keys.
    """
    res = convert_to_basic_types(obj=obj)
    if res is None or (isinstance(res, list) and not res):
        return []

    rows = res[0]
    if isinstance(rows, dict):
        # convert_to_basic_types() leaves zero-length columns un-pivoted, so
        # a search without hits arrives here as {"content": [], ...}.
        # Iterating that dict would yield its key strings and fail on
        # item["content"], so treat it as "no documents".
        if all(isinstance(col, list) and not col for col in rows.values()):
            return []
        raise TypeError(
            "expected a list of row dicts as the first element of the search "
            "result, got a mapping that could not be pivoted into rows"
        )

    # Convert the rows to LangChain's Document format.
    candidate_docs = [
        Document(
            page_content=item["content"],
            metadata={
                "docnm": item["docnm"],
                "tag_kwd": item["tag_kwd"],
                "kb_id": item["kb_id"],
                "doc_id": item["doc_id"],
            },
        )
        for item in rows
    ]
    return candidate_docs