- import time
- from langchain.chat_models import init_chat_model
- from model.multimodal_embedding import Embedding
- from conf.settings import model_settings, ragflow_settings, tag_search_settings
- from utils.infinity import get_client
- from model.jina_rerank import JinaRerank
- from langchain_core.documents import Document
- from utils.infinity.result_util import convert_to_langchain_docs
# Initialize the multimodal embedding model used to vectorize query text.
embedding_model = Embedding(
    model_name=model_settings.embedding_model_name,
    api_key=model_settings.api_key
)

# Chat LLM client.
# NOTE(review): `llm` is initialized here but never used in main() below —
# confirm it is still needed, otherwise drop it to avoid a startup call.
llm = init_chat_model(
    model_provider=model_settings.model_provider,
    model=model_settings.model_name,
    base_url=model_settings.base_url,
    api_key=model_settings.api_key,
    temperature=0.3
)

# Jina-style reranker used to re-score the vector-search candidates.
compressor = JinaRerank(
    base_url=model_settings.base_url,
    jina_api_key=model_settings.api_key,
    model=model_settings.rank_model_name,  # TODO(review): confirm this model ID exists on the provider backend
    top_n=5
)

# Fields to pull back from the Infinity table for each hit.
output_fields = [
    "important_keywords",
    "content",
    "kb_id",
    "doc_id"
]

# Template for the vector-search request; `query_vector` is filled in at
# runtime by main() after embedding `matching_text`.
# NOTE(review): the triple-quoted text carries surrounding newlines/indent,
# which are passed through to the embedder as written.
search_query = {
    "matching_text": """
    莉莉兰中都有哪些小虫虫
    """,
    "query_vector": [],
    "vector_field": "q_1024_vec",   # presumably a 1024-dim embedding column — verify against the table schema
    "match_field": "content",
    "topn": 5
}
def main() -> None:
    """End-to-end retrieval demo.

    Embeds the question, runs a vector search against an Infinity table,
    reranks the candidates with the Jina reranker, and prints the top
    results together with timing information.
    """
    infinity_client = get_client(database="ragflow_db")

    # Strip the surrounding newlines/indentation the triple-quoted literal
    # carries, so the embedder and reranker see only the question itself.
    query_text = search_query["matching_text"].strip()

    # Time the embedding call on its own.
    start_time = time.time()
    embedding = embedding_model.get_text_embedding(query_text)
    embedding_time = time.time() - start_time
    print(f"向量化耗时: {embedding_time:.4f} 秒")

    # Build the request as a copy instead of mutating the module-level
    # `search_query` template in place.
    query = {**search_query, "matching_text": query_text, "query_vector": embedding}

    # NOTE(review): table name is hard-coded to one knowledge base —
    # consider moving it into settings.
    table_name = "ragflow_92162247e93e11f084830242ac1d0002_6d2e0990f28b11f0b5200242c0a85002"
    results = infinity_client.vector_search(table_name, output_fields, query).to_result()
    candidate_docs = convert_to_langchain_docs(results)

    # Rerank the vector-search candidates and keep the best 3.
    reranked_docs = compressor.compress_documents(
        documents=candidate_docs,
        query=query_text,
        top_n=3,
    )

    # Print rank, relevance score, content and tags for each surviving doc.
    for i, doc in enumerate(reranked_docs):
        print(f"排名 {i+1}: 分数 {doc.metadata['relevance_score']:.4f}")
        print(f"内容: {doc.page_content}")
        print(f"标签: {doc.metadata['important_keywords']}\n")

    # Total wall time since before the embedding call (embed + search + rerank).
    end_time = time.time()
    execution_time = end_time - start_time
    print(f"执行时间: {execution_time:.4f} 秒")
# Dead experiment kept for reference: decrypt the RAGFlow username with a
# local RSA private key. Remove once no longer needed.
# from utils.asymmetric_encryption import AsymmetricEncryption
# def main2():
#     # passwd = "zhangqi@lelequ.net"
#     # loaded_public_pem = AsymmetricEncryption.load_key_from_file(r"D:\project\work\graph_rag_server\public_key.pem")
#     loaded_private_pem = AsymmetricEncryption.load_key_from_file(r"D:\project\work\graph_rag_server\private_key.pem")
#     # encrypted = AsymmetricEncryption.encrypt(passwd, loaded_public_pem)
#     # print(encrypted)
#     decrypted = AsymmetricEncryption.decrypt(ragflow_settings.ragflow_user_name, loaded_private_pem)
#     print(decrypted)
#     # assert decrypted2 == test_message, "decryption with the loaded key failed!"
#     # print("✓ encrypt/decrypt round-trip with the loaded keys passed!")
# Script entry point.
if __name__ == "__main__":
    main()