"""Ad-hoc CLI helpers for exercising the RAGFlow dataset / chunk HTTP API.

Each helper wraps one ``RAGFlowService`` call and prints the raw response,
intended for manual testing from ``__main__`` rather than programmatic use.
"""

import os
from urllib.parse import urlparse

from utils.ragflow.ragflow_service import RAGFlowService
from utils.infinity import InfinityClient

# Default parser configuration passed to RAGFlow when creating a dataset.
# Keys follow the RAGFlow `parser_config` schema (chunking, RAPTOR, GraphRAG).
DATASET_CONFIG_DICT = {
    "chunk_token_num": 256,
    "delimiter": "\n!?;。;!?",
    "html4excel": False,
    "layout_recognize": "Pro/Qwen/Qwen2.5-VL-7B-Instruct@SILICONFLOW",
    "auto_keywords": 5,
    "tag_kb_ids": [],
    "topn_tags": 3,
    "task_page_size": 4,
    "raptor": {
        "max_cluster": 64,
        "max_token": 256,
        "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
        "random_seed": 0,
        "threshold": 0.1,
        "use_raptor": True,
    },
    "graphrag": {
        "resolution": True,
        "use_graphrag": True,
        "method": "general",
        "entity_types": [
            "event",
            "Book",
            "Author",
            "Illustrator",
            "Series",
            "Theme",
            "Genre",
            "Character",
            "Setting",
            "AgeGroup",
            "Competency",
            "ArtStyle",
            "Award",
            "Publisher",
        ],
    },
}

# Shared service client; every helper below goes through this instance.
ragflow_service = RAGFlowService()

# Sample dataset ids used for manual testing.
dataset_ids = [
    "c2be78a4f10711f095230242c0a85002",
]


def create_dataset():
    """Create a fixed test dataset with DATASET_CONFIG_DICT and print the result."""
    dataset = ragflow_service.create_dataset(
        name="test_http_dataset1",
        description="测试HTTP数据集1",
        permission="team",
        chunk_method="naive",
        parser_config=DATASET_CONFIG_DICT,
    )
    print(dataset)


def delete_dataset(dataset_id: str):
    """Delete a single dataset by id and print the service's return flag."""
    flg = ragflow_service.delete_datasets(dataset_ids=[dataset_id])
    print(flg)


def delete_datasets(dataset_ids: list[str]):
    """Delete several datasets at once and print the service's return flag."""
    flg = ragflow_service.delete_datasets(dataset_ids=dataset_ids)
    print(flg)


def list_datasets(name: str = None):
    """List datasets, optionally filtered by name, and print them."""
    datasets = ragflow_service.list_datasets(name=name)
    print(datasets)


def get_dataset(name: str = None, dataset_id: str = None):
    """Fetch one dataset by name and/or id and print it."""
    dataset = ragflow_service.get_dataset(name=name, dataset_id=dataset_id)
    print(dataset)


def add_chunk(dataset_id: str, document_id: str, content: str,
              important_keywords: list[str] = None):
    """Create a chunk in a document and return the service response.

    ``important_keywords`` is a list of keyword strings (the original
    ``list[int]`` annotation contradicted the string values actually passed).
    """
    chunk = ragflow_service.create_chunk(
        dataset_id=dataset_id,
        document_id=document_id,
        content=content,
        important_keywords=important_keywords,
    )
    print(chunk)
    return chunk


def test_image():
    """Demonstrate extracting the object filename from a presigned MinIO URL."""
    url = "http://192.168.16.134:9000/bookpage/daa1861c-2096-42c0-b8e3-a163f96f0f66.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074307Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=6150ffc414cccbedc255bc0a72d85fd4e693a59b112789af61e8a0e93d00e5dc"
    # Extract the "daa1861c-2096-42c0-b8e3-a163f96f0f66.png" part of the URL:
    # urlparse drops the query string, so only the last path segment remains
    # (more robust than os.path.basename on the raw URL + manual "?" split).
    img_id = os.path.basename(urlparse(url).path)
    print(img_id)


if __name__ == "__main__":
    test_image()
    # Example: add a tagged chunk, then patch its tag directly in Infinity.
    # dataset_id = "18caf531f04d11f095670242c0a85002"
    # document_id = "3dda0a90f1e211f0a3b80242c0a85002"
    # tag = "社会L3_人际交往L3_同理心L3"
    # content = "能感知他人情绪,对同伴的困难产生理解并尝试回应"
    # important_keywords = ["3", "4"]
    # chunk = add_chunk(dataset_id=dataset_id, document_id=document_id, content=content, important_keywords=important_keywords)
    # chunk_id = chunk["chunk"]["id"]
    # infinity_client = InfinityClient()
    # infinity_client.update(database_name="default_db", table_name="ragflow_92162247e93e11f084830242ac1d0002_18caf531f04d11f095670242c0a85002", cond=f"id = '{chunk_id}'", data={"tag_kwd": tag})
    # print(chunk_id)