- from utils.ragflow.ragflow_service import RAGFlowService
- from utils.infinity import InfinityClient
# Parser configuration passed to RAGFlow when creating a dataset with the
# "naive" chunk method (see create_dataset below). Field names follow
# RAGFlow's `parser_config` schema.
DATASET_CONFIG_DICT = {
    "chunk_token_num": 256,
    # Chunk delimiters: newline plus ASCII and full-width (CJK) punctuation.
    "delimiter": "\n!?;。;!?",
    "html4excel": False,
    # Layout-recognition model — presumably "model@provider" form; confirm
    # against the RAGFlow deployment config.
    "layout_recognize": "Pro/Qwen/Qwen2.5-VL-7B-Instruct@SILICONFLOW",
    "auto_keywords": 5,
    "tag_kb_ids": [],
    "topn_tags": 3,
    "task_page_size": 4,
    # RAPTOR (recursive clustering/summarization) settings.
    "raptor": {
        "max_cluster": 64,
        "max_token": 256,
        "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
        "random_seed": 0,
        "threshold": 0.1,
        "use_raptor": True
    },
    # GraphRAG extraction settings; entity_types lists the node types to
    # extract (book-domain entities).
    "graphrag": {
        "resolution": True,
        "use_graphrag": True,
        "method": "general",
        "entity_types": [
            "event",
            "Book",
            "Author",
            "Illustrator",
            "Series",
            "Theme",
            "Genre",
            "Character",
            "Setting",
            "AgeGroup",
            "Competency",
            "ArtStyle",
            "Award",
            "Publisher"
        ]
    }
}
# Module-level RAGFlow client shared by all helper functions below.
ragflow_service = RAGFlowService()

# Ad-hoc list of dataset ids for manual testing; not referenced by any of the
# functions in this file as visible here.
dataset_ids = [
    "c2be78a4f10711f095230242c0a85002"
]
def create_dataset(name: str = "test_http_dataset1", description: str = "测试HTTP数据集1"):
    """Create a RAGFlow dataset and print the service response.

    The dataset is created with team permission, the "naive" chunk method,
    and the module-level DATASET_CONFIG_DICT parser configuration.

    Args:
        name: Dataset name. Defaults to the previously hard-coded test name,
            so existing no-argument callers behave identically.
        description: Dataset description. Defaults to the original value.
    """
    dataset = ragflow_service.create_dataset(
        name=name,
        description=description,
        permission="team",
        chunk_method="naive",
        parser_config=DATASET_CONFIG_DICT,
    )
    print(dataset)
def delete_dataset(dataset_id: str):
    """Delete one dataset by id; the batch helper prints the service's flag."""
    # Single-id convenience wrapper around the batch delete below.
    delete_datasets(dataset_ids=[dataset_id])
def delete_datasets(dataset_ids: list[str]):
    """Delete the given datasets by id and print the service's success flag."""
    print(ragflow_service.delete_datasets(dataset_ids=dataset_ids))
def list_datasets(name: str = None):
    """Print the datasets known to the service, optionally filtered by name."""
    print(ragflow_service.list_datasets(name=name))
def get_dataset(name: str = None, dataset_id: str = None):
    """Look up a single dataset by name and/or id and print it."""
    print(ragflow_service.get_dataset(name=name, dataset_id=dataset_id))
def add_chunk(dataset_id: str, document_id: str, content: str, important_keywords: list[str] = None):
    """Create a chunk in a document and return the service response.

    Fix: the annotation was `list[int]`, but keywords are strings — the
    example usage in this file passes ["3", "4"] — so it is `list[str]` now.

    Args:
        dataset_id: Id of the dataset containing the document.
        document_id: Id of the document to attach the chunk to.
        content: Chunk text content.
        important_keywords: Optional list of keyword strings for the chunk.

    Returns:
        Whatever ragflow_service.create_chunk returns (printed as well).
    """
    chunk = ragflow_service.create_chunk(
        dataset_id=dataset_id,
        document_id=document_id,
        content=content,
        important_keywords=important_keywords,
    )
    print(chunk)
    return chunk
import os


def test_image(url: str = "http://192.168.16.134:9000/bookpage/daa1861c-2096-42c0-b8e3-a163f96f0f66.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074307Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=6150ffc414cccbedc255bc0a72d85fd4e693a59b112789af61e8a0e93d00e5dc"):
    """Extract the object filename (e.g. "daa1…f66.png") from a presigned URL.

    Fix: the original did basename(url) first and split on "?" afterwards,
    which returns the wrong result if the query string contains a "/".
    Strip the query string first, then take the final path component.

    Args:
        url: Image URL; defaults to the original hard-coded test URL, so
            existing no-argument callers behave identically.

    Returns:
        The filename portion of the URL path (also printed).
    """
    img_id = os.path.basename(url.split("?", 1)[0])
    print(img_id)
    return img_id
if __name__ == "__main__":
    test_image()
    # NOTE(review): the block below is dormant ad-hoc code for creating a
    # chunk and then writing a tag directly into the Infinity backing table.
    # Kept verbatim for reference; consider deleting or moving to a real test.
    # dataset_id = "18caf531f04d11f095670242c0a85002"
    # document_id = "3dda0a90f1e211f0a3b80242c0a85002"
    # tag = "社会L3_人际交往L3_同理心L3"
    # content="能感知他人情绪,对同伴的困难产生理解并尝试回应"
    # important_keywords = ["3", "4"]
    # chunk = add_chunk(dataset_id=dataset_id, document_id=document_id, content=content, important_keywords=important_keywords)
    # chunk_id = chunk["chunk"]["id"]
    # infinity_client = InfinityClient()
    # infinity_client.update(database_name="default_db", table_name="ragflow_92162247e93e11f084830242ac1d0002_18caf531f04d11f095670242c0a85002", cond=f"id = '{chunk_id}'", data={"tag_kwd": tag})
    # print(chunk_id)