- from utils.ragflow.ragflow_service import RAGFlowService
- from utils.infinity import InfinityClient
# Parser configuration passed to RAGFlow when creating a dataset with the
# "naive" chunk method (see create_dataset below). Field names follow
# RAGFlow's `parser_config` schema.
DATASET_CONFIG_DICT = {
    "chunk_token_num": 256,
    # Chunk delimiters: newline plus ASCII and full-width (CJK) punctuation.
    "delimiter": "\n!?;。;!?",
    "html4excel": False,
    # Layout-recognition model — presumably "model@provider" form; confirm
    # against the RAGFlow deployment config.
    "layout_recognize": "Pro/Qwen/Qwen2.5-VL-7B-Instruct@SILICONFLOW",
    "auto_keywords": 5,
    "tag_kb_ids": [],
    "topn_tags": 3,
    "task_page_size": 4,
    # RAPTOR (recursive clustering/summarization) settings.
    "raptor": {
        "max_cluster": 64,
        "max_token": 256,
        "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
        "random_seed": 0,
        "threshold": 0.1,
        "use_raptor": True
    },
    # GraphRAG extraction settings; entity_types lists the node types to
    # extract (book-domain entities).
    "graphrag": {
        "resolution": True,
        "use_graphrag": True,
        "method": "general",
        "entity_types": [
            "event",
            "Book",
            "Author",
            "Illustrator",
            "Series",
            "Theme",
            "Genre",
            "Character",
            "Setting",
            "AgeGroup",
            "Competency",
            "ArtStyle",
            "Award",
            "Publisher"
        ]
    }
}
# Module-level RAGFlow client shared by all helper functions below.
ragflow_service = RAGFlowService()

# Ad-hoc list of dataset ids for manual testing; not referenced by any of the
# functions in this file as visible here.
dataset_ids = [
    "c2be78a4f10711f095230242c0a85002"
]
def create_dataset(name: str = "test_http_dataset1", description: str = "测试HTTP数据集1"):
    """Create a RAGFlow dataset and print the service response.

    The dataset is created with team permission, the "naive" chunk method,
    and the module-level DATASET_CONFIG_DICT parser configuration.

    Args:
        name: Dataset name. Defaults to the previously hard-coded test name,
            so existing no-argument callers behave identically.
        description: Dataset description. Defaults to the original value.
    """
    dataset = ragflow_service.create_dataset(
        name=name,
        description=description,
        permission="team",
        chunk_method="naive",
        parser_config=DATASET_CONFIG_DICT,
    )
    print(dataset)
def delete_dataset(dataset_id: str):
    """Delete one dataset by id; the batch helper prints the service's flag."""
    # Single-id convenience wrapper around the batch delete below.
    delete_datasets(dataset_ids=[dataset_id])
def delete_datasets(dataset_ids: list[str]):
    """Delete the given datasets by id and print the service's success flag."""
    print(ragflow_service.delete_datasets(dataset_ids=dataset_ids))
def list_datasets(name: str = None):
    """Print the datasets known to the service, optionally filtered by name."""
    print(ragflow_service.list_datasets(name=name))
def get_dataset(name: str = None, dataset_id: str = None):
    """Look up a single dataset by name and/or id and print it."""
    print(ragflow_service.get_dataset(name=name, dataset_id=dataset_id))
def add_chunk(dataset_id: str, document_id: str, content: str, important_keywords: list[str] = None):
    """Create a chunk in a document and return the service response.

    Fix: the annotation was `list[int]`, but keywords are strings — the
    example usage in this file passes ["3", "4"] — so it is `list[str]` now.

    Args:
        dataset_id: Id of the dataset containing the document.
        document_id: Id of the document to attach the chunk to.
        content: Chunk text content.
        important_keywords: Optional list of keyword strings for the chunk.

    Returns:
        Whatever ragflow_service.create_chunk returns (printed as well).
    """
    chunk = ragflow_service.create_chunk(
        dataset_id=dataset_id,
        document_id=document_id,
        content=content,
        important_keywords=important_keywords,
    )
    print(chunk)
    return chunk
import os


def test_image(url: str = "http://192.168.16.134:9000/bookpage/daa1861c-2096-42c0-b8e3-a163f96f0f66.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ck7I8Esssx6rzZrXQ5uP%2F20260109%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20260109T074307Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=6150ffc414cccbedc255bc0a72d85fd4e693a59b112789af61e8a0e93d00e5dc"):
    """Extract the object filename (e.g. "daa1…f66.png") from a presigned URL.

    Fix: the original did basename(url) first and split on "?" afterwards,
    which returns the wrong result if the query string contains a "/".
    Strip the query string first, then take the final path component.

    Args:
        url: Image URL; defaults to the original hard-coded test URL, so
            existing no-argument callers behave identically.

    Returns:
        The filename portion of the URL path (also printed).
    """
    img_id = os.path.basename(url.split("?", 1)[0])
    print(img_id)
    return img_id
if __name__ == "__main__":
    test_image()
    # NOTE(review): the block below is dormant ad-hoc code for creating a
    # chunk and then writing a tag directly into the Infinity backing table.
    # Kept verbatim for reference; consider deleting or moving to a real test.
    # dataset_id = "18caf531f04d11f095670242c0a85002"
    # document_id = "3dda0a90f1e211f0a3b80242c0a85002"
    # tag = "社会L3_人际交往L3_同理心L3"
    # content="能感知他人情绪,对同伴的困难产生理解并尝试回应"
    # important_keywords = ["3", "4"]
    # chunk = add_chunk(dataset_id=dataset_id, document_id=document_id, content=content, important_keywords=important_keywords)
    # chunk_id = chunk["chunk"]["id"]
    # infinity_client = InfinityClient()
    # infinity_client.update(database_name="default_db", table_name="ragflow_92162247e93e11f084830242ac1d0002_18caf531f04d11f095670242c0a85002", cond=f"id = '{chunk_id}'", data={"tag_kwd": tag})
    # print(chunk_id)