# alair/graph_rag_server
"""Initial schema

Revision ID: 001
Revises: 
Create Date: 2024-01-01 00:00:00.000000

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; it matches "Revision ID: 001" in the
# module docstring.
revision: str = '001'
# No parent revision is set (the docstring's "Revises:" field is empty), which
# is consistent with this being the initial schema migration.
down_revision: Union[str, None] = None
# No branch labels and no cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """
    Create the initial database schema.

    Tables are created in this order, so that every referenced table exists
    before the table that points at it:
    - documents: documents
    - document_chunks: chunks of a document (FK to documents, ON DELETE CASCADE)
    - knowledge_bases: knowledge bases
    - document_knowledge_base: document <-> knowledge base association table
    - prompt_dimensions: prompt dimensions (FK to knowledge_bases, ON DELETE CASCADE)
    - parsed_documents: parsed documents
    - search_history: search history

    The secondary indexes of each table are created right after the table.
    """

    def key_column(name, comment=None):
        """Build a NOT NULL String(255) column used for primary/foreign keys."""
        return sa.Column(
            name,
            sa.String(length=255),
            nullable=False,
            comment=comment,
        )

    def timestamp_column(name, comment=None):
        """Build a NOT NULL DateTime column whose server default is now()."""
        return sa.Column(
            name,
            sa.DateTime(),
            server_default=sa.text('now()'),
            nullable=False,
            comment=comment,
        )

    def add_indexes(table_name, *index_specs):
        """Create one single-column index per (index_name, column_name) pair."""
        for index_name, column_name in index_specs:
            op.create_index(index_name, table_name, [column_name])

    # --- documents ---------------------------------------------------------
    op.create_table(
        'documents',
        key_column('id', comment='文档唯一标识'),
        sa.Column('content', sa.Text(), nullable=False, comment='文档内容'),
        sa.Column('title', sa.String(length=500), nullable=True, comment='文档标题'),
        sa.Column('metadata', sa.JSON(), nullable=False, comment='文档元数据'),
        sa.Column('has_embedding', sa.Boolean(), nullable=True, comment='是否已生成向量'),
        sa.Column('embedding_model', sa.String(length=100), nullable=True, comment='使用的向量模型'),
        sa.Column('vector_dimension', sa.Integer(), nullable=True, comment='向量维度'),
        timestamp_column('created_at', comment='创建时间'),
        timestamp_column('updated_at', comment='更新时间'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'documents',
        ('ix_documents_created_at', 'created_at'),
        ('ix_documents_has_embedding', 'has_embedding'),
    )

    # --- document_chunks ---------------------------------------------------
    op.create_table(
        'document_chunks',
        key_column('id', comment='文档块唯一标识'),
        key_column('document_id', comment='所属文档ID'),
        sa.Column('content', sa.Text(), nullable=False, comment='文档块内容'),
        sa.Column('position', sa.Integer(), nullable=False, comment='在文档中的位置序号'),
        sa.Column('page_number', sa.Integer(), nullable=True, comment='页码（如果适用）'),
        sa.Column('metadata', sa.JSON(), nullable=False, comment='文档块元数据'),
        sa.Column('has_embedding', sa.Boolean(), nullable=True, comment='是否已生成向量'),
        timestamp_column('created_at', comment='创建时间'),
        # Chunks are removed together with their parent document.
        sa.ForeignKeyConstraint(
            ['document_id'],
            ['documents.id'],
            ondelete='CASCADE',
        ),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'document_chunks',
        ('ix_document_chunks_document_id', 'document_id'),
        ('ix_document_chunks_position', 'position'),
    )

    # --- knowledge_bases ---------------------------------------------------
    op.create_table(
        'knowledge_bases',
        key_column('id', comment='知识库唯一标识'),
        sa.Column('name', sa.String(length=255), nullable=False, comment='知识库名称'),
        sa.Column('description', sa.Text(), nullable=True, comment='知识库描述'),
        sa.Column('config', sa.JSON(), nullable=False, comment='知识库配置'),
        sa.Column('tags', sa.JSON(), nullable=False, comment='知识库标签'),
        sa.Column('document_count', sa.Integer(), nullable=True, comment='文档数量'),
        timestamp_column('created_at', comment='创建时间'),
        timestamp_column('updated_at', comment='更新时间'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'knowledge_bases',
        ('ix_knowledge_bases_name', 'name'),
        ('ix_knowledge_bases_created_at', 'created_at'),
    )

    # --- document_knowledge_base (association table) -----------------------
    # NOTE(review): unlike the other foreign keys in this migration, the two
    # below declare no ondelete rule - confirm that this is intended.
    op.create_table(
        'document_knowledge_base',
        key_column('document_id'),
        key_column('knowledge_base_id'),
        timestamp_column('created_at'),
        sa.ForeignKeyConstraint(['document_id'], ['documents.id']),
        sa.ForeignKeyConstraint(['knowledge_base_id'], ['knowledge_bases.id']),
        sa.PrimaryKeyConstraint('document_id', 'knowledge_base_id'),
    )
    add_indexes(
        'document_knowledge_base',
        ('ix_doc_kb_document_id', 'document_id'),
        ('ix_doc_kb_knowledge_base_id', 'knowledge_base_id'),
    )

    # --- prompt_dimensions -------------------------------------------------
    op.create_table(
        'prompt_dimensions',
        key_column('id', comment='提示词维度唯一标识'),
        key_column('knowledge_base_id', comment='所属知识库ID'),
        sa.Column('name', sa.String(length=255), nullable=False, comment='维度名称'),
        sa.Column('description', sa.Text(), nullable=True, comment='维度描述'),
        sa.Column('template', sa.Text(), nullable=False, comment='提示词模板'),
        sa.Column('variables', sa.JSON(), nullable=False, comment='模板变量列表'),
        sa.Column('config', sa.JSON(), nullable=False, comment='维度配置'),
        timestamp_column('created_at', comment='创建时间'),
        timestamp_column('updated_at', comment='更新时间'),
        # Dimensions are removed together with their knowledge base.
        sa.ForeignKeyConstraint(
            ['knowledge_base_id'],
            ['knowledge_bases.id'],
            ondelete='CASCADE',
        ),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'prompt_dimensions',
        ('ix_prompt_dimensions_kb_id', 'knowledge_base_id'),
        ('ix_prompt_dimensions_name', 'name'),
    )

    # --- parsed_documents --------------------------------------------------
    op.create_table(
        'parsed_documents',
        key_column('id', comment='解析文档唯一标识'),
        sa.Column('original_filename', sa.String(length=500), nullable=False, comment='原始文件名'),
        sa.Column('document_type', sa.String(length=50), nullable=False, comment='文档类型（pdf/image/text/qa_pair）'),
        sa.Column('file_path', sa.String(length=1000), nullable=True, comment='文件存储路径'),
        sa.Column('file_size', sa.Integer(), nullable=True, comment='文件大小（字节）'),
        sa.Column('status', sa.String(length=50), nullable=False, comment='解析状态（pending/processing/completed/failed）'),
        sa.Column('error_message', sa.Text(), nullable=True, comment='错误信息（如果解析失败）'),
        sa.Column('chunk_count', sa.Integer(), nullable=True, comment='分块数量'),
        sa.Column('metadata', sa.JSON(), nullable=False, comment='文档元数据'),
        timestamp_column('created_at', comment='创建时间'),
        timestamp_column('updated_at', comment='更新时间'),
        # Nullable and without a server default, unlike the two timestamps above.
        sa.Column('completed_at', sa.DateTime(), nullable=True, comment='解析完成时间'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'parsed_documents',
        ('ix_parsed_documents_status', 'status'),
        ('ix_parsed_documents_document_type', 'document_type'),
        ('ix_parsed_documents_created_at', 'created_at'),
    )

    # --- search_history ----------------------------------------------------
    op.create_table(
        'search_history',
        key_column('id', comment='搜索历史唯一标识'),
        sa.Column('query_text', sa.Text(), nullable=False, comment='搜索查询文本'),
        sa.Column('search_type', sa.String(length=50), nullable=False, comment='搜索类型（vector/text/hybrid）'),
        sa.Column('top_k', sa.Integer(), nullable=False, comment='返回结果数量'),
        sa.Column('filters', sa.JSON(), nullable=True, comment='过滤条件'),
        sa.Column('result_count', sa.Integer(), nullable=True, comment='返回结果数量'),
        sa.Column('execution_time_ms', sa.Integer(), nullable=True, comment='执行时间（毫秒）'),
        sa.Column('user_id', sa.String(length=255), nullable=True, comment='用户ID'),
        sa.Column('session_id', sa.String(length=255), nullable=True, comment='会话ID'),
        timestamp_column('created_at', comment='创建时间'),
        sa.PrimaryKeyConstraint('id'),
    )
    add_indexes(
        'search_history',
        ('ix_search_history_created_at', 'created_at'),
        ('ix_search_history_search_type', 'search_type'),
        ('ix_search_history_user_id', 'user_id'),
    )


def downgrade() -> None:
    """
    Drop every table created by ``upgrade``.
    """
    # Reverse dependency order: tables holding foreign keys
    # (prompt_dimensions, document_knowledge_base, document_chunks) are
    # dropped before the tables they reference (knowledge_bases, documents).
    tables_in_drop_order = (
        'search_history',
        'parsed_documents',
        'prompt_dimensions',
        'document_knowledge_base',
        'knowledge_bases',
        'document_chunks',
        'documents',
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)