本文档说明如何在不同环境中部署 RAG 系统。
系统通过环境变量进行配置。创建 .env 文件:
# 复制示例配置
cp .env.example .env
# 编辑配置
nano .env
# 应用配置
APP_NAME="RAG System"
DEBUG=false
LOG_LEVEL=INFO
# 数据库配置
DB_HOST=localhost
DB_PORT=5432
DB_DATABASE=rag_system
DB_USERNAME=rag_user
DB_PASSWORD=your_secure_password
# 向量数据库配置(可选值:infinity 或 elasticsearch)
VECTOR_DB_TYPE=infinity
# Infinity 配置(如果使用)
INFINITY_HOST=localhost
INFINITY_PORT=23817
# Elasticsearch 配置(如果使用)
ES_HOST=localhost
ES_PORT=9200
ES_USERNAME=elastic
ES_PASSWORD=your_es_password
详细配置选项请参考《配置指南》。
# 创建虚拟环境
python -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
# 安装依赖
pip install -r requirements.txt
cd docker
docker-compose up -d postgres infinity
PostgreSQL:
# Ubuntu/Debian
sudo apt-get install postgresql
sudo systemctl start postgresql
# macOS
brew install postgresql
brew services start postgresql
Infinity:
# 参考 Infinity 官方文档
# https://github.com/infiniflow/infinity
# 创建数据库
createdb rag_system
# 运行迁移
alembic upgrade head
# 开发模式
uvicorn src.main:app --reload --host 0.0.0.0 --port 8000
# 生产模式
uvicorn src.main:app --host 0.0.0.0 --port 8000 --workers 4
# 健康检查
curl http://localhost:8000/health
# 查看 API 文档(macOS 使用 open;Linux 请改用 xdg-open,或直接在浏览器中打开该地址)
open http://localhost:8000/docs
cd docker
docker-compose build
docker-compose up -d
这将启动:
# 查看所有服务日志
docker-compose logs -f
# 查看特定服务日志
docker-compose logs -f app
docker-compose down
# 同时删除数据卷
docker-compose down -v
docker build -f docker/Dockerfile -t rag-system:latest .
docker run -d \
--name rag-system \
-p 8000:8000 \
-e DB_HOST=host.docker.internal \
-e DB_PORT=5432 \
-e DB_DATABASE=rag_system \
-e DB_USERNAME=rag_user \
-e DB_PASSWORD=your_password \
-e INFINITY_HOST=host.docker.internal \
-e INFINITY_PORT=23817 \
rag-system:latest
docker logs -f rag-system
deployment.yaml:
apiVersion: apps/v1
kind: Deployment
metadata:
name: rag-system
spec:
replicas: 3
selector:
matchLabels:
app: rag-system
template:
metadata:
labels:
app: rag-system
spec:
containers:
- name: rag-system
image: your-registry/rag-system:latest
ports:
- containerPort: 8000
env:
- name: DB_HOST
valueFrom:
secretKeyRef:
name: rag-secrets
key: db-host
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: rag-secrets
key: db-password
resources:
requests:
memory: "512Mi"
cpu: "500m"
limits:
memory: "2Gi"
cpu: "2000m"
livenessProbe:
httpGet:
path: /health/live
port: 8000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health/ready
port: 8000
initialDelaySeconds: 10
periodSeconds: 5
service.yaml:
apiVersion: v1
kind: Service
metadata:
name: rag-system
spec:
selector:
app: rag-system
ports:
- protocol: TCP
port: 80
targetPort: 8000
type: LoadBalancer
# 创建 secrets
kubectl create secret generic rag-secrets \
--from-literal=db-host=postgres.default.svc.cluster.local \
--from-literal=db-password=your_password
# 部署应用
kubectl apply -f deployment.yaml
kubectl apply -f service.yaml
# 查看状态
kubectl get pods
kubectl get services
使用 ECS/Fargate:
数据库:
文件存储:
使用 Azure Container Instances:
数据库:
使用 Cloud Run:
数据库:
alembic init alembic
# 自动生成迁移
alembic revision --autogenerate -m "描述变更"
# 手动创建迁移
alembic revision -m "描述变更"
# 升级到最新版本
alembic upgrade head
# 升级到特定版本
alembic upgrade <revision>
# 回滚一个版本
alembic downgrade -1
# 查看当前版本
alembic current
# 查看迁移历史
alembic history
备份数据库:
pg_dump rag_system > backup_$(date +%Y%m%d).sql
测试迁移:
# 在测试环境先运行
alembic upgrade head
应用迁移:
# 在生产环境运行
alembic upgrade head
验证:
# 检查应用是否正常
curl http://localhost:8000/health
系统使用结构化日志(JSON 格式):
# 配置日志级别
export LOG_LEVEL=INFO
# 配置日志输出
export LOG_FILE=/var/log/rag-system/app.log
配置 Filebeat:
filebeat.inputs:
- type: log
enabled: true
paths:
- /var/log/rag-system/*.log
json.keys_under_root: true
output.elasticsearch:
hosts: ["elasticsearch:9200"]
启动 Filebeat:
filebeat -e -c filebeat.yml
配置指标端点:
# 已在 /metrics 端点实现
配置 Prometheus:
scrape_configs:
- job_name: 'rag-system'
static_configs:
- targets: ['localhost:8000']
配置 Grafana:
groups:
- name: rag-system
rules:
- alert: HighErrorRate
expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05
for: 5m
annotations:
summary: "High error rate detected"
- alert: HighResponseTime
expr: http_request_duration_seconds > 1
for: 5m
annotations:
summary: "High response time detected"
检查日志:
# Docker
docker logs rag-system
# Kubernetes
kubectl logs <pod-name>
# 本地
tail -f logs/app.log
常见原因:
检查连接:
# 测试 PostgreSQL 连接
psql -h $DB_HOST -p $DB_PORT -U $DB_USERNAME -d $DB_DATABASE
# 检查网络
ping $DB_HOST
telnet $DB_HOST $DB_PORT
解决方案:
检查 Infinity:
curl http://$INFINITY_HOST:$INFINITY_PORT/health
检查 Elasticsearch:
curl http://$ES_HOST:$ES_PORT/_cluster/health
检查资源使用:
# CPU 和内存
docker stats
# 数据库连接数(以下为 SQL 语句,需在 psql 中执行)
SELECT count(*) FROM pg_stat_activity;
优化建议:
启用调试模式获取更多信息:
# 设置环境变量
export DEBUG=true
export LOG_LEVEL=DEBUG
# 重启应用使配置生效(例如使用 Docker Compose 时:docker-compose restart app)
# 基本健康检查
curl http://localhost:8000/health
# 就绪检查
curl http://localhost:8000/health/ready
# 存活检查
curl http://localhost:8000/health/live
# 性能指标
curl http://localhost:8000/metrics
# 创建备份
pg_dump rag_system > backup.sql
# 恢复备份
psql rag_system < backup.sql
# 自动备份:将下面这一行添加到 crontab(crontab -e),每天凌晨 2 点执行;crontab 中的 % 必须写成 \%
0 2 * * * pg_dump rag_system > /backups/rag_$(date +\%Y\%m\%d).sql
参考各向量数据库的官方文档:
应用层:
数据库层:
向量数据库层:
应用优化:
数据库优化:
网络优化:
如需帮助,请参考: