The RAG System uses structured logging with structlog to provide comprehensive, machine-readable logs that support debugging, monitoring, and log aggregation.
```python
from config import configure_logging, get_logger

# Configure logging
configure_logging(
    log_level="INFO",
    log_to_console=True,
    json_format=True
)

# Get a logger
logger = get_logger(__name__)

# Log messages
logger.info("user_login", user_id="123", ip_address="192.168.1.1")
logger.warning("rate_limit_exceeded", user_id="123", limit=100)
logger.error("database_error", error="Connection timeout", retry_count=3)
```
For development, use human-readable console output:
```python
from config import configure_development_logging, get_logger

configure_development_logging()
logger = get_logger(__name__)
logger.debug("debug_info", variable="value")
```
For production, use file logging with rotation:
```python
from config import configure_production_logging, get_logger

configure_production_logging(
    log_level="INFO",
    log_file="logs/app.log"
)
logger = get_logger(__name__)
logger.info("application_started", version="1.0.0")
```
`configure_logging()` is the main configuration function and gives full control over every option:

```python
configure_logging(
    log_level="INFO",            # Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
    log_file="logs/app.log",     # Path to log file (required if log_to_file=True)
    log_to_console=True,         # Enable console output
    log_to_file=True,            # Enable file output
    log_to_remote=False,         # Enable remote logging
    remote_handler=None,         # Custom remote handler
    json_format=True,            # Use JSON format (True) or human-readable (False)
    max_file_size=10*1024*1024,  # Max file size before rotation (10 MB)
    backup_count=5               # Number of backup files to keep
)
```
`configure_development_logging()` is a convenience wrapper for local development:

```python
from config import configure_development_logging

configure_development_logging(log_level="DEBUG")
```

Features: human-readable console output, suited to local debugging.
`configure_production_logging()` is the equivalent wrapper for production:

```python
from config import configure_production_logging

configure_production_logging(
    log_level="INFO",
    log_file="logs/app.log",
    remote_handler=None  # Optional: add remote handler
)
```

Features: JSON-formatted output, file logging with rotation (controlled by the `max_file_size` and `backup_count` settings), and support for an optional remote handler.
With `json_format=True`, each log entry is rendered as a single JSON object:

```json
{
  "event": "user_login",
  "level": "info",
  "logger": "api.auth",
  "timestamp": "2026-02-01T08:00:00.123456Z",
  "app": "rag_system",
  "severity": "INFO",
  "user_id": "user_123",
  "ip_address": "192.168.1.1",
  "request_id": "req_456"
}
```
With `json_format=False`, the same event is rendered as a human-readable console line:

```
2026-02-01T08:00:00.123456Z [info ] user_login [api.auth] app=rag_system severity=INFO user_id=user_123 ip_address=192.168.1.1
```
Bind context to a logger to automatically include it in all subsequent log messages:
```python
from config import get_logger

logger = get_logger(__name__)

# Bind request-specific context
request_logger = logger.bind(
    request_id="req_12345",
    user_id="user_789"
)

# All logs from request_logger will include request_id and user_id
request_logger.info("request_started", method="GET", path="/api/documents")
request_logger.info("processing_request", step="validation")
request_logger.info("request_completed", status_code=200, duration_ms=45)
```
Output:

```
{"request_id": "req_12345", "user_id": "user_789", "method": "GET", "path": "/api/documents", "event": "request_started", ...}
{"request_id": "req_12345", "user_id": "user_789", "step": "validation", "event": "processing_request", ...}
{"request_id": "req_12345", "user_id": "user_789", "status_code": 200, "duration_ms": 45, "event": "request_completed", ...}
```
Use appropriate log levels for different types of messages:
**DEBUG** — Detailed information for diagnosing problems. Only enabled in development.

```python
logger.debug("cache_lookup", key="user:123", hit=True)
```

**INFO** — General informational messages about application flow.

```python
logger.info("user_login", user_id="123", method="oauth")
logger.info("document_created", document_id="doc_456", size_bytes=1024)
```

**WARNING** — Potentially problematic situations that don't prevent operation.

```python
logger.warning("rate_limit_approaching", user_id="123", usage=90, limit=100)
logger.warning("deprecated_api_used", endpoint="/v1/old-endpoint", user_id="123")
```

**ERROR** — Errors that prevent a specific operation but don't crash the application.

```python
logger.error("database_query_failed", query="SELECT ...", error="Timeout", retry_count=3)
logger.error("external_api_error", service="ragflow", status_code=500)
```

**CRITICAL** — Severe errors that may cause application failure.

```python
logger.critical("database_connection_lost", error="Connection refused")
logger.critical("out_of_memory", available_mb=10, required_mb=100)
```
Log exceptions with automatic stack trace capture:
```python
from config import get_logger

logger = get_logger(__name__)

try:
    result = process_document(doc_id)
except Exception as e:
    logger.error(
        "document_processing_failed",
        document_id=doc_id,
        error=str(e),
        exc_info=True  # Include stack trace
    )
```
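If `get_logger` returns structlog's stdlib-compatible bound logger (an assumption here, not confirmed by this document), the `exception()` shorthand logs at ERROR level and attaches the active traceback automatically:

```python
try:
    result = process_document(doc_id)
except Exception:
    # Equivalent to logger.error(..., exc_info=True)
    logger.exception("document_processing_failed", document_id=doc_id)
```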
File logging automatically rotates when the file reaches the maximum size:
```python
configure_logging(
    log_file="logs/app.log",
    log_to_file=True,
    max_file_size=10 * 1024 * 1024,  # 10 MB
    backup_count=5                   # Keep 5 backup files
)
```
This creates:

- `logs/app.log` (current log file)
- `logs/app.log.1` (most recent backup)
- `logs/app.log.2`
- `logs/app.log.3`
- `logs/app.log.4`
- `logs/app.log.5` (oldest backup)
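This rotation scheme matches the standard library's `RotatingFileHandler`, which is presumably what `configure_logging` wires up internally. A minimal stand-alone sketch of the same policy, independent of the project's helpers:

```python
import logging
from logging.handlers import RotatingFileHandler

handler = RotatingFileHandler(
    "logs/app.log",
    maxBytes=10 * 1024 * 1024,  # rotate once the file reaches 10 MB
    backupCount=5,              # keep app.log.1 .. app.log.5
)
logging.getLogger().addHandler(handler)
```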
Send logs to a remote logging service:

```python
import logging
from logging.handlers import SysLogHandler

from config import configure_logging

# Create remote handler (example: syslog)
remote_handler = SysLogHandler(address=("logs.example.com", 514))

# Configure with remote logging
configure_logging(
    log_level="INFO",
    log_to_remote=True,
    remote_handler=remote_handler
)
```
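The example above uses syslog, but any stdlib `logging.Handler` should work as `remote_handler`. A sketch using an HTTP collector instead, where the host and `/ingest` path are hypothetical placeholders for your own service:

```python
from logging.handlers import HTTPHandler

# Ships each log record to a hypothetical HTTP endpoint via POST
remote_handler = HTTPHandler("logs.example.com:8080", "/ingest", method="POST")
```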
Instead of string interpolation, use structured fields:
```python
# ❌ Bad
logger.info(f"User {user_id} logged in from {ip_address}")

# ✅ Good
logger.info("user_login", user_id=user_id, ip_address=ip_address)
```
Use snake_case event names that describe the action:
logger.info("user_login", ...)
logger.info("document_created", ...)
logger.info("search_completed", ...)
Add context that helps with debugging and monitoring:
```python
logger.info(
    "api_request_completed",
    method="POST",
    path="/api/documents",
    status_code=201,
    duration_ms=45,
    user_id="user_123",
    request_id="req_456"
)
```
In request handlers, bind request-specific context:
```python
async def handle_request(request):
    logger = get_logger(__name__).bind(
        request_id=request.state.request_id,
        user_id=request.state.user_id
    )
    logger.info("request_started", method=request.method, path=request.url.path)
    # ... handle request ...
    logger.info("request_completed", status_code=200)
```
Never log passwords, API keys, or other sensitive data:
```python
# ❌ Bad
logger.info("user_authenticated", password=password, api_key=api_key)

# ✅ Good
logger.info("user_authenticated", user_id=user_id, method="password")
```
Example middleware for request logging:
```python
import time
import uuid

from fastapi import Request

from config import get_logger


async def logging_middleware(request: Request, call_next):
    # Generate request ID
    request_id = str(uuid.uuid4())
    request.state.request_id = request_id

    # Create logger with bound context
    logger = get_logger(__name__).bind(request_id=request_id)

    # Log request start
    start_time = time.time()
    logger.info(
        "request_started",
        method=request.method,
        path=request.url.path,
        client_ip=request.client.host
    )

    try:
        # Process request
        response = await call_next(request)

        # Log request completion
        duration_ms = (time.time() - start_time) * 1000
        logger.info(
            "request_completed",
            status_code=response.status_code,
            duration_ms=duration_ms
        )

        # Add request ID to response headers
        response.headers["X-Request-ID"] = request_id
        return response
    except Exception as e:
        # Log request error
        duration_ms = (time.time() - start_time) * 1000
        logger.error(
            "request_failed",
            error=str(e),
            duration_ms=duration_ms,
            exc_info=True
        )
        raise
```
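To activate the middleware, register the function on the application using FastAPI's standard middleware API (the `app` object here is assumed):

```python
from fastapi import FastAPI

app = FastAPI()

# Register the middleware defined above for every HTTP request
app.middleware("http")(logging_middleware)
```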