Best Practices Guide
This guide provides recommendations and patterns for using Moltres effectively in production.
Code Organization
1. Connection Management
Best Practice: Create database connections at the application level and reuse them.
# Good: Single connection per application
db = connect("postgresql://user:pass@host/dbname")
# Use the same connection throughout
def get_users():
return db.table("users").select().collect()
def get_orders():
return db.table("orders").select().collect()
Avoid: Creating new connections for each query
# Bad: New connection for each query
def get_users():
db = connect("postgresql://user:pass@host/dbname") # Don't do this
return db.table("users").select().collect()
2. Query Composition
Best Practice: Compose queries step by step for readability
# Good: Clear, readable query composition
df = (
db.table("orders")
.select("id", "customer_id", "amount", "status")
.where(col("status") == "active")
.where(col("amount") > 100)
.order_by(col("amount").desc())
.limit(100)
)
Avoid: Overly complex one-liners
# Less readable: Everything in one line
df = db.table("orders").select("id", "customer_id", "amount", "status").where(col("status") == "active").where(col("amount") > 100).order_by(col("amount").desc()).limit(100)
3. Column References
Best Practice: Use col() consistently for column references
from moltres import col
# Good: Explicit column references
df = (
db.table("users")
.select(col("id"), col("name"), col("email"))
.where(col("active") == True)
)
Avoid: Mixing string and column references inconsistently
# Less clear: Mixed usage
df = (
db.table("users")
.select("id", "name", col("email")) # Inconsistent
.where(col("active") == True)
)
Error Handling
1. Handle Database Errors
Best Practice: Catch and handle specific exceptions
from moltres.utils.exceptions import (
ExecutionError,
CompilationError,
DatabaseConnectionError,
QueryTimeoutError,
)
try:
results = df.collect()
except QueryTimeoutError as e:
print(f"Query timed out: {e}")
# Handle timeout
except ExecutionError as e:
print(f"Query failed: {e}")
print(f"Suggestion: {e.suggestion}")
# Handle execution error
except DatabaseConnectionError as e:
print(f"Connection failed: {e}")
# Handle connection error
2. Validate Inputs
Best Practice: Validate inputs before building queries
def get_user_orders(user_id: int, limit: int = 100):
if not isinstance(user_id, int) or user_id < 0:
raise ValueError("user_id must be a positive integer")
if limit < 0 or limit > 1000:
raise ValueError("limit must be between 0 and 1000")
return (
db.table("orders")
.select()
.where(col("user_id") == user_id)
.limit(limit)
.collect()
)
Security
1. SQL Injection Prevention
Best Practice: Use column expressions, not string concatenation
# Good: Safe - uses parameterized queries
user_id = 123
df = db.table("users").select().where(col("id") == user_id)
# Bad: Vulnerable to SQL injection
user_id = "123; DROP TABLE users;"
sql = f"SELECT * FROM users WHERE id = {user_id}" # DON'T DO THIS
2. Connection String Security
Best Practice: Use environment variables for credentials
import os
# Good: Credentials from environment
dsn = os.environ.get("MOLTRES_DSN")
db = connect(dsn)
# Avoid: Hardcoded credentials
db = connect("postgresql://user:password@host/dbname") # Don't commit this
Performance
1. Use Indexes
Best Practice: Create indexes on frequently queried columns
# Create indexes for common queries
db.execute("CREATE INDEX IF NOT EXISTS idx_users_email ON users(email)")
db.execute("CREATE INDEX IF NOT EXISTS idx_orders_customer_id ON orders(customer_id)")
2. Filter Early
Best Practice: Apply filters as early as possible
# Good: Filter before join
df = (
db.table("orders")
.select()
.where(col("status") == "active") # Filter early
.join(customers, on=[col("orders.customer_id") == col("customers.id")])
)
3. Select Only Needed Columns
Best Practice: Avoid SELECT * when possible
# Good: Select only needed columns
df = db.table("users").select("id", "name", "email")
# Less efficient: Select all columns
df = db.table("users").select() # SELECT *
4. Use Streaming for Large Results
Best Practice: Stream large result sets
# Good: Stream large results
for chunk in df.collect(stream=True):
process_chunk(chunk)
# Avoid: Loading everything into memory
results = df.collect() # May cause memory issues
Testing
1. Test Query Generation
Best Practice: Verify SQL generation during development
# Check generated SQL
df = db.table("users").select().where(col("age") > 18)
sql = df.to_sql()
print(sql) # Verify the SQL is correct
2. Use Test Databases
Best Practice: Use separate test databases
# Test database
test_db = connect("sqlite:///test.db")
# Production database
prod_db = connect("postgresql://user:pass@host/prod_db")
3. Test Error Handling
Best Practice: Test error scenarios
import pytest
from moltres.utils.exceptions import ExecutionError
def test_invalid_query():
df = db.table("nonexistent").select()
with pytest.raises(ExecutionError):
df.collect()
Configuration
1. Environment Variables
Best Practice: Use environment variables for configuration
# .env file or environment
export MOLTRES_DSN="postgresql://user:pass@host/dbname"
export MOLTRES_POOL_SIZE=10
export MOLTRES_QUERY_TIMEOUT=30.0
# Application code
db = connect() # Uses MOLTRES_DSN from environment
2. Connection Pooling
Best Practice: Configure connection pooling appropriately
db = connect(
"postgresql://user:pass@host/dbname",
pool_size=10, # Match your workload
max_overflow=5, # Allow some overflow
pool_pre_ping=True, # Verify connections
)
Code Patterns
1. Query Builders
Best Practice: Create reusable query builders
def build_user_query(active_only: bool = False, min_age: int = None):
df = db.table("users").select("id", "name", "email")
if active_only:
df = df.where(col("active") == True)
if min_age is not None:
df = df.where(col("age") >= min_age)
return df
# Use the builder
active_users = build_user_query(active_only=True, min_age=18).collect()
2. CTEs for Complex Queries
Best Practice: Use CTEs for complex queries
# Good: Use CTEs for readability
active_orders = (
db.table("orders")
.select()
.where(col("status") == "active")
.cte("active_orders")
)
high_value = (
active_orders
.select()
.where(col("amount") > 1000)
.cte("high_value")
)
result = high_value.select().collect()
3. Type Hints
Best Practice: Use type hints for better IDE support
from typing import List, Dict
from moltres import DataFrame
def get_active_users() -> List[Dict[str, object]]:
df: DataFrame = (
db.table("users")
.select()
.where(col("active") == True)
)
return df.collect()
Documentation
1. Document Complex Queries
Best Practice: Add comments for complex logic
# Calculate monthly revenue by category
# Filters active orders, groups by month and category,
# and calculates total revenue
df = (
db.table("orders")
.select()
.where(col("status") == "active")
.where(col("date") >= "2024-01-01")
.group_by(
func.date_trunc("month", col("date")).alias("month"),
col("category")
)
.agg(sum(col("amount")).alias("revenue"))
)
2. Document Schema Assumptions
Best Practice: Document expected schema
def get_user_orders(user_id: int):
"""
Get orders for a user.
Assumes:
- orders table has columns: id, user_id, amount, status, date
- user_id column is indexed for performance
"""
return (
db.table("orders")
.select()
.where(col("user_id") == user_id)
.collect()
)
Common Patterns
Pattern 1: Pagination
def get_paginated_results(page: int, page_size: int = 20):
offset = page * page_size
return (
db.table("items")
.select()
.order_by(col("id"))
.limit(page_size)
.offset(offset) # If supported
.collect()
)
Pattern 2: Conditional Filtering
def search_users(name: str = None, email: str = None, active: bool = None):
df = db.table("users").select()
if name:
df = df.where(col("name").like(f"%{name}%"))
if email:
df = df.where(col("email") == email)
if active is not None:
df = df.where(col("active") == active)
return df.collect()
Pattern 3: Aggregations with Conditions
from moltres.expressions.functions import sum, when
# Sum with conditions
df = (
db.table("orders")
.select()
.group_by("category")
.agg(
sum(when(col("status") == "active", col("amount"), 0)).alias("active_revenue"),
sum(col("amount")).alias("total_revenue")
)
)
Summary
Reuse database connections
Compose queries clearly
Use
col()consistentlyHandle errors appropriately
Validate inputs
Prevent SQL injection
Use environment variables for secrets
Create indexes for performance
Filter early in queries
Select only needed columns
Use streaming for large results
Test query generation
Use type hints
Document complex queries
Follow common patterns
For more information, see: