Database Management

This guide covers advanced database management techniques for DegentCivil, including optimization, maintenance, and best practices.

Database Architecture

DegentCivil uses a flexible database architecture that supports multiple storage backends:

  1. SQLite (default for local development)
  2. PostgreSQL (recommended for production)
  3. MongoDB (optional for specific use cases)

Basic Setup

SQLite Configuration

from app.database import Database

db_config = {
    'type': 'sqlite',
    'path': 'simulation.db',
    'journal_mode': 'WAL'  # write-ahead logging improves read concurrency
}

db = Database(db_config)

PostgreSQL Configuration

db_config = {
    'type': 'postgresql',
    'host': 'localhost',
    'port': 5432,
    'database': 'degent_civil',
    'user': 'username',        # load real credentials from the environment
    'password': 'password',    # or a secrets manager, not source code
    'max_connections': 20
}
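
Both backends are accessed through the same Database wrapper. A minimal usage sketch (assuming, as the examples below do, that execute returns rows for SELECT statements):

db = Database(db_config)

rows = db.execute("SELECT id, name FROM characters LIMIT 10")
for row in rows:
    print(row)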

Data Models

Character Data

class CharacterModel:
    table_name = 'characters'
    schema = {
        'id': 'UUID PRIMARY KEY',
        'name': 'TEXT NOT NULL',
        'age': 'INTEGER',
        'occupation': 'TEXT',
        'location_id': 'UUID REFERENCES locations(id)',
        'state': 'JSONB',
        'created_at': 'TIMESTAMP',
        'updated_at': 'TIMESTAMP'
    }
    indexes = [
        ('name_idx', 'name'),
        ('location_idx', 'location_id'),
        ('state_idx', 'state', 'gin')
    ]
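
Because each schema dictionary maps column names to SQL definitions, table creation can be driven directly from a model class. A sketch, using a hypothetical create_table helper on top of the execute method used throughout this guide:

def create_table(db, model):
    """Build and run a CREATE TABLE statement from a model's schema dict."""
    columns = ', '.join(f'{name} {definition}'
                        for name, definition in model.schema.items())
    db.execute(f'CREATE TABLE IF NOT EXISTS {model.table_name} ({columns})')

create_table(db, CharacterModel)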

Memory Storage

class MemoryModel:
    table_name = 'memories'
    schema = {
        'id': 'UUID PRIMARY KEY',
        'character_id': 'UUID REFERENCES characters(id)',
        'content': 'TEXT',
        'importance': 'FLOAT',
        'timestamp': 'TIMESTAMP',
        'metadata': 'JSONB'
    }
    indexes = [
        ('character_timestamp_idx', ['character_id', 'timestamp']),
        ('importance_idx', 'importance')
    ]
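
The composite character_timestamp_idx serves the most common access pattern: fetching a character's recent memories. An illustrative query (assuming execute accepts driver-style parameters, as the QueryCache example below implies):

recent_memories = db.execute(
    """
    SELECT content, importance, timestamp
    FROM memories
    WHERE character_id = %s
    ORDER BY timestamp DESC
    LIMIT 50
    """,
    (character_id,)  # placeholder: a character UUID
)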

Query Optimization

Index Management

class IndexManager:
    def __init__(self, database):
        self.db = database

    def create_indexes(self):
        """Create optimized indexes based on common query patterns"""
        # Partial indexes cover only the rows hot queries touch
        # (the first assumes an active flag on characters)
        queries = [
            """
            CREATE INDEX IF NOT EXISTS char_location_idx 
            ON characters (location_id) 
            WHERE active = true
            """,
            """
            CREATE INDEX IF NOT EXISTS memory_importance_idx 
            ON memories (importance) 
            WHERE importance > 0.7
            """
        ]
        for query in queries:
            self.db.execute(query)

    def analyze_index_usage(self):
        """Report index usage statistics (PostgreSQL-specific)"""
        return self.db.execute("""
            SELECT schemaname, relname, indexrelname, idx_scan, idx_tup_read
            FROM pg_stat_user_indexes
            ORDER BY idx_scan DESC
        """)

Query Planning

class QueryPlanner:
    def __init__(self, database):
        self.db = database

    def optimize_query(self, query):
        """Analyze a query's execution plan and rewrite the query if needed"""
        # EXPLAIN ANALYZE actually executes the query; use plain EXPLAIN
        # when side effects or runtime cost are a concern
        explain_query = f"EXPLAIN ANALYZE {query}"
        plan = self.db.execute(explain_query)

        if self.needs_optimization(plan):
            return self.rewrite_query(query)
        return query

    def needs_optimization(self, plan):
        """Check if query plan needs optimization"""
        return (
            self.has_sequential_scan(plan) or
            self.has_high_cost(plan) or
            self.has_poor_row_estimate(plan)
        )
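
The three predicates above are left abstract. As one illustration, a sequential-scan check can be a simple text search over the EXPLAIN output (a QueryPlanner method, assuming the plan arrives as an iterable of text rows):

    def has_sequential_scan(self, plan):
        """Detect sequential scans in a textual EXPLAIN plan"""
        return any('Seq Scan' in str(row) for row in plan)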

Performance Tuning

Connection Pooling

from psycopg2 import pool

class ConnectionPool:
    def __init__(self, config):
        # config must contain only connection kwargs
        # (host, port, dbname, user, password)
        self.pool = pool.SimpleConnectionPool(
            minconn=5,
            maxconn=20,
            **config
        )

    def get_connection(self):
        return self.pool.getconn()

    def return_connection(self, conn):
        self.pool.putconn(conn)

    def close_all(self):
        self.pool.closeall()
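
Every connection taken from the pool must be returned, so wrap usage in try/finally (or a small context manager) to avoid leaking connections when errors occur. A usage sketch with hypothetical connection parameters:

conn_kwargs = {
    'host': 'localhost',
    'port': 5432,
    'dbname': 'degent_civil',
    'user': 'username',
    'password': 'password'
}

pool = ConnectionPool(conn_kwargs)
conn = pool.get_connection()
try:
    with conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM characters")
        print(cur.fetchone()[0])
finally:
    pool.return_connection(conn)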

Query Caching

import time

class QueryCache:
    def __init__(self):
        self.cache = {}
        self.ttl = 300  # seconds (5 minutes)

    def generate_key(self, query, params=None):
        """Build a hashable cache key from the query text and parameters"""
        return (query, tuple(params) if params else None)

    def is_expired(self, entry):
        return time.time() - entry['timestamp'] > self.ttl

    def get(self, query, params=None):
        key = self.generate_key(query, params)
        entry = self.cache.get(key)
        if entry and not self.is_expired(entry):
            return entry['data']
        return None

    def set(self, query, params, data):
        key = self.generate_key(query, params)
        self.cache[key] = {
            'data': data,
            'timestamp': time.time()
        }

Data Migration

Migration Manager

class MigrationManager:
    def __init__(self, database):
        self.db = database
        self.migrations_table = 'schema_migrations'

    def create_migrations_table(self):
        self.db.execute("""
            CREATE TABLE IF NOT EXISTS schema_migrations (
                version VARCHAR(255) PRIMARY KEY,
                applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

    def apply_migrations(self, migrations_dir):
        """Apply pending migrations"""
        applied = self.get_applied_migrations()
        pending = self.get_pending_migrations(migrations_dir)

        for migration in pending:
            self.apply_migration(migration)
            self.record_migration(migration)
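
The helper methods are left abstract above. One plausible implementation (a sketch, assuming os is imported at module level and migrations are versioned .sql scripts) reads the directory and filters out files already recorded in schema_migrations:

    def get_pending_migrations(self, migrations_dir):
        """Return migration file names not yet applied, in sorted order"""
        applied = set(self.get_applied_migrations())
        files = sorted(f for f in os.listdir(migrations_dir)
                       if f.endswith('.sql'))
        return [f for f in files if f not in applied]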

Data Backup

import os
import shutil

class BackupManager:
    def __init__(self, database):
        self.db = database

    def create_backup(self, path):
        """Create a database backup"""
        if self.db.type == 'postgresql':
            # pg_dump reads credentials from the environment (e.g. PGPASSWORD)
            os.system(f'pg_dump {self.db.name} > {path}')
        elif self.db.type == 'sqlite':
            shutil.copy2(self.db.path, path)

    def restore_backup(self, path):
        """Restore the database from a backup"""
        if self.db.type == 'postgresql':
            os.system(f'psql {self.db.name} < {path}')
        elif self.db.type == 'sqlite':
            shutil.copy2(path, self.db.path)

Maintenance

Database Cleanup

class DatabaseMaintenance:
    def __init__(self, database):
        self.db = database

    def cleanup_old_data(self):
        """Remove old, low-value data"""
        queries = [
            """
            DELETE FROM memories
            WHERE importance < 0.1
            AND timestamp < NOW() - INTERVAL '30 days'
            """,
            """
            DELETE FROM character_states
            WHERE updated_at < NOW() - INTERVAL '7 days'
            """
        ]
        for query in queries:
            self.db.execute(query)

    def vacuum_database(self):
        """Reclaim storage and refresh planner statistics"""
        # PostgreSQL syntax; on SQLite run VACUUM and ANALYZE separately
        self.db.execute("VACUUM ANALYZE")

Monitoring

class DatabaseMonitor:
    def __init__(self, database):
        self.db = database

    def check_health(self):
        """Collect database health metrics"""
        metrics = {
            'connection_count': self.get_connection_count(),
            'database_size': self.get_database_size(),
            'cache_hit_ratio': self.get_cache_hit_ratio(),
            'slow_queries': self.get_slow_queries()
        }
        return metrics

    def get_slow_queries(self):
        # Requires the pg_stat_statements extension;
        # on PostgreSQL 13+ the column is total_exec_time
        return self.db.execute("""
            SELECT query, calls, total_time, rows
            FROM pg_stat_statements
            ORDER BY total_time DESC
            LIMIT 10
        """)

Best Practices

  1. Regular Maintenance
     - Schedule regular cleanup tasks
     - Monitor database size
     - Update statistics regularly
     - Back up data frequently

  2. Query Optimization
     - Use appropriate indexes
     - Optimize complex queries
     - Monitor query performance
     - Use prepared statements (see the sketch below)

  3. Connection Management
     - Use connection pooling
     - Close unused connections
     - Monitor connection usage
     - Set appropriate timeouts

  4. Data Integrity
     - Use transactions appropriately (see the sketch below)
     - Implement proper constraints
     - Validate data before insertion
     - Handle errors gracefully

  5. Security
     - Use strong authentication
     - Encrypt sensitive data
     - Run regular security audits
     - Enforce proper access control
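
To illustrate the transaction and prepared-statement items above, a psycopg2 sketch (location_id and character_id are placeholders; the connection context manager commits on success and rolls back on error):

import psycopg2

conn = psycopg2.connect(dbname='degent_civil', user='username',
                        password='password', host='localhost')

with conn:  # transaction scope: commit on success, rollback on exception
    with conn.cursor() as cur:
        # Parameterized statement: values are passed separately from the SQL
        cur.execute(
            "UPDATE characters SET location_id = %s WHERE id = %s",
            (location_id, character_id)
        )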