SQL & NoSQL Comprehensive Cheatsheet

    Table of Contents

    1. SQL Fundamentals
    2. Data Definition Language (DDL)
    3. Data Manipulation Language (DML)
    4. Data Query Language (DQL)
    5. Joins
    6. Aggregation & Functions
    7. Advanced SQL
    8. NoSQL Databases
    9. MongoDB
    10. Redis
    11. DynamoDB
    12. Cassandra
    13. Best Practices
    14. Common Patterns and Use Cases
    15. Database Design Principles
    16. Database Architecture Patterns
    17. Performance & Optimization

    SQL Fundamentals

    Basic Syntax

    -- Comments start with --
    /* Multi-line
       comments */
    
    -- SQL keywords are case-insensitive, but by convention are written in UPPERCASE
    -- (identifier case sensitivity depends on the database and platform)
    SELECT column1, column2 FROM table_name;
    SQL

    Data Types

    Type          Description                 Example
    INT           Integer                     123
    VARCHAR(n)    Variable-length string      'Hello'
    CHAR(n)       Fixed-length string         'ABC'
    TEXT          Large text                  'Long text...'
    DECIMAL(p,s)  Exact decimal number        123.45
    DATE          Date                        '2023-12-25'
    DATETIME      Date and time               '2023-12-25 10:30:00'
    BOOLEAN       True/False                  TRUE, FALSE

    Data Definition Language (DDL)

    Creating Database

    -- Create database
    CREATE DATABASE company_db;
    
    -- Use database
    USE company_db;
    
    -- Drop database
    DROP DATABASE company_db;
    SQL

    Creating Tables

    -- Basic table creation
    CREATE TABLE employees (
        id INT PRIMARY KEY AUTO_INCREMENT,
        first_name VARCHAR(50) NOT NULL,
        last_name VARCHAR(50) NOT NULL,
        email VARCHAR(100) UNIQUE,
        department_id INT,
        salary DECIMAL(10,2),
        hire_date DATE,
        is_active BOOLEAN DEFAULT TRUE
    );
    
    -- Table with foreign key
    CREATE TABLE departments (
        id INT PRIMARY KEY AUTO_INCREMENT,
        name VARCHAR(50) NOT NULL,
        location VARCHAR(100)
    );
    
    -- Add foreign key constraint
    ALTER TABLE employees 
    ADD CONSTRAINT fk_department 
    FOREIGN KEY (department_id) REFERENCES departments(id);
    SQL

    Modifying Tables

    -- Add column
    ALTER TABLE employees ADD COLUMN phone VARCHAR(15);
    
    -- Modify column
    ALTER TABLE employees MODIFY COLUMN phone VARCHAR(20);
    
    -- Drop column
    ALTER TABLE employees DROP COLUMN phone;
    
    -- Add index
    CREATE INDEX idx_employee_email ON employees(email);
    
    -- Drop index
    DROP INDEX idx_employee_email ON employees;
    
    -- Drop table
    DROP TABLE employees;
    SQL

    Data Manipulation Language (DML)

    INSERT Operations

    -- Single row insert
    INSERT INTO employees (first_name, last_name, email, department_id, salary, hire_date)
    VALUES ('John', 'Doe', 'john.doe@company.com', 1, 75000.00, '2023-01-15');
    
    -- Multiple rows insert
    INSERT INTO employees (first_name, last_name, email, department_id, salary, hire_date)
    VALUES 
        ('Jane', 'Smith', 'jane.smith@company.com', 2, 80000.00, '2023-02-01'),
        ('Bob', 'Johnson', 'bob.johnson@company.com', 1, 70000.00, '2023-02-15');
    
    -- Insert from another table
    INSERT INTO employees_backup
    SELECT * FROM employees WHERE hire_date < '2023-01-01';
    SQL

    UPDATE Operations

    -- Update single record
    UPDATE employees 
    SET salary = 85000.00 
    WHERE id = 1;
    
    -- Update multiple records
    UPDATE employees 
    SET salary = salary * 1.1 
    WHERE department_id = 1;
    
    -- Update with JOIN
    UPDATE employees e
    JOIN departments d ON e.department_id = d.id
    SET e.salary = e.salary * 1.05
    WHERE d.name = 'Engineering';
    SQL

    DELETE Operations

    -- Delete specific records
    DELETE FROM employees WHERE id = 1;
    
    -- Delete with condition
    DELETE FROM employees WHERE hire_date < '2020-01-01';
    
    -- Delete all records (but keep table structure)
    DELETE FROM employees;
    
    -- Truncate table (faster than DELETE)
    TRUNCATE TABLE employees;
    SQL

    Data Query Language (DQL)

    Basic SELECT

    -- Select all columns
    SELECT * FROM employees;
    
    -- Select specific columns
    SELECT first_name, last_name, salary FROM employees;
    
    -- Select with alias
    SELECT 
        first_name AS 'First Name',
        last_name AS 'Last Name',
        salary * 12 AS 'Annual Salary'
    FROM employees;
    
    -- Select distinct values
    SELECT DISTINCT department_id FROM employees;
    SQL

    WHERE Clause

    -- Basic conditions
    SELECT * FROM employees WHERE salary > 75000;
    SELECT * FROM employees WHERE department_id = 1;
    SELECT * FROM employees WHERE first_name = 'John';
    
    -- Multiple conditions
    SELECT * FROM employees 
    WHERE salary > 70000 AND department_id = 1;
    
    SELECT * FROM employees 
    WHERE department_id = 1 OR department_id = 2;
    
    -- Pattern matching
    SELECT * FROM employees WHERE first_name LIKE 'J%';
    SELECT * FROM employees WHERE email LIKE '%@company.com';
    
    -- Range conditions
    SELECT * FROM employees WHERE salary BETWEEN 70000 AND 90000;
    SELECT * FROM employees WHERE hire_date BETWEEN '2023-01-01' AND '2023-12-31';
    
    -- IN clause
    SELECT * FROM employees WHERE department_id IN (1, 2, 3);
    
    -- NULL checks
    SELECT * FROM employees WHERE email IS NULL;
    SELECT * FROM employees WHERE email IS NOT NULL;
    SQL

    ORDER BY

    -- Sort ascending (default)
    SELECT * FROM employees ORDER BY salary;
    
    -- Sort descending
    SELECT * FROM employees ORDER BY salary DESC;
    
    -- Multiple column sorting
    SELECT * FROM employees ORDER BY department_id, salary DESC;
    SQL

    LIMIT and OFFSET

    -- Limit results
    SELECT * FROM employees LIMIT 10;
    
    -- Pagination
    SELECT * FROM employees LIMIT 10 OFFSET 20;
    
    -- MySQL shorthand: LIMIT offset, row_count
    SELECT * FROM employees LIMIT 20, 10;
    SQL

    Joins

    INNER JOIN

    -- Basic inner join
    SELECT e.first_name, e.last_name, d.name AS department
    FROM employees e
    INNER JOIN departments d ON e.department_id = d.id;
    
    -- Multiple table join
    SELECT e.first_name, e.last_name, d.name AS department, p.title AS project
    FROM employees e
    INNER JOIN departments d ON e.department_id = d.id
    INNER JOIN projects p ON e.id = p.employee_id;
    SQL

    LEFT JOIN

    -- Include all employees, even without department
    SELECT e.first_name, e.last_name, d.name AS department
    FROM employees e
    LEFT JOIN departments d ON e.department_id = d.id;
    SQL

    RIGHT JOIN

    -- Include all departments, even without employees
    SELECT e.first_name, e.last_name, d.name AS department
    FROM employees e
    RIGHT JOIN departments d ON e.department_id = d.id;
    SQL

    FULL OUTER JOIN

    -- Include all records from both tables
    -- (not supported in MySQL; emulate with a UNION of LEFT and RIGHT JOINs)
    SELECT e.first_name, e.last_name, d.name AS department
    FROM employees e
    FULL OUTER JOIN departments d ON e.department_id = d.id;
    SQL

    CROSS JOIN

    -- Cartesian product of both tables
    SELECT e.first_name, d.name
    FROM employees e
    CROSS JOIN departments d;
    SQL

    Self JOIN

    -- Join table with itself
    SELECT 
        e1.first_name AS employee,
        e2.first_name AS manager
    FROM employees e1
    LEFT JOIN employees e2 ON e1.manager_id = e2.id;
    SQL

    Aggregation & Functions

    Aggregate Functions

    -- Count records
    SELECT COUNT(*) FROM employees;
    SELECT COUNT(DISTINCT department_id) FROM employees;
    
    -- Sum values
    SELECT SUM(salary) FROM employees;
    
    -- Average
    SELECT AVG(salary) FROM employees;
    
    -- Min and Max
    SELECT MIN(salary), MAX(salary) FROM employees;
    
    -- Group by
    SELECT department_id, COUNT(*), AVG(salary)
    FROM employees
    GROUP BY department_id;
    
    -- Having clause (filter groups)
    SELECT department_id, COUNT(*) as emp_count
    FROM employees
    GROUP BY department_id
    HAVING COUNT(*) > 5;
    SQL

    String Functions

    -- Concatenation
    SELECT CONCAT(first_name, ' ', last_name) AS full_name FROM employees;
    
    -- Substring
    SELECT SUBSTRING(email, 1, 5) FROM employees;
    
    -- Length
    SELECT LENGTH(first_name) FROM employees;
    
    -- Upper/Lower case
    SELECT UPPER(first_name), LOWER(last_name) FROM employees;
    
    -- Trim whitespace
    SELECT TRIM(first_name) FROM employees;
    
    -- Replace
    SELECT REPLACE(email, '@company.com', '@newcompany.com') FROM employees;
    SQL

    Date Functions

    -- Current date/time
    SELECT NOW(), CURDATE(), CURTIME();
    
    -- Date formatting
    SELECT DATE_FORMAT(hire_date, '%Y-%m-%d') FROM employees;
    
    -- Date arithmetic
    SELECT hire_date, DATE_ADD(hire_date, INTERVAL 1 YEAR) AS anniversary
    FROM employees;
    
    -- Date difference
    SELECT DATEDIFF(NOW(), hire_date) AS days_employed FROM employees;
    
    -- Extract parts
    SELECT YEAR(hire_date), MONTH(hire_date), DAY(hire_date) FROM employees;
    SQL

    Mathematical Functions

    -- Round numbers
    SELECT ROUND(salary, 2) FROM employees;
    
    -- Ceiling and Floor
    SELECT CEIL(salary/12), FLOOR(salary/12) FROM employees;
    
    -- Absolute value
    SELECT ABS(-100);
    
    -- Power
    SELECT POWER(salary, 2) FROM employees;
    
    -- Random
    SELECT RAND();
    SQL

    Advanced SQL

    Subqueries

    -- Scalar subquery
    SELECT * FROM employees 
    WHERE salary > (SELECT AVG(salary) FROM employees);
    
    -- Correlated subquery
    SELECT * FROM employees e1
    WHERE salary > (
        SELECT AVG(salary) 
        FROM employees e2 
        WHERE e2.department_id = e1.department_id
    );
    
    -- EXISTS subquery
    SELECT * FROM departments d
    WHERE EXISTS (
        SELECT 1 FROM employees e 
        WHERE e.department_id = d.id
    );
    
    -- IN subquery
    SELECT * FROM employees
    WHERE department_id IN (
        SELECT id FROM departments 
        WHERE location = 'New York'
    );
    SQL

    Common Table Expressions (CTE)

    -- Basic CTE
    WITH high_earners AS (
        SELECT * FROM employees WHERE salary > 80000
    )
    SELECT department_id, COUNT(*) 
    FROM high_earners 
    GROUP BY department_id;
    
    -- Recursive CTE (organizational hierarchy)
    WITH RECURSIVE employee_hierarchy AS (
        -- Base case: top-level managers
        SELECT id, first_name, last_name, manager_id, 1 as level
        FROM employees 
        WHERE manager_id IS NULL
    
        UNION ALL
    
        -- Recursive case
        SELECT e.id, e.first_name, e.last_name, e.manager_id, eh.level + 1
        FROM employees e
        INNER JOIN employee_hierarchy eh ON e.manager_id = eh.id
    )
    SELECT * FROM employee_hierarchy ORDER BY level, id;
    SQL

    Window Functions

    -- Row number
    SELECT 
        first_name, 
        last_name, 
        salary,
        ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num  -- 'rank' is reserved in MySQL 8.0+
    FROM employees;
    
    -- Rank and Dense Rank
    SELECT 
        first_name, 
        last_name, 
        salary,
        RANK() OVER (ORDER BY salary DESC) AS salary_rank,
        DENSE_RANK() OVER (ORDER BY salary DESC) AS salary_dense_rank
    FROM employees;
    
    -- Partition by
    SELECT 
        first_name, 
        last_name, 
        department_id,
        salary,
        RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) as dept_rank
    FROM employees;
    
    -- Lead and Lag
    SELECT 
        first_name, 
        salary,
        LAG(salary) OVER (ORDER BY hire_date) as prev_salary,
        LEAD(salary) OVER (ORDER BY hire_date) as next_salary
    FROM employees;
    
    -- Running totals
    SELECT 
        first_name, 
        salary,
        SUM(salary) OVER (ORDER BY hire_date ROWS UNBOUNDED PRECEDING) as running_total
    FROM employees;
    SQL

    Stored Procedures

    -- Create stored procedure
    DELIMITER //
    CREATE PROCEDURE GetEmployeesByDepartment(IN dept_id INT)
    BEGIN
        SELECT * FROM employees WHERE department_id = dept_id;
    END //
    DELIMITER ;
    
    -- Call stored procedure
    CALL GetEmployeesByDepartment(1);
    
    -- Procedure with OUT parameter
    DELIMITER //
    CREATE PROCEDURE GetDepartmentStats(
        IN dept_id INT, 
        OUT emp_count INT, 
        OUT avg_salary DECIMAL(10,2)
    )
    BEGIN
        SELECT COUNT(*), AVG(salary) 
        INTO emp_count, avg_salary
        FROM employees 
        WHERE department_id = dept_id;
    END //
    DELIMITER ;
    
    -- Call with OUT parameters
    CALL GetDepartmentStats(1, @count, @avg);
    SELECT @count, @avg;
    SQL

    Triggers

    -- Create trigger (use DELIMITER in the mysql client for multi-statement bodies)
    DELIMITER //
    CREATE TRIGGER salary_audit_trigger
    AFTER UPDATE ON employees
    FOR EACH ROW
    BEGIN
        IF NEW.salary != OLD.salary THEN
            INSERT INTO salary_audit (employee_id, old_salary, new_salary, change_date)
            VALUES (NEW.id, OLD.salary, NEW.salary, NOW());
        END IF;
    END //
    DELIMITER ;
    
    -- Drop trigger
    DROP TRIGGER salary_audit_trigger;
    SQL

    NoSQL Databases

    Types of NoSQL Databases

    1. Document Databases (MongoDB, CouchDB)
      • Store data as documents (JSON-like)
      • Flexible schema
      • Good for content management, catalogs
    2. Key-Value Stores (Redis, DynamoDB)
      • Simple key-value pairs
      • Fast lookups
      • Good for caching, session storage
    3. Column-Family (Cassandra, HBase)
      • Data stored in column families
      • Good for time-series data, analytics
    4. Graph Databases (Neo4j, Amazon Neptune)
      • Data stored as nodes and relationships
      • Good for social networks, recommendations

    MongoDB

    Basic Operations

    // Connect to database
    use company_db
    
    // Insert documents
    db.employees.insertOne({
        firstName: "John",
        lastName: "Doe",
        email: "john.doe@company.com",
        department: "Engineering",
        salary: 75000,
        skills: ["JavaScript", "Python", "SQL"],
        address: {
            street: "123 Main St",
            city: "New York",
            zipCode: "10001"
        }
    })
    
    // Insert multiple documents
    db.employees.insertMany([
        {
            firstName: "Jane",
            lastName: "Smith",
            email: "jane.smith@company.com",
            department: "Marketing",
            salary: 70000,
            skills: ["Marketing", "Analytics"]
        },
        {
            firstName: "Bob",
            lastName: "Johnson",
            email: "bob.johnson@company.com",
            department: "Engineering",
            salary: 80000,
            skills: ["Java", "Spring", "MongoDB"]
        }
    ])
    JavaScript

    Querying Documents

    // Find all documents
    db.employees.find()
    
    // Find with condition
    db.employees.find({department: "Engineering"})
    
    // Find with multiple conditions
    db.employees.find({
        department: "Engineering",
        salary: {$gt: 70000}
    })
    
    // Find one document
    db.employees.findOne({email: "john.doe@company.com"})
    
    // Projection (select specific fields)
    db.employees.find({}, {firstName: 1, lastName: 1, salary: 1})
    
    // Query nested documents
    db.employees.find({"address.city": "New York"})
    
    // Query arrays
    db.employees.find({skills: "JavaScript"})
    db.employees.find({skills: {$in: ["Python", "Java"]}})
    JavaScript

    Query Operators

    // Comparison operators
    db.employees.find({salary: {$gt: 75000}})    // greater than
    db.employees.find({salary: {$gte: 75000}})   // greater than or equal
    db.employees.find({salary: {$lt: 80000}})    // less than
    db.employees.find({salary: {$lte: 80000}})   // less than or equal
    db.employees.find({salary: {$ne: 75000}})    // not equal
    db.employees.find({department: {$in: ["Engineering", "Marketing"]}})
    
    // Logical operators
    db.employees.find({
        $and: [
            {department: "Engineering"},
            {salary: {$gt: 70000}}
        ]
    })
    
    db.employees.find({
        $or: [
            {department: "Engineering"},
            {department: "Marketing"}
        ]
    })
    
    // Text search (requires a text index on the collection)
    db.employees.find({$text: {$search: "engineer"}})
    
    // Regular expressions
    db.employees.find({email: /company\.com$/})
    JavaScript

    Updating Documents

    // Update one document
    db.employees.updateOne(
        {email: "john.doe@company.com"},
        {$set: {salary: 85000}}
    )
    
    // Update multiple documents
    db.employees.updateMany(
        {department: "Engineering"},
        {$inc: {salary: 5000}}
    )
    
    // Add to array
    db.employees.updateOne(
        {email: "john.doe@company.com"},
        {$push: {skills: "React"}}
    )
    
    // Remove from array
    db.employees.updateOne(
        {email: "john.doe@company.com"},
        {$pull: {skills: "SQL"}}
    )
    
    // Upsert (update if a matching document exists, otherwise insert)
    db.employees.updateOne(
        {email: "new.employee@company.com"},
        {$set: {firstName: "New", lastName: "Employee"}},
        {upsert: true}
    )
    JavaScript

    Aggregation Pipeline

    // Group by department and calculate average salary
    db.employees.aggregate([
        {
            $group: {
                _id: "$department",
                avgSalary: {$avg: "$salary"},
                count: {$sum: 1}
            }
        }
    ])
    
    // Match, group, and sort
    db.employees.aggregate([
        {$match: {salary: {$gt: 70000}}},
        {
            $group: {
                _id: "$department",
                avgSalary: {$avg: "$salary"}
            }
        },
        {$sort: {avgSalary: -1}}
    ])
    
    // Unwind arrays
    db.employees.aggregate([
        {$unwind: "$skills"},
        {
            $group: {
                _id: "$skills",
                count: {$sum: 1}
            }
        }
    ])
    JavaScript

    Indexing

    // Create index
    db.employees.createIndex({email: 1})
    db.employees.createIndex({department: 1, salary: -1})
    
    // Text index
    db.employees.createIndex({firstName: "text", lastName: "text"})
    
    // List indexes
    db.employees.getIndexes()
    
    // Drop index
    db.employees.dropIndex({email: 1})
    JavaScript

    Redis

    Basic Data Types and Operations

    # Strings
    SET name "John Doe"
    GET name
    INCR counter
    DECR counter
    EXPIRE name 3600  # Set expiration in seconds
    
    # Lists
    LPUSH queue "task1"
    RPUSH queue "task2"
    LPOP queue
    RPOP queue
    LRANGE queue 0 -1  # Get all elements
    
    # Sets
    SADD skills "JavaScript"
    SADD skills "Python"
    SMEMBERS skills
    SISMEMBER skills "JavaScript"
    
    # Hashes
    HSET user:1 name "John" email "john@example.com"
    HGET user:1 name
    HGETALL user:1
    HDEL user:1 email
    
    # Sorted Sets
    ZADD leaderboard 100 "player1"
    ZADD leaderboard 150 "player2"
    ZRANGE leaderboard 0 -1 WITHSCORES
    ZRANK leaderboard "player1"
    Redis

    Advanced Redis Operations

    # Transactions
    MULTI
    SET key1 "value1"
    SET key2 "value2"
    EXEC
    
    # Pub/Sub
    PUBLISH channel "message"
    SUBSCRIBE channel
    
    # Lua Scripts
    EVAL "return redis.call('get', KEYS[1])" 1 mykey
    
    # Pipeline commands for better performance
    redis-cli --pipe < commands.txt
    Redis

    DynamoDB

    Table Operations

    // Create table (AWS SDK for JavaScript v2, low-level DynamoDB client;
    // calls are shown without callbacks/.promise() for brevity)
    const params = {
        TableName: 'employees',
        KeySchema: [
            { AttributeName: 'id', KeyType: 'HASH' }
        ],
        AttributeDefinitions: [
            { AttributeName: 'id', AttributeType: 'S' }
        ],
        BillingMode: 'PAY_PER_REQUEST'
    };
    dynamodb.createTable(params);
    JavaScript

    CRUD Operations

    // Put item (the item-level calls below use the DocumentClient API)
    const putParams = {
        TableName: 'employees',
        Item: {
            id: '123',
            firstName: 'John',
            lastName: 'Doe',
            department: 'Engineering',
            salary: 75000
        }
    };
    dynamodb.put(putParams);
    
    // Get item
    const getParams = {
        TableName: 'employees',
        Key: {
            id: '123'
        }
    };
    dynamodb.get(getParams);
    
    // Update item
    const updateParams = {
        TableName: 'employees',
        Key: { id: '123' },
        UpdateExpression: 'SET salary = :salary',
        ExpressionAttributeValues: {
            ':salary': 80000
        }
    };
    dynamodb.update(updateParams);
    
    // Delete item
    const deleteParams = {
        TableName: 'employees',
        Key: { id: '123' }
    };
    dynamodb.delete(deleteParams);
    JavaScript

    Querying and Scanning

    // Query (requires partition key)
    const queryParams = {
        TableName: 'employees',
        KeyConditionExpression: 'id = :id',
        ExpressionAttributeValues: {
            ':id': '123'
        }
    };
    dynamodb.query(queryParams);
    
    // Scan (examines every item)
    const scanParams = {
        TableName: 'employees',
        FilterExpression: 'department = :dept',
        ExpressionAttributeValues: {
            ':dept': 'Engineering'
        }
    };
    dynamodb.scan(scanParams);
    JavaScript

    Cassandra

    Keyspace and Table Creation

    -- Create keyspace
    CREATE KEYSPACE company WITH replication = {
        'class': 'SimpleStrategy',
        'replication_factor': 3
    };
    
    USE company;
    
    -- Create table
    CREATE TABLE employees (
        id UUID PRIMARY KEY,
        first_name TEXT,
        last_name TEXT,
        email TEXT,
        department TEXT,
        salary DECIMAL,
        hire_date DATE
    );
    
    -- Create table with composite key
    CREATE TABLE employee_by_department (
        department TEXT,
        hire_date DATE,
        id UUID,
        first_name TEXT,
        last_name TEXT,
        salary DECIMAL,
        PRIMARY KEY (department, hire_date, id)
    );
    SQL

    CRUD Operations

    -- Insert
    INSERT INTO employees (id, first_name, last_name, email, department, salary, hire_date)
    VALUES (uuid(), 'John', 'Doe', 'john@company.com', 'Engineering', 75000, '2023-01-15');
    
    -- Select
    SELECT * FROM employees WHERE id = 123e4567-e89b-12d3-a456-426614174000;
    
    -- Update
    UPDATE employees SET salary = 80000 WHERE id = 123e4567-e89b-12d3-a456-426614174000;
    
    -- Delete
    DELETE FROM employees WHERE id = 123e4567-e89b-12d3-a456-426614174000;
    SQL

    Cassandra Query Language (CQL) Features

    -- Query with partition key
    SELECT * FROM employee_by_department WHERE department = 'Engineering';
    
    -- Query with clustering columns
    SELECT * FROM employee_by_department 
    WHERE department = 'Engineering' AND hire_date >= '2023-01-01';
    
    -- Use collections
    CREATE TABLE employee_skills (
        id UUID PRIMARY KEY,
        skills SET<TEXT>,
        projects MAP<TEXT, TEXT>
    );
    
    -- Insert with collections
    INSERT INTO employee_skills (id, skills, projects)
    VALUES (uuid(), {'Java', 'Python', 'SQL'}, {'project1': 'description1'});
    SQL

    Best Practices

    SQL Best Practices

    1. Use indexes on frequently queried columns
    2. Avoid SELECT * in production code
    3. Use parameterized queries to prevent SQL injection (see the sketch after this list)
    4. Normalize data to reduce redundancy
    5. Use appropriate data types for efficiency
    6. Write readable queries with proper formatting
    7. Test queries with realistic data volumes
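
    For item 3, a minimal sketch of a parameterized query, shown here with Python's built-in sqlite3 driver purely for illustration (MySQL and PostgreSQL drivers follow the same pattern, typically with %s placeholders):

    import sqlite3
    
    # In-memory database purely for illustration
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE employees (id INTEGER PRIMARY KEY, first_name TEXT, salary REAL)")
    conn.execute("INSERT INTO employees (first_name, salary) VALUES ('John', 75000)")
    
    def find_by_name(name):
        # The driver binds the value separately from the SQL text, so input like
        # "John' OR '1'='1" cannot change the structure of the query.
        cur = conn.execute(
            "SELECT id, first_name, salary FROM employees WHERE first_name = ?",
            (name,),
        )
        return cur.fetchall()
    
    print(find_by_name("John"))             # [(1, 'John', 75000.0)]
    print(find_by_name("John' OR '1'='1"))  # [] (input is treated as data, not SQL)
    Python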

    NoSQL Best Practices

    1. Understand your access patterns before designing schema
    2. Denormalize data for better read performance
    3. Use appropriate database type for your use case
    4. Plan for scalability from the beginning
    5. Monitor performance and optimize queries
    6. Implement proper error handling and retry logic (see the sketch after this list)
    7. Use connection pooling for better performance
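
    For item 6, a minimal retry sketch with exponential backoff and jitter; fetch_profile and the ConnectionError type stand in for whatever transient errors your NoSQL client raises:

    import random
    import time
    
    def with_retries(operation, max_attempts=5, base_delay=0.1):
        """Retry a callable on transient errors, backing off exponentially with jitter."""
        for attempt in range(1, max_attempts + 1):
            try:
                return operation()
            except ConnectionError:  # substitute your client's transient error types
                if attempt == max_attempts:
                    raise
                delay = base_delay * (2 ** (attempt - 1)) + random.uniform(0, base_delay)
                time.sleep(delay)
    
    # Usage (fetch_profile is a hypothetical call against your NoSQL client):
    # profile = with_retries(lambda: fetch_profile("user123"))
    Python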

    Security Best Practices

    1. Use parameterized queries or prepared statements
    2. Implement proper authentication and authorization
    3. Encrypt sensitive data at rest and in transit
    4. Regular security audits and updates
    5. Principle of least privilege for database users
    6. Monitor database access and unusual activities
    7. Backup and disaster recovery planning

    Common Patterns and Use Cases

    SQL Use Cases

    • OLTP Systems: Banking, e-commerce, CRM
    • Data Warehousing: Business intelligence, reporting
    • Financial Systems: Accounting, billing
    • Inventory Management: Stock tracking, supply chain

    NoSQL Use Cases

    • Document DB: Content management, catalogs, user profiles
    • Key-Value: Caching, session storage, shopping carts
    • Column-Family: Time-series data, IoT sensors, analytics
    • Graph: Social networks, fraud detection, recommendations


    Database Design Principles

    1. Database Normalization

    First Normal Form (1NF)

    • Rule: Eliminate repeating groups and ensure atomic values
    • Example:

    -- Violates 1NF (multiple values in one column)
    CREATE TABLE employees_bad (
        id INT PRIMARY KEY,
        name VARCHAR(100),
        skills VARCHAR(500)  -- "Java,Python,SQL" - not atomic
    );
    
    -- Follows 1NF
    CREATE TABLE employees (
        id INT PRIMARY KEY,
        name VARCHAR(100)
    );
    
    CREATE TABLE employee_skills (
        employee_id INT,
        skill VARCHAR(50),
        PRIMARY KEY (employee_id, skill),
        FOREIGN KEY (employee_id) REFERENCES employees(id)
    );
    SQL

    Second Normal Form (2NF)

    • Rule: Must be in 1NF + eliminate partial dependencies
    • Example:

    -- Violates 2NF (course_name depends only on course_id, not the full key)
    CREATE TABLE enrollments_bad (
        student_id INT,
        course_id INT,
        course_name VARCHAR(100),  -- Partial dependency
        grade CHAR(1),
        PRIMARY KEY (student_id, course_id)
    );
    
    -- Follows 2NF
    CREATE TABLE courses (
        id INT PRIMARY KEY,
        name VARCHAR(100)
    );
    
    CREATE TABLE enrollments (
        student_id INT,
        course_id INT,
        grade CHAR(1),
        PRIMARY KEY (student_id, course_id),
        FOREIGN KEY (course_id) REFERENCES courses(id)
    );
    SQL

    Third Normal Form (3NF)

    • Rule: Must be in 2NF + eliminate transitive dependencies
    • Example:

    -- Violates 3NF (department_name depends on department_id, not employee_id)
    CREATE TABLE employees_bad (
        id INT PRIMARY KEY,
        name VARCHAR(100),
        department_id INT,
        department_name VARCHAR(100)  -- Transitive dependency
    );
    
    -- Follows 3NF
    CREATE TABLE departments (
        id INT PRIMARY KEY,
        name VARCHAR(100)
    );
    
    CREATE TABLE employees (
        id INT PRIMARY KEY,
        name VARCHAR(100),
        department_id INT,
        FOREIGN KEY (department_id) REFERENCES departments(id)
    );
    SQL

    Boyce-Codd Normal Form (BCNF)

    • Rule: Every determinant must be a candidate key
    • Example:

    -- Violates BCNF
    CREATE TABLE class_schedule (
        student_id INT,
        subject VARCHAR(50),
        teacher VARCHAR(50),
        PRIMARY KEY (student_id, subject)
        -- Issue: teacher determines subject, but teacher is not a candidate key
    );
    
    -- Follows BCNF
    CREATE TABLE teachers (
        id INT PRIMARY KEY,
        name VARCHAR(50),
        subject VARCHAR(50)
    );
    
    CREATE TABLE student_classes (
        student_id INT,
        teacher_id INT,
        PRIMARY KEY (student_id, teacher_id),
        FOREIGN KEY (teacher_id) REFERENCES teachers(id)
    );
    SQL

    2. Denormalization Strategies

    When to Denormalize

    • Read-heavy workloads with complex joins
    • Performance requirements outweigh storage costs
    • Data warehouse and reporting scenarios

    Denormalization Techniques

    -- Materialized Views (PostgreSQL/Oracle; MySQL emulates them with summary tables)
    CREATE MATERIALIZED VIEW employee_summary AS
    SELECT 
        e.id,
        e.name,
        d.name as department_name,
        COUNT(p.id) as project_count,
        AVG(pr.rating) as avg_performance
    FROM employees e
    JOIN departments d ON e.department_id = d.id
    LEFT JOIN projects p ON e.id = p.lead_id
    LEFT JOIN performance_reviews pr ON e.id = pr.employee_id
    GROUP BY e.id, e.name, d.name;
    
    -- Redundant Columns for Performance
    CREATE TABLE orders (
        id INT PRIMARY KEY,
        customer_id INT,
        customer_name VARCHAR(100),  -- Denormalized for quick access
        order_total DECIMAL(10,2),   -- Calculated field stored
        item_count INT,              -- Aggregate stored
        order_date DATE
    );
    SQL

    3. Data Modeling Principles

    Entity-Relationship Design

    -- One-to-Many Relationship
    CREATE TABLE customers (
        id INT PRIMARY KEY,
        name VARCHAR(100),
        email VARCHAR(100)
    );
    
    CREATE TABLE orders (
        id INT PRIMARY KEY,
        customer_id INT,
        order_date DATE,
        FOREIGN KEY (customer_id) REFERENCES customers(id)
    );
    
    -- Many-to-Many Relationship
    CREATE TABLE students (
        id INT PRIMARY KEY,
        name VARCHAR(100)
    );
    
    CREATE TABLE courses (
        id INT PRIMARY KEY,
        name VARCHAR(100)
    );
    
    CREATE TABLE enrollments (
        student_id INT,
        course_id INT,
        enrollment_date DATE,
        PRIMARY KEY (student_id, course_id),
        FOREIGN KEY (student_id) REFERENCES students(id),
        FOREIGN KEY (course_id) REFERENCES courses(id)
    );
    
    -- Self-Referencing Relationship
    CREATE TABLE employees (
        id INT PRIMARY KEY,
        name VARCHAR(100),
        manager_id INT,
        FOREIGN KEY (manager_id) REFERENCES employees(id)
    );
    SQL

    Domain-Driven Design (DDD)

    -- Aggregate Root Pattern
    CREATE TABLE orders (
        id INT PRIMARY KEY,
        customer_id INT,
        status ENUM('pending', 'confirmed', 'shipped', 'delivered'),
        total_amount DECIMAL(10,2),
        created_at TIMESTAMP,
        version INT  -- For optimistic locking
    );
    
    -- Value Objects as Embedded Data
    CREATE TABLE addresses (
        id INT PRIMARY KEY,
        street VARCHAR(200),
        city VARCHAR(100),
        state VARCHAR(50),
        zip_code VARCHAR(20),
        country VARCHAR(50)
    );
    
    -- Bounded Context Separation
    -- Order Context
    CREATE SCHEMA order_management;
    -- Customer Context  
    CREATE SCHEMA customer_management;
    -- Inventory Context
    CREATE SCHEMA inventory_management;
    SQL

    4. NoSQL Design Principles

    Document Database Design

    // MongoDB - Embedding vs Referencing
    
    // Embedding (1:Few relationship)
    {
        _id: ObjectId("..."),
        title: "Blog Post",
        content: "...",
        author: {
            name: "John Doe",
            email: "john@example.com"
        },
        comments: [
            {
                text: "Great post!",
                author: "Jane Smith",
                date: ISODate("...")
            }
        ]
    }
    
    // Referencing (1:Many relationship)
    // Posts collection
    {
        _id: ObjectId("..."),
        title: "Blog Post",
        content: "...",
        author_id: ObjectId("...")
    }
    
    // Authors collection
    {
        _id: ObjectId("..."),
        name: "John Doe",
        email: "john@example.com",
        bio: "..."
    }
    JavaScript

    Key-Value Store Design

    // Redis design patterns
    
    // User session
    SET session:user123 '{"userId": 123, "name": "John", "lastActive": "2023-10-27"}'
    EXPIRE session:user123 3600
    
    // Shopping cart
    HSET cart:user123 product:456 2
    HSET cart:user123 product:789 1
    EXPIRE cart:user123 86400
    
    // Leaderboard
    ZADD game:leaderboard 1500 "player1"
    ZADD game:leaderboard 2000 "player2"
    Redis

    Column-Family Design

    -- Cassandra design principles
    
    -- Query-driven modeling
    CREATE TABLE user_activity_by_date (
        user_id UUID,
        activity_date DATE,
        activity_time TIMESTAMP,
        activity_type TEXT,
        details TEXT,
        PRIMARY KEY (user_id, activity_date, activity_time)
    ) WITH CLUSTERING ORDER BY (activity_date DESC, activity_time DESC);
    
    -- Time-series data
    CREATE TABLE sensor_data (
        sensor_id UUID,
        year INT,
        month INT,
        timestamp TIMESTAMP,
        temperature FLOAT,
        humidity FLOAT,
        PRIMARY KEY ((sensor_id, year, month), timestamp)
    );
    SQL

    Database Architecture Patterns

    1. Monolithic Database Architecture

    Single Database Pattern

    -- All application data in one database
    CREATE DATABASE ecommerce;
    
    USE ecommerce;
    
    -- User management
    CREATE TABLE users (
        id INT PRIMARY KEY AUTO_INCREMENT,
        username VARCHAR(50),
        email VARCHAR(100),
        password_hash VARCHAR(255)
    );
    
    -- Product catalog
    CREATE TABLE products (
        id INT PRIMARY KEY AUTO_INCREMENT,
        name VARCHAR(200),
        price DECIMAL(10,2),
        inventory_count INT
    );
    
    -- Order processing
    CREATE TABLE orders (
        id INT PRIMARY KEY AUTO_INCREMENT,
        user_id INT,
        total_amount DECIMAL(10,2),
        status ENUM('pending', 'confirmed', 'shipped'),
        FOREIGN KEY (user_id) REFERENCES users(id)
    );
    SQL

    Pros: Simple, ACID compliance, easy transactions
    Cons: Single point of failure, scaling limitations

    2. Database per Service (Microservices)

    Service Separation

    -- User Service Database
    CREATE DATABASE user_service;
    CREATE TABLE users (
        id INT PRIMARY KEY,
        username VARCHAR(50),
        email VARCHAR(100)
    );
    
    -- Product Service Database  
    CREATE DATABASE product_service;
    CREATE TABLE products (
        id INT PRIMARY KEY,
        name VARCHAR(200),
        price DECIMAL(10,2)
    );
    
    -- Order Service Database
    CREATE DATABASE order_service;
    CREATE TABLE orders (
        id INT PRIMARY KEY,
        user_id INT,  -- Reference to user service
        product_id INT,  -- Reference to product service
        quantity INT
    );
    SQL

    Pros: Independent scaling, technology diversity, fault isolation
    Cons: Complex cross-service transactions, data consistency challenges (see the sketch below)
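
    Because each service owns its own database, cross-service "joins" happen in application code or via service APIs. A minimal sketch, assuming separate DB-API style connections to the order and user databases (the helper and connection names are illustrative; columns follow the tables above):

    def get_order_with_user(order_conn, user_conn, order_id):
        """Application-side join across two service databases (sketch)."""
        with order_conn.cursor() as cur:
            cur.execute(
                "SELECT id, user_id, product_id, quantity FROM orders WHERE id = %s",
                (order_id,),
            )
            order = cur.fetchone()
        if order is None:
            return None
        _, user_id, product_id, quantity = order
    
        # The user row lives in a different database (or behind the user service's API)
        with user_conn.cursor() as cur:
            cur.execute("SELECT id, username, email FROM users WHERE id = %s", (user_id,))
            user = cur.fetchone()
    
        return {"order_id": order_id, "user": user, "product_id": product_id, "quantity": quantity}
    Python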

    3. CQRS (Command Query Responsibility Segregation)

    Separate Read/Write Models

    -- Write Model (Normalized for consistency)
    CREATE TABLE commands_orders (
        id INT PRIMARY KEY AUTO_INCREMENT,
        user_id INT,
        status VARCHAR(20),
        created_at TIMESTAMP
    );
    
    CREATE TABLE commands_order_items (
        order_id INT,
        product_id INT,
        quantity INT,
        price DECIMAL(10,2),
        PRIMARY KEY (order_id, product_id)
    );
    
    -- Read Model (Denormalized for performance)
    CREATE TABLE queries_order_summary (
        order_id INT PRIMARY KEY,
        user_name VARCHAR(100),
        total_items INT,
        total_amount DECIMAL(10,2),
        order_status VARCHAR(20),
        created_at TIMESTAMP
    );
    
    CREATE TABLE queries_user_orders (
        user_id INT,
        order_id INT,
        order_date DATE,
        total_amount DECIMAL(10,2),
        INDEX idx_user_date (user_id, order_date)
    );
    SQL
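
    The two models are kept in sync by updating the denormalized read tables after each successful command, either in the same transaction or asynchronously from an event stream. A minimal synchronous sketch, assuming the table names above and a DB-API style MySQL connection (the %s placeholders, NOW(), and lastrowid are assumptions of that driver):

    def place_order(conn, user_id, user_name, items):
        """Write to the command model, then refresh the read model (synchronous CQRS sketch)."""
        with conn.cursor() as cur:
            # 1. Command side: normalized write model
            cur.execute(
                "INSERT INTO commands_orders (user_id, status, created_at) "
                "VALUES (%s, 'pending', NOW())",
                (user_id,),
            )
            order_id = cur.lastrowid
            for product_id, quantity, price in items:
                cur.execute(
                    "INSERT INTO commands_order_items (order_id, product_id, quantity, price) "
                    "VALUES (%s, %s, %s, %s)",
                    (order_id, product_id, quantity, price),
                )
    
            # 2. Query side: denormalized read model
            total_items = sum(quantity for _, quantity, _ in items)
            total_amount = sum(quantity * price for _, quantity, price in items)
            cur.execute(
                "INSERT INTO queries_order_summary "
                "(order_id, user_name, total_items, total_amount, order_status, created_at) "
                "VALUES (%s, %s, %s, %s, 'pending', NOW())",
                (order_id, user_name, total_items, total_amount),
            )
        conn.commit()
        return order_id
    Python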

    4. Event Sourcing

    Event Store Design

    CREATE TABLE event_store (
        event_id BIGINT PRIMARY KEY AUTO_INCREMENT,
        aggregate_id VARCHAR(100),
        aggregate_type VARCHAR(50),
        event_type VARCHAR(100),
        event_data JSON,
        event_version INT,
        created_at TIMESTAMP,
        INDEX idx_aggregate (aggregate_id, event_version)
    );
    
    -- Example events
    INSERT INTO event_store (aggregate_id, aggregate_type, event_type, event_data, event_version)
    VALUES 
    ('order-123', 'Order', 'OrderCreated', 
     '{"userId": "user-456", "items": [{"productId": "prod-789", "quantity": 2}]}', 1),
    ('order-123', 'Order', 'OrderConfirmed', 
     '{"confirmedAt": "2023-10-27T10:00:00Z"}', 2);
    
    -- Projection/Snapshot table
    CREATE TABLE order_projections (
        order_id VARCHAR(100) PRIMARY KEY,
        user_id VARCHAR(100),
        status VARCHAR(20),
        total_amount DECIMAL(10,2),
        last_event_version INT,
        updated_at TIMESTAMP
    );
    SQL
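
    Projections such as order_projections are rebuilt by replaying an aggregate's events in version order and folding them into the current state. A minimal replay sketch, assuming the event_store table above and a DB-API style MySQL connection (REPLACE INTO is MySQL's upsert shorthand; the handled event types mirror the example rows):

    import json
    
    def rebuild_order_projection(conn, order_id):
        """Fold one aggregate's event stream into its projection row (sketch)."""
        with conn.cursor() as cur:
            cur.execute(
                "SELECT event_type, event_data, event_version FROM event_store "
                "WHERE aggregate_id = %s ORDER BY event_version",
                (order_id,),
            )
            state = {"user_id": None, "status": None, "version": 0}
            for event_type, event_data, version in cur.fetchall():
                data = json.loads(event_data)  # event_data assumed to arrive as a JSON string
                if event_type == "OrderCreated":
                    state["user_id"] = data["userId"]
                    state["status"] = "created"
                elif event_type == "OrderConfirmed":
                    state["status"] = "confirmed"
                state["version"] = version
    
            # Upsert the snapshot into the projection table
            cur.execute(
                "REPLACE INTO order_projections "
                "(order_id, user_id, status, last_event_version, updated_at) "
                "VALUES (%s, %s, %s, %s, NOW())",
                (order_id, state["user_id"], state["status"], state["version"]),
            )
        conn.commit()
        return state
    Python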

    5. Polyglot Persistence

    Multiple Database Technologies

    // User profiles - Document DB (MongoDB)
    db.users.insertOne({
        _id: ObjectId("..."),
        username: "john_doe",
        profile: {
            firstName: "John",
            lastName: "Doe",
            preferences: {
                theme: "dark",
                notifications: true
            }
        }
    });
    
    // Product catalog - Search engine (Elasticsearch)
    PUT /products/_doc/1
    {
        "name": "Laptop",
        "description": "High-performance laptop",
        "price": 999.99,
        "categories": ["electronics", "computers"],
        "specs": {
            "cpu": "Intel i7",
            "ram": "16GB"
        }
    }
    
    // Shopping cart - Key-value (Redis)
    HSET cart:user123 product:456 2
    HSET cart:user123 product:789 1
    
    // Financial transactions - RDBMS (PostgreSQL)
    CREATE TABLE transactions (
        id SERIAL PRIMARY KEY,
        user_id INT,
        amount DECIMAL(15,2),
        transaction_type VARCHAR(20),
        created_at TIMESTAMP
    );
    
    // Analytics - Column store (Cassandra)
    CREATE TABLE user_events (
        user_id UUID,
        event_date DATE,
        event_time TIMESTAMP,
        event_type TEXT,
        properties MAP<TEXT, TEXT>,
        PRIMARY KEY (user_id, event_date, event_time)
    );
    Mixed

    6. Sharding Strategies

    Horizontal Partitioning

    -- Hash-based sharding
    CREATE TABLE users_shard_0 AS SELECT * FROM users WHERE MOD(id, 4) = 0;
    CREATE TABLE users_shard_1 AS SELECT * FROM users WHERE MOD(id, 4) = 1;
    CREATE TABLE users_shard_2 AS SELECT * FROM users WHERE MOD(id, 4) = 2;
    CREATE TABLE users_shard_3 AS SELECT * FROM users WHERE MOD(id, 4) = 3;
    
    -- Range-based sharding
    CREATE TABLE orders_2023 AS SELECT * FROM orders WHERE YEAR(created_at) = 2023;
    CREATE TABLE orders_2024 AS SELECT * FROM orders WHERE YEAR(created_at) = 2024;
    
    -- Geographic sharding
    CREATE TABLE users_us AS SELECT * FROM users WHERE country = 'US';
    CREATE TABLE users_eu AS SELECT * FROM users WHERE country IN ('DE', 'FR', 'UK');
    CREATE TABLE users_asia AS SELECT * FROM users WHERE country IN ('JP', 'SG', 'KR');
    SQL
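
    With hash-based sharding, the application (or a routing proxy) applies the same hash function to the shard key to decide where each query goes. A minimal routing sketch matching the MOD(id, 4) scheme above (the shard_connections list and connect() calls are illustrative):

    NUM_SHARDS = 4
    
    def shard_for(user_id):
        """Return the shard index for a user, matching MOD(id, 4) above."""
        return user_id % NUM_SHARDS
    
    def get_user(shard_connections, user_id):
        """Route the read to the shard that owns this user_id."""
        shard = shard_for(user_id)
        conn = shard_connections[shard]
        with conn.cursor() as cur:
            cur.execute(f"SELECT * FROM users_shard_{shard} WHERE id = %s", (user_id,))
            return cur.fetchone()
    
    # shard_connections would hold one connection per shard, e.g.
    # [connect(host="shard0"), connect(host="shard1"), connect(host="shard2"), connect(host="shard3")]
    Python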

    Vertical Partitioning

    -- Split frequently accessed vs rarely accessed columns
    CREATE TABLE user_core (
        id INT PRIMARY KEY,
        username VARCHAR(50),
        email VARCHAR(100),
        last_login TIMESTAMP
    );
    
    CREATE TABLE user_profile (
        user_id INT PRIMARY KEY,
        full_name VARCHAR(200),
        bio TEXT,
        avatar_url VARCHAR(500),
        created_at TIMESTAMP,
        FOREIGN KEY (user_id) REFERENCES user_core(id)
    );
    SQL

    7. Master-Slave Replication

    Read Replicas

    // Master: all write operations (INSERT, UPDATE, DELETE)
    // Slave/replica: read operations
    // Read/write routing is configured in the application layer (PHP sketch):
    class DatabaseConnection {
        private $master_db;
        private $slave_db;
    
        public function write($query) {
            return $this->master_db->query($query);
        }
    
        public function read($query) {
            return $this->slave_db->query($query);
        }
    }
    PHP

    8. Federation (Split by Function)

    Functional Database Splitting

    -- User Database
    CREATE DATABASE user_system;
    USE user_system;
    CREATE TABLE users (...);
    CREATE TABLE user_profiles (...);
    
    -- Product Database
    CREATE DATABASE product_system;
    USE product_system;
    CREATE TABLE products (...);
    CREATE TABLE categories (...);
    
    -- Order Database  
    CREATE DATABASE order_system;
    USE order_system;
    CREATE TABLE orders (...);
    CREATE TABLE order_items (...);
    SQL

    Performance & Optimization

    1. Indexing Strategies

    Index Types and Usage

    -- B-Tree Index (default)
    CREATE INDEX idx_user_email ON users(email);
    
    -- Composite Index
    CREATE INDEX idx_order_user_date ON orders(user_id, order_date);
    
    -- Covering Index (INCLUDE syntax: PostgreSQL 11+ / SQL Server;
    -- in MySQL, add the extra columns to the index key instead)
    CREATE INDEX idx_order_covering ON orders(user_id, order_date) 
    INCLUDE (total_amount, status);
    
    -- Partial Index (PostgreSQL)
    CREATE INDEX idx_active_users ON users(email) WHERE is_active = true;
    
    -- Functional Index (PostgreSQL; MySQL 8.0+ uses functional key parts)
    CREATE INDEX idx_upper_lastname ON users(UPPER(last_name));
    
    -- Full-text Index (MySQL)
    CREATE FULLTEXT INDEX idx_product_search ON products(name, description);
    SQL

    Index Optimization

    -- Analyze index usage
    SHOW INDEX FROM users;
    EXPLAIN SELECT * FROM users WHERE email = 'john@example.com';
    
    -- Drop unused indexes
    DROP INDEX idx_unused ON table_name;
    
    -- Rebuild fragmented indexes
    ALTER TABLE users ENGINE=InnoDB;  -- MySQL
    REINDEX TABLE users;  -- PostgreSQL
    SQL

    2. Query Optimization

    Query Planning

    -- Use EXPLAIN to analyze queries
    EXPLAIN ANALYZE 
    SELECT u.name, COUNT(o.id) as order_count
    FROM users u
    LEFT JOIN orders o ON u.id = o.user_id
    WHERE u.created_at >= '2023-01-01'
    GROUP BY u.id, u.name
    HAVING COUNT(o.id) > 5;
    
    -- Optimize with proper indexing
    CREATE INDEX idx_user_created ON users(created_at);
    CREATE INDEX idx_order_user ON orders(user_id);
    SQL

    Query Rewriting

    -- Instead of using OR conditions
    SELECT * FROM products WHERE category = 'electronics' OR category = 'computers';
    
    -- A UNION ALL can be faster when each branch can use its own index
    SELECT * FROM products WHERE category = 'electronics'
    UNION ALL
    SELECT * FROM products WHERE category = 'computers';
    
    -- Instead of correlated subqueries
    SELECT * FROM users u 
    WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);
    
    -- Use JOIN
    SELECT DISTINCT u.* FROM users u
    INNER JOIN orders o ON u.id = o.user_id;
    SQL

    3. Caching Strategies

    Application-Level Caching

    # Redis caching example
    import redis
    import json
    
    cache = redis.Redis(host='localhost', port=6379)
    
    def get_user(user_id):
        # Try cache first
        cached_user = cache.get(f"user:{user_id}")
        if cached_user:
            return json.loads(cached_user)
    
        # Query database
        user = database.query("SELECT * FROM users WHERE id = %s", user_id)
    
        # Cache result
        cache.setex(f"user:{user_id}", 3600, json.dumps(user))
        return user
    Python

    Database-Level Caching

    -- MySQL Query Cache (deprecated in 5.7, removed in MySQL 8.0)
    SET GLOBAL query_cache_type = ON;
    SET GLOBAL query_cache_size = 268435456;  -- 256MB
    
    -- Buffer Pool optimization (value in bytes; size suffixes such as 2G only work in my.cnf)
    SET GLOBAL innodb_buffer_pool_size = 2147483648;  -- 2GB
    
    -- Server connection limits (client-side pooling is covered below)
    SET GLOBAL max_connections = 200;
    SET GLOBAL innodb_thread_concurrency = 8;
    SQL

    4. Partitioning

    Table Partitioning

    -- Range Partitioning
    CREATE TABLE sales (
        id INT,
        sale_date DATE,
        amount DECIMAL(10,2)
    )
    PARTITION BY RANGE (YEAR(sale_date)) (
        PARTITION p2022 VALUES LESS THAN (2023),
        PARTITION p2023 VALUES LESS THAN (2024),
        PARTITION p2024 VALUES LESS THAN (2025),
        PARTITION p_future VALUES LESS THAN MAXVALUE
    );
    
    -- Hash Partitioning
    CREATE TABLE users (
        id INT,
        name VARCHAR(100)
    )
    PARTITION BY HASH(id)
    PARTITIONS 4;
    
    -- List Partitioning
    CREATE TABLE orders (
        id INT,
        region VARCHAR(20)
    )
    PARTITION BY LIST COLUMNS(region) (
        PARTITION p_north VALUES IN ('north', 'northeast'),
        PARTITION p_south VALUES IN ('south', 'southeast'),
        PARTITION p_west VALUES IN ('west', 'southwest')
    );
    SQL

    5. Connection Management

    Connection Pooling

    # Python connection pooling example
    from sqlalchemy import create_engine
    from sqlalchemy.pool import QueuePool
    
    engine = create_engine(
        'mysql://user:password@localhost/database',
        poolclass=QueuePool,
        pool_size=20,
        max_overflow=30,
        pool_recycle=3600,
        pool_pre_ping=True
    )
    Python

    6. Monitoring and Profiling

    Performance Monitoring

    -- MySQL Performance Schema
    SELECT * FROM performance_schema.events_statements_summary_by_digest
    ORDER BY sum_timer_wait DESC LIMIT 10;
    
    -- PostgreSQL pg_stat_statements (columns are total_exec_time / mean_exec_time in PG 13+)
    SELECT query, calls, total_time, mean_time 
    FROM pg_stat_statements 
    ORDER BY mean_time DESC LIMIT 10;
    
    -- Index usage statistics
    SELECT 
        schemaname,
        relname,
        indexrelname,
        idx_scan,
        idx_tup_read,
        idx_tup_fetch
    FROM pg_stat_user_indexes
    ORDER BY idx_scan DESC;
    SQL

    7. Hardware Optimization

    Storage Considerations

    • SSD vs HDD: Use SSDs for better I/O performance
    • RAID Configuration: RAID 10 for high performance, RAID 5/6 for storage efficiency
    • Separate disks: OS, logs, data, and temp files on separate drives

    Memory Optimization

    -- MySQL memory settings (in my.cnf; size suffixes are not valid in SET statements)
    innodb_buffer_pool_size = 8G   -- 70-80% of available RAM on a dedicated server
    innodb_log_buffer_size = 256M
    key_buffer_size = 512M
    
    -- PostgreSQL memory settings
    -- In postgresql.conf
    shared_buffers = 2GB
    effective_cache_size = 6GB
    work_mem = 64MB
    maintenance_work_mem = 512MB
    SQL

    This cheatsheet covers essential database design principles and architecture patterns for building scalable, maintainable database systems, pairing the core concepts with practical examples that can be applied directly.

