Table of Contents
- SQL Fundamentals
- Data Definition Language (DDL)
- Data Manipulation Language (DML)
- Data Query Language (DQL)
- Joins
- Aggregation & Functions
- Advanced SQL
- NoSQL Databases
- MongoDB
- Redis
- DynamoDB
- Cassandra
- Database Design Principles
- Database Architecture Patterns
- Performance & Optimization
SQL Fundamentals
Basic Syntax
-- Comments start with --
/* Multi-line
comments */
-- SQL is case-insensitive but KEYWORDS are conventionally uppercase
SELECT column1, column2 FROM table_name;SQLData Types
| Type | Description | Example |
|---|---|---|
INT | Integer | 123 |
VARCHAR(n) | Variable character string | 'Hello' |
CHAR(n) | Fixed character string | 'ABC' |
TEXT | Large text | 'Long text...' |
DECIMAL(p,s) | Decimal number | 123.45 |
DATE | Date | '2023-12-25' |
DATETIME | Date and time | '2023-12-25 10:30:00' |
BOOLEAN | True/False | TRUE, FALSE |
Data Definition Language (DDL)
Creating Database
-- Create database
CREATE DATABASE company_db;
-- Use database
USE company_db;
-- Drop database
DROP DATABASE company_db;SQLCreating Tables
-- Basic table creation
CREATE TABLE employees (
id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
last_name VARCHAR(50) NOT NULL,
email VARCHAR(100) UNIQUE,
department_id INT,
salary DECIMAL(10,2),
hire_date DATE,
is_active BOOLEAN DEFAULT TRUE
);
-- Table with foreign key
CREATE TABLE departments (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(50) NOT NULL,
location VARCHAR(100)
);
-- Add foreign key constraint
ALTER TABLE employees
ADD CONSTRAINT fk_department
FOREIGN KEY (department_id) REFERENCES departments(id);SQLModifying Tables
-- Add column
ALTER TABLE employees ADD COLUMN phone VARCHAR(15);
-- Modify column
ALTER TABLE employees MODIFY COLUMN phone VARCHAR(20);
-- Drop column
ALTER TABLE employees DROP COLUMN phone;
-- Add index
CREATE INDEX idx_employee_email ON employees(email);
-- Drop index
DROP INDEX idx_employee_email ON employees;
-- Drop table
DROP TABLE employees;SQLData Manipulation Language (DML)
INSERT Operations
-- Single row insert
INSERT INTO employees (first_name, last_name, email, department_id, salary, hire_date)
VALUES ('John', 'Doe', 'john.doe@company.com', 1, 75000.00, '2023-01-15');
-- Multiple rows insert
INSERT INTO employees (first_name, last_name, email, department_id, salary, hire_date)
VALUES
('Jane', 'Smith', 'jane.smith@company.com', 2, 80000.00, '2023-02-01'),
('Bob', 'Johnson', 'bob.johnson@company.com', 1, 70000.00, '2023-02-15');
-- Insert from another table
INSERT INTO employees_backup
SELECT * FROM employees WHERE hire_date < '2023-01-01';SQLUPDATE Operations
-- Update single record
UPDATE employees
SET salary = 85000.00
WHERE id = 1;
-- Update multiple records
UPDATE employees
SET salary = salary * 1.1
WHERE department_id = 1;
-- Update with JOIN
UPDATE employees e
JOIN departments d ON e.department_id = d.id
SET e.salary = e.salary * 1.05
WHERE d.name = 'Engineering';SQLDELETE Operations
-- Delete specific records
DELETE FROM employees WHERE id = 1;
-- Delete with condition
DELETE FROM employees WHERE hire_date < '2020-01-01';
-- Delete all records (but keep table structure)
DELETE FROM employees;
-- Truncate table (faster than DELETE)
TRUNCATE TABLE employees;SQLData Query Language (DQL)
Basic SELECT
-- Select all columns
SELECT * FROM employees;
-- Select specific columns
SELECT first_name, last_name, salary FROM employees;
-- Select with alias
SELECT
first_name AS 'First Name',
last_name AS 'Last Name',
salary * 12 AS 'Annual Salary'
FROM employees;
-- Select distinct values
SELECT DISTINCT department_id FROM employees;SQLWHERE Clause
-- Basic conditions
SELECT * FROM employees WHERE salary > 75000;
SELECT * FROM employees WHERE department_id = 1;
SELECT * FROM employees WHERE first_name = 'John';
-- Multiple conditions
SELECT * FROM employees
WHERE salary > 70000 AND department_id = 1;
SELECT * FROM employees
WHERE department_id = 1 OR department_id = 2;
-- Pattern matching
SELECT * FROM employees WHERE first_name LIKE 'J%';
SELECT * FROM employees WHERE email LIKE '%@company.com';
-- Range conditions
SELECT * FROM employees WHERE salary BETWEEN 70000 AND 90000;
SELECT * FROM employees WHERE hire_date BETWEEN '2023-01-01' AND '2023-12-31';
-- IN clause
SELECT * FROM employees WHERE department_id IN (1, 2, 3);
-- NULL checks
SELECT * FROM employees WHERE email IS NULL;
SELECT * FROM employees WHERE email IS NOT NULL;SQLORDER BY
-- Sort ascending (default)
SELECT * FROM employees ORDER BY salary;
-- Sort descending
SELECT * FROM employees ORDER BY salary DESC;
-- Multiple column sorting
SELECT * FROM employees ORDER BY department_id, salary DESC;SQLLIMIT and OFFSET
-- Limit results
SELECT * FROM employees LIMIT 10;
-- Pagination
SELECT * FROM employees LIMIT 10 OFFSET 20;
-- MySQL shorthand for pagination
SELECT * FROM employees LIMIT 20, 10;SQLJoins
INNER JOIN
-- Basic inner join
SELECT e.first_name, e.last_name, d.name AS department
FROM employees e
INNER JOIN departments d ON e.department_id = d.id;
-- Multiple table join
SELECT e.first_name, e.last_name, d.name AS department, p.title AS project
FROM employees e
INNER JOIN departments d ON e.department_id = d.id
INNER JOIN projects p ON e.id = p.employee_id;SQLLEFT JOIN
-- Include all employees, even without department
SELECT e.first_name, e.last_name, d.name AS department
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id;SQLRIGHT JOIN
-- Include all departments, even without employees
SELECT e.first_name, e.last_name, d.name AS department
FROM employees e
RIGHT JOIN departments d ON e.department_id = d.id;SQLFULL OUTER JOIN
-- Include all records from both tables
SELECT e.first_name, e.last_name, d.name AS department
FROM employees e
FULL OUTER JOIN departments d ON e.department_id = d.id;SQLCROSS JOIN
-- Cartesian product of both tables
SELECT e.first_name, d.name
FROM employees e
CROSS JOIN departments d;SQLSelf JOIN
-- Join table with itself
SELECT
e1.first_name AS employee,
e2.first_name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.id;SQLAggregation & Functions
Aggregate Functions
-- Count records
SELECT COUNT(*) FROM employees;
SELECT COUNT(DISTINCT department_id) FROM employees;
-- Sum values
SELECT SUM(salary) FROM employees;
-- Average
SELECT AVG(salary) FROM employees;
-- Min and Max
SELECT MIN(salary), MAX(salary) FROM employees;
-- Group by
SELECT department_id, COUNT(*), AVG(salary)
FROM employees
GROUP BY department_id;
-- Having clause (filter groups)
SELECT department_id, COUNT(*) as emp_count
FROM employees
GROUP BY department_id
HAVING COUNT(*) > 5;SQLString Functions
-- Concatenation
SELECT CONCAT(first_name, ' ', last_name) AS full_name FROM employees;
-- Substring
SELECT SUBSTRING(email, 1, 5) FROM employees;
-- Length
SELECT LENGTH(first_name) FROM employees;
-- Upper/Lower case
SELECT UPPER(first_name), LOWER(last_name) FROM employees;
-- Trim whitespace
SELECT TRIM(first_name) FROM employees;
-- Replace
SELECT REPLACE(email, '@company.com', '@newcompany.com') FROM employees;SQLDate Functions
-- Current date/time
SELECT NOW(), CURDATE(), CURTIME();
-- Date formatting
SELECT DATE_FORMAT(hire_date, '%Y-%m-%d') FROM employees;
-- Date arithmetic
SELECT hire_date, DATE_ADD(hire_date, INTERVAL 1 YEAR) AS anniversary
FROM employees;
-- Date difference
SELECT DATEDIFF(NOW(), hire_date) AS days_employed FROM employees;
-- Extract parts
SELECT YEAR(hire_date), MONTH(hire_date), DAY(hire_date) FROM employees;SQLMathematical Functions
-- Round numbers
SELECT ROUND(salary, 2) FROM employees;
-- Ceiling and Floor
SELECT CEIL(salary/12), FLOOR(salary/12) FROM employees;
-- Absolute value
SELECT ABS(-100);
-- Power
SELECT POWER(salary, 2) FROM employees;
-- Random
SELECT RAND();SQLAdvanced SQL
Subqueries
-- Scalar subquery
SELECT * FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
-- Correlated subquery
SELECT * FROM employees e1
WHERE salary > (
SELECT AVG(salary)
FROM employees e2
WHERE e2.department_id = e1.department_id
);
-- EXISTS subquery
SELECT * FROM departments d
WHERE EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.id
);
-- IN subquery
SELECT * FROM employees
WHERE department_id IN (
SELECT id FROM departments
WHERE location = 'New York'
);SQLCommon Table Expressions (CTE)
-- Basic CTE
WITH high_earners AS (
SELECT * FROM employees WHERE salary > 80000
)
SELECT department_id, COUNT(*)
FROM high_earners
GROUP BY department_id;
-- Recursive CTE (organizational hierarchy)
WITH RECURSIVE employee_hierarchy AS (
-- Base case: top-level managers
SELECT id, first_name, last_name, manager_id, 1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case
SELECT e.id, e.first_name, e.last_name, e.manager_id, eh.level + 1
FROM employees e
INNER JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM employee_hierarchy ORDER BY level, id;SQLWindow Functions
-- Row number
SELECT
first_name,
last_name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
FROM employees;
-- Rank and Dense Rank
SELECT
first_name,
last_name,
salary,
RANK() OVER (ORDER BY salary DESC) as rank,
DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank
FROM employees;
-- Partition by
SELECT
first_name,
last_name,
department_id,
salary,
RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) as dept_rank
FROM employees;
-- Lead and Lag
SELECT
first_name,
salary,
LAG(salary) OVER (ORDER BY hire_date) as prev_salary,
LEAD(salary) OVER (ORDER BY hire_date) as next_salary
FROM employees;
-- Running totals
SELECT
first_name,
salary,
SUM(salary) OVER (ORDER BY hire_date ROWS UNBOUNDED PRECEDING) as running_total
FROM employees;SQLStored Procedures
-- Create stored procedure
DELIMITER //
CREATE PROCEDURE GetEmployeesByDepartment(IN dept_id INT)
BEGIN
SELECT * FROM employees WHERE department_id = dept_id;
END //
DELIMITER ;
-- Call stored procedure
CALL GetEmployeesByDepartment(1);
-- Procedure with OUT parameter
DELIMITER //
CREATE PROCEDURE GetDepartmentStats(
IN dept_id INT,
OUT emp_count INT,
OUT avg_salary DECIMAL(10,2)
)
BEGIN
SELECT COUNT(*), AVG(salary)
INTO emp_count, avg_salary
FROM employees
WHERE department_id = dept_id;
END //
DELIMITER ;
-- Call with OUT parameters
CALL GetDepartmentStats(1, @count, @avg);
SELECT @count, @avg;SQLTriggers
-- Create trigger
CREATE TRIGGER salary_audit_trigger
AFTER UPDATE ON employees
FOR EACH ROW
BEGIN
IF NEW.salary != OLD.salary THEN
INSERT INTO salary_audit (employee_id, old_salary, new_salary, change_date)
VALUES (NEW.id, OLD.salary, NEW.salary, NOW());
END IF;
END;
-- Drop trigger
DROP TRIGGER salary_audit_trigger;SQLNoSQL Databases
Types of NoSQL Databases
- Document Databases (MongoDB, CouchDB)
- Store data as documents (JSON-like)
- Flexible schema
- Good for content management, catalogs
- Key-Value Stores (Redis, DynamoDB)
- Simple key-value pairs
- Fast lookups
- Good for caching, session storage
- Column-Family (Cassandra, HBase)
- Data stored in column families
- Good for time-series data, analytics
- Graph Databases (Neo4j, Amazon Neptune)
- Data stored as nodes and relationships
- Good for social networks, recommendations
MongoDB
Basic Operations
// Connect to database
use company_db
// Insert documents
db.employees.insertOne({
firstName: "John",
lastName: "Doe",
email: "john.doe@company.com",
department: "Engineering",
salary: 75000,
skills: ["JavaScript", "Python", "SQL"],
address: {
street: "123 Main St",
city: "New York",
zipCode: "10001"
}
})
// Insert multiple documents
db.employees.insertMany([
{
firstName: "Jane",
lastName: "Smith",
email: "jane.smith@company.com",
department: "Marketing",
salary: 70000,
skills: ["Marketing", "Analytics"]
},
{
firstName: "Bob",
lastName: "Johnson",
email: "bob.johnson@company.com",
department: "Engineering",
salary: 80000,
skills: ["Java", "Spring", "MongoDB"]
}
])JavaScriptQuerying Documents
// Find all documents
db.employees.find()
// Find with condition
db.employees.find({department: "Engineering"})
// Find with multiple conditions
db.employees.find({
department: "Engineering",
salary: {$gt: 70000}
})
// Find one document
db.employees.findOne({email: "john.doe@company.com"})
// Projection (select specific fields)
db.employees.find({}, {firstName: 1, lastName: 1, salary: 1})
// Query nested documents
db.employees.find({"address.city": "New York"})
// Query arrays
db.employees.find({skills: "JavaScript"})
db.employees.find({skills: {$in: ["Python", "Java"]}})JavaScriptQuery Operators
// Comparison operators
db.employees.find({salary: {$gt: 75000}}) // greater than
db.employees.find({salary: {$gte: 75000}}) // greater than or equal
db.employees.find({salary: {$lt: 80000}}) // less than
db.employees.find({salary: {$lte: 80000}}) // less than or equal
db.employees.find({salary: {$ne: 75000}}) // not equal
db.employees.find({department: {$in: ["Engineering", "Marketing"]}})
// Logical operators
db.employees.find({
$and: [
{department: "Engineering"},
{salary: {$gt: 70000}}
]
})
db.employees.find({
$or: [
{department: "Engineering"},
{department: "Marketing"}
]
})
// Text search
db.employees.find({$text: {$search: "engineer"}})
// Regular expressions
db.employees.find({email: /company\.com$/})JavaScriptUpdating Documents
// Update one document
db.employees.updateOne(
{email: "john.doe@company.com"},
{$set: {salary: 85000}}
)
// Update multiple documents
db.employees.updateMany(
{department: "Engineering"},
{$inc: {salary: 5000}}
)
// Add to array
db.employees.updateOne(
{email: "john.doe@company.com"},
{$push: {skills: "React"}}
)
// Remove from array
db.employees.updateOne(
{email: "john.doe@company.com"},
{$pull: {skills: "SQL"}}
)
// Upsert (insert if not exists)
db.employees.updateOne(
{email: "new.employee@company.com"},
{$set: {firstName: "New", lastName: "Employee"}},
{upsert: true}
)JavaScriptAggregation Pipeline
// Group by department and calculate average salary
db.employees.aggregate([
{
$group: {
_id: "$department",
avgSalary: {$avg: "$salary"},
count: {$sum: 1}
}
}
])
// Match, group, and sort
db.employees.aggregate([
{$match: {salary: {$gt: 70000}}},
{
$group: {
_id: "$department",
avgSalary: {$avg: "$salary"}
}
},
{$sort: {avgSalary: -1}}
])
// Unwind arrays
db.employees.aggregate([
{$unwind: "$skills"},
{
$group: {
_id: "$skills",
count: {$sum: 1}
}
}
])JavaScriptIndexing
// Create index
db.employees.createIndex({email: 1})
db.employees.createIndex({department: 1, salary: -1})
// Text index
db.employees.createIndex({firstName: "text", lastName: "text"})
// List indexes
db.employees.getIndexes()
// Drop index
db.employees.dropIndex({email: 1})JavaScriptRedis
Basic Data Types and Operations
# Strings
SET name "John Doe"
GET name
INCR counter
DECR counter
EXPIRE name 3600 # Set expiration in seconds
# Lists
LPUSH queue "task1"
RPUSH queue "task2"
LPOP queue
RPOP queue
LRANGE queue 0 -1 # Get all elements
# Sets
SADD skills "JavaScript"
SADD skills "Python"
SMEMBERS skills
SISMEMBER skills "JavaScript"
# Hashes
HSET user:1 name "John" email "john@example.com"
HGET user:1 name
HGETALL user:1
HDEL user:1 email
# Sorted Sets
ZADD leaderboard 100 "player1"
ZADD leaderboard 150 "player2"
ZRANGE leaderboard 0 -1 WITHSCORES
ZRANK leaderboard "player1"JavaScriptAdvanced Redis Operations
# Transactions
MULTI
SET key1 "value1"
SET key2 "value2"
EXEC
# Pub/Sub
PUBLISH channel "message"
SUBSCRIBE channel
# Lua Scripts
EVAL "return redis.call('get', KEYS[1])" 1 mykey
# Pipeline commands for better performance
redis-cli --pipe < commands.txtJavaScriptDynamoDB
Table Operations
// Create table (AWS SDK)
const params = {
TableName: 'employees',
KeySchema: [
{ AttributeName: 'id', KeyType: 'HASH' }
],
AttributeDefinitions: [
{ AttributeName: 'id', AttributeType: 'S' }
],
BillingMode: 'PAY_PER_REQUEST'
};
dynamodb.createTable(params);JavaScriptCRUD Operations
// Put item
const putParams = {
TableName: 'employees',
Item: {
id: '123',
firstName: 'John',
lastName: 'Doe',
department: 'Engineering',
salary: 75000
}
};
dynamodb.put(putParams);
// Get item
const getParams = {
TableName: 'employees',
Key: {
id: '123'
}
};
dynamodb.get(getParams);
// Update item
const updateParams = {
TableName: 'employees',
Key: { id: '123' },
UpdateExpression: 'SET salary = :salary',
ExpressionAttributeValues: {
':salary': 80000
}
};
dynamodb.update(updateParams);
// Delete item
const deleteParams = {
TableName: 'employees',
Key: { id: '123' }
};
dynamodb.delete(deleteParams);JavaScriptQuerying and Scanning
// Query (requires partition key)
const queryParams = {
TableName: 'employees',
KeyConditionExpression: 'id = :id',
ExpressionAttributeValues: {
':id': '123'
}
};
dynamodb.query(queryParams);
// Scan (examines every item)
const scanParams = {
TableName: 'employees',
FilterExpression: 'department = :dept',
ExpressionAttributeValues: {
':dept': 'Engineering'
}
};
dynamodb.scan(scanParams);JavaScriptCassandra
Keyspace and Table Creation
-- Create keyspace
CREATE KEYSPACE company WITH replication = {
'class': 'SimpleStrategy',
'replication_factor': 3
};
USE company;
-- Create table
CREATE TABLE employees (
id UUID PRIMARY KEY,
first_name TEXT,
last_name TEXT,
email TEXT,
department TEXT,
salary DECIMAL,
hire_date DATE
);
-- Create table with composite key
CREATE TABLE employee_by_department (
department TEXT,
hire_date DATE,
id UUID,
first_name TEXT,
last_name TEXT,
salary DECIMAL,
PRIMARY KEY (department, hire_date, id)
);SQLCRUD Operations
-- Insert
INSERT INTO employees (id, first_name, last_name, email, department, salary, hire_date)
VALUES (uuid(), 'John', 'Doe', 'john@company.com', 'Engineering', 75000, '2023-01-15');
-- Select
SELECT * FROM employees WHERE id = 123e4567-e89b-12d3-a456-426614174000;
-- Update
UPDATE employees SET salary = 80000 WHERE id = 123e4567-e89b-12d3-a456-426614174000;
-- Delete
DELETE FROM employees WHERE id = 123e4567-e89b-12d3-a456-426614174000;SQLCassandra Query Language (CQL) Features
-- Query with partition key
SELECT * FROM employee_by_department WHERE department = 'Engineering';
-- Query with clustering columns
SELECT * FROM employee_by_department
WHERE department = 'Engineering' AND hire_date >= '2023-01-01';
-- Use collections
CREATE TABLE employee_skills (
id UUID PRIMARY KEY,
skills SET<TEXT>,
projects MAP<TEXT, TEXT>
);
-- Insert with collections
INSERT INTO employee_skills (id, skills, projects)
VALUES (uuid(), {'Java', 'Python', 'SQL'}, {'project1': 'description1'});SQLBest Practices
SQL Best Practices
- Use indexes on frequently queried columns
- Avoid SELECT * in production code
- Use parameterized queries to prevent SQL injection
- Normalize data to reduce redundancy
- Use appropriate data types for efficiency
- Write readable queries with proper formatting
- Test queries with realistic data volumes
NoSQL Best Practices
- Understand your access patterns before designing schema
- Denormalize data for better read performance
- Use appropriate database type for your use case
- Plan for scalability from the beginning
- Monitor performance and optimize queries
- Implement proper error handling and retry logic
- Use connection pooling for better performance
Security Best Practices
- Use parameterized queries or prepared statements
- Implement proper authentication and authorization
- Encrypt sensitive data at rest and in transit
- Regular security audits and updates
- Principle of least privilege for database users
- Monitor database access and unusual activities
- Backup and disaster recovery planning
Common Patterns and Use Cases
SQL Use Cases
- OLTP Systems: Banking, e-commerce, CRM
- Data Warehousing: Business intelligence, reporting
- Financial Systems: Accounting, billing
- Inventory Management: Stock tracking, supply chain
NoSQL Use Cases
- Document DB: Content management, catalogs, user profiles
- Key-Value: Caching, session storage, shopping carts
- Column-Family: Time-series data, IoT sensors, analytics
- Graph: Social networks, fraud detection, recommendations
Database Design Principles
1. Database Normalization
First Normal Form (1NF)
- Rule: Eliminate repeating groups and ensure atomic values
- Example:
-- Violates 1NF (multiple values in one column) CREATE TABLE employees_bad ( id INT PRIMARY KEY, name VARCHAR(100), skills VARCHAR(500) -- "Java,Python,SQL" - not atomic );
— Follows 1NF CREATE TABLE employees ( id INT PRIMARY KEY, name VARCHAR(100) );
CREATE TABLE employee_skills ( employee_id INT, skill VARCHAR(50), PRIMARY KEY (employee_id, skill), FOREIGN KEY (employee_id) REFERENCES employees(id) );
#### Second Normal Form (2NF)
- **Rule**: Must be in 1NF + eliminate partial dependencies
- **Example**:
```sql
-- Violates 2NF (course_name depends only on course_id, not the full key)
CREATE TABLE enrollments_bad (
student_id INT,
course_id INT,
course_name VARCHAR(100), -- Partial dependency
grade CHAR(1),
PRIMARY KEY (student_id, course_id)
);
-- Follows 2NF
CREATE TABLE courses (
id INT PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE enrollments (
student_id INT,
course_id INT,
grade CHAR(1),
PRIMARY KEY (student_id, course_id),
FOREIGN KEY (course_id) REFERENCES courses(id)
);SQLThird Normal Form (3NF)
- Rule: Must be in 2NF + eliminate transitive dependencies
- Example:
-- Violates 3NF (department_name depends on department_id, not employee_id) CREATE TABLE employees_bad ( id INT PRIMARY KEY, name VARCHAR(100), department_id INT, department_name VARCHAR(100) -- Transitive dependency );
— Follows 3NF CREATE TABLE departments ( id INT PRIMARY KEY, name VARCHAR(100) );
CREATE TABLE employees ( id INT PRIMARY KEY, name VARCHAR(100), department_id INT, FOREIGN KEY (department_id) REFERENCES departments(id) );
#### Boyce-Codd Normal Form (BCNF)
- **Rule**: Every determinant must be a candidate key
- **Example**:
```sql
-- Violates BCNF
CREATE TABLE class_schedule (
student_id INT,
subject VARCHAR(50),
teacher VARCHAR(50),
PRIMARY KEY (student_id, subject)
-- Issue: teacher determines subject, but teacher is not a candidate key
);
-- Follows BCNF
CREATE TABLE teachers (
id INT PRIMARY KEY,
name VARCHAR(50),
subject VARCHAR(50)
);
CREATE TABLE student_classes (
student_id INT,
teacher_id INT,
PRIMARY KEY (student_id, teacher_id),
FOREIGN KEY (teacher_id) REFERENCES teachers(id)
);SQL2. Denormalization Strategies
When to Denormalize
- Read-heavy workloads with complex joins
- Performance requirements outweigh storage costs
- Data warehouse and reporting scenarios
Denormalization Techniques
-- Materialized Views
CREATE MATERIALIZED VIEW employee_summary AS
SELECT
e.id,
e.name,
d.name as department_name,
COUNT(p.id) as project_count,
AVG(pr.rating) as avg_performance
FROM employees e
JOIN departments d ON e.department_id = d.id
LEFT JOIN projects p ON e.id = p.lead_id
LEFT JOIN performance_reviews pr ON e.id = pr.employee_id
GROUP BY e.id, e.name, d.name;
-- Redundant Columns for Performance
CREATE TABLE orders (
id INT PRIMARY KEY,
customer_id INT,
customer_name VARCHAR(100), -- Denormalized for quick access
order_total DECIMAL(10,2), -- Calculated field stored
item_count INT, -- Aggregate stored
order_date DATE
);SQL3. Data Modeling Principles
Entity-Relationship Design
-- One-to-Many Relationship
CREATE TABLE customers (
id INT PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(100)
);
CREATE TABLE orders (
id INT PRIMARY KEY,
customer_id INT,
order_date DATE,
FOREIGN KEY (customer_id) REFERENCES customers(id)
);
-- Many-to-Many Relationship
CREATE TABLE students (
id INT PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE courses (
id INT PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE enrollments (
student_id INT,
course_id INT,
enrollment_date DATE,
PRIMARY KEY (student_id, course_id),
FOREIGN KEY (student_id) REFERENCES students(id),
FOREIGN KEY (course_id) REFERENCES courses(id)
);
-- Self-Referencing Relationship
CREATE TABLE employees (
id INT PRIMARY KEY,
name VARCHAR(100),
manager_id INT,
FOREIGN KEY (manager_id) REFERENCES employees(id)
);SQLDomain-Driven Design (DDD)
-- Aggregate Root Pattern
CREATE TABLE orders (
id INT PRIMARY KEY,
customer_id INT,
status ENUM('pending', 'confirmed', 'shipped', 'delivered'),
total_amount DECIMAL(10,2),
created_at TIMESTAMP,
version INT -- For optimistic locking
);
-- Value Objects as Embedded Data
CREATE TABLE addresses (
id INT PRIMARY KEY,
street VARCHAR(200),
city VARCHAR(100),
state VARCHAR(50),
zip_code VARCHAR(20),
country VARCHAR(50)
);
-- Bounded Context Separation
-- Order Context
CREATE SCHEMA order_management;
-- Customer Context
CREATE SCHEMA customer_management;
-- Inventory Context
CREATE SCHEMA inventory_management;SQL4. NoSQL Design Principles
Document Database Design
// MongoDB - Embedding vs Referencing
// Embedding (1:Few relationship)
{
_id: ObjectId("..."),
title: "Blog Post",
content: "...",
author: {
name: "John Doe",
email: "john@example.com"
},
comments: [
{
text: "Great post!",
author: "Jane Smith",
date: ISODate("...")
}
]
}
// Referencing (1:Many relationship)
// Posts collection
{
_id: ObjectId("..."),
title: "Blog Post",
content: "...",
author_id: ObjectId("...")
}
// Authors collection
{
_id: ObjectId("..."),
name: "John Doe",
email: "john@example.com",
bio: "..."
}JavaScriptKey-Value Store Design
// Redis design patterns
// User session
SET session:user123 '{"userId": 123, "name": "John", "lastActive": "2023-10-27"}'
EXPIRE session:user123 3600
// Shopping cart
HSET cart:user123 product:456 2
HSET cart:user123 product:789 1
EXPIRE cart:user123 86400
// Leaderboard
ZADD game:leaderboard 1500 "player1"
ZADD game:leaderboard 2000 "player2"JavaScriptColumn-Family Design
-- Cassandra design principles
-- Query-driven modeling
CREATE TABLE user_activity_by_date (
user_id UUID,
activity_date DATE,
activity_time TIMESTAMP,
activity_type TEXT,
details TEXT,
PRIMARY KEY (user_id, activity_date, activity_time)
) WITH CLUSTERING ORDER BY (activity_date DESC, activity_time DESC);
-- Time-series data
CREATE TABLE sensor_data (
sensor_id UUID,
year INT,
month INT,
timestamp TIMESTAMP,
temperature FLOAT,
humidity FLOAT,
PRIMARY KEY ((sensor_id, year, month), timestamp)
);SQLDatabase Architecture Patterns
1. Monolithic Database Architecture
Single Database Pattern
-- All application data in one database
CREATE DATABASE ecommerce;
USE ecommerce;
-- User management
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
username VARCHAR(50),
email VARCHAR(100),
password_hash VARCHAR(255)
);
-- Product catalog
CREATE TABLE products (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(200),
price DECIMAL(10,2),
inventory_count INT
);
-- Order processing
CREATE TABLE orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT,
total_amount DECIMAL(10,2),
status ENUM('pending', 'confirmed', 'shipped'),
FOREIGN KEY (user_id) REFERENCES users(id)
);SQLPros: Simple, ACID compliance, easy transactions Cons: Single point of failure, scaling limitations
2. Database per Service (Microservices)
Service Separation
-- User Service Database
CREATE DATABASE user_service;
CREATE TABLE users (
id INT PRIMARY KEY,
username VARCHAR(50),
email VARCHAR(100)
);
-- Product Service Database
CREATE DATABASE product_service;
CREATE TABLE products (
id INT PRIMARY KEY,
name VARCHAR(200),
price DECIMAL(10,2)
);
-- Order Service Database
CREATE DATABASE order_service;
CREATE TABLE orders (
id INT PRIMARY KEY,
user_id INT, -- Reference to user service
product_id INT, -- Reference to product service
quantity INT
);SQLPros: Independent scaling, technology diversity, fault isolation Cons: Complex transactions, data consistency challenges
3. CQRS (Command Query Responsibility Segregation)
Separate Read/Write Models
-- Write Model (Normalized for consistency)
CREATE TABLE commands_orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT,
status VARCHAR(20),
created_at TIMESTAMP
);
CREATE TABLE commands_order_items (
order_id INT,
product_id INT,
quantity INT,
price DECIMAL(10,2),
PRIMARY KEY (order_id, product_id)
);
-- Read Model (Denormalized for performance)
CREATE TABLE queries_order_summary (
order_id INT PRIMARY KEY,
user_name VARCHAR(100),
total_items INT,
total_amount DECIMAL(10,2),
order_status VARCHAR(20),
created_at TIMESTAMP
);
CREATE TABLE queries_user_orders (
user_id INT,
order_id INT,
order_date DATE,
total_amount DECIMAL(10,2),
INDEX idx_user_date (user_id, order_date)
);SQL4. Event Sourcing
Event Store Design
CREATE TABLE event_store (
event_id BIGINT PRIMARY KEY AUTO_INCREMENT,
aggregate_id VARCHAR(100),
aggregate_type VARCHAR(50),
event_type VARCHAR(100),
event_data JSON,
event_version INT,
created_at TIMESTAMP,
INDEX idx_aggregate (aggregate_id, event_version)
);
-- Example events
INSERT INTO event_store (aggregate_id, aggregate_type, event_type, event_data, event_version)
VALUES
('order-123', 'Order', 'OrderCreated',
'{"userId": "user-456", "items": [{"productId": "prod-789", "quantity": 2}]}', 1),
('order-123', 'Order', 'OrderConfirmed',
'{"confirmedAt": "2023-10-27T10:00:00Z"}', 2);
-- Projection/Snapshot table
CREATE TABLE order_projections (
order_id VARCHAR(100) PRIMARY KEY,
user_id VARCHAR(100),
status VARCHAR(20),
total_amount DECIMAL(10,2),
last_event_version INT,
updated_at TIMESTAMP
);SQL5. Polyglot Persistence
Multiple Database Technologies
// User profiles - Document DB (MongoDB)
db.users.insertOne({
_id: ObjectId("..."),
username: "john_doe",
profile: {
firstName: "John",
lastName: "Doe",
preferences: {
theme: "dark",
notifications: true
}
}
});
// Product catalog - Search engine (Elasticsearch)
PUT /products/_doc/1
{
"name": "Laptop",
"description": "High-performance laptop",
"price": 999.99,
"categories": ["electronics", "computers"],
"specs": {
"cpu": "Intel i7",
"ram": "16GB"
}
}
// Shopping cart - Key-value (Redis)
HSET cart:user123 product:456 2
HSET cart:user123 product:789 1
// Financial transactions - RDBMS (PostgreSQL)
CREATE TABLE transactions (
id SERIAL PRIMARY KEY,
user_id INT,
amount DECIMAL(15,2),
transaction_type VARCHAR(20),
created_at TIMESTAMP
);
// Analytics - Column store (Cassandra)
CREATE TABLE user_events (
user_id UUID,
event_date DATE,
event_time TIMESTAMP,
event_type TEXT,
properties MAP<TEXT, TEXT>,
PRIMARY KEY (user_id, event_date, event_time)
);JavaScript6. Sharding Strategies
Horizontal Partitioning
-- Hash-based sharding
CREATE TABLE users_shard_0 AS SELECT * FROM users WHERE MOD(id, 4) = 0;
CREATE TABLE users_shard_1 AS SELECT * FROM users WHERE MOD(id, 4) = 1;
CREATE TABLE users_shard_2 AS SELECT * FROM users WHERE MOD(id, 4) = 2;
CREATE TABLE users_shard_3 AS SELECT * FROM users WHERE MOD(id, 4) = 3;
-- Range-based sharding
CREATE TABLE orders_2023 AS SELECT * FROM orders WHERE YEAR(created_at) = 2023;
CREATE TABLE orders_2024 AS SELECT * FROM orders WHERE YEAR(created_at) = 2024;
-- Geographic sharding
CREATE TABLE users_us AS SELECT * FROM users WHERE country = 'US';
CREATE TABLE users_eu AS SELECT * FROM users WHERE country IN ('DE', 'FR', 'UK');
CREATE TABLE users_asia AS SELECT * FROM users WHERE country IN ('JP', 'SG', 'KR');SQLVertical Partitioning
-- Split frequently accessed vs rarely accessed columns
CREATE TABLE user_core (
id INT PRIMARY KEY,
username VARCHAR(50),
email VARCHAR(100),
last_login TIMESTAMP
);
CREATE TABLE user_profile (
user_id INT PRIMARY KEY,
full_name VARCHAR(200),
bio TEXT,
avatar_url VARCHAR(500),
created_at TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES user_core(id)
);SQL7. Master-Slave Replication
Read Replicas
-- Master (Write operations)
-- All INSERT, UPDATE, DELETE operations
-- Slave/Replica (Read operations)
-- Configuration in application layer
class DatabaseConnection {
private $master_db;
private $slave_db;
public function write($query) {
return $this->master_db->query($query);
}
public function read($query) {
return $this->slave_db->query($query);
}
}SQL8. Federation (Split by Function)
Functional Database Splitting
-- User Database
CREATE DATABASE user_system;
USE user_system;
CREATE TABLE users (...);
CREATE TABLE user_profiles (...);
-- Product Database
CREATE DATABASE product_system;
USE product_system;
CREATE TABLE products (...);
CREATE TABLE categories (...);
-- Order Database
CREATE DATABASE order_system;
USE order_system;
CREATE TABLE orders (...);
CREATE TABLE order_items (...);SQLPerformance & Optimization
1. Indexing Strategies
Index Types and Usage
-- B-Tree Index (default)
CREATE INDEX idx_user_email ON users(email);
-- Composite Index
CREATE INDEX idx_order_user_date ON orders(user_id, order_date);
-- Covering Index
CREATE INDEX idx_order_covering ON orders(user_id, order_date)
INCLUDE (total_amount, status);
-- Partial Index
CREATE INDEX idx_active_users ON users(email) WHERE is_active = true;
-- Functional Index
CREATE INDEX idx_upper_lastname ON users(UPPER(last_name));
-- Full-text Index
CREATE FULLTEXT INDEX idx_product_search ON products(name, description);SQLIndex Optimization
-- Analyze index usage
SHOW INDEX FROM users;
EXPLAIN SELECT * FROM users WHERE email = 'john@example.com';
-- Drop unused indexes
DROP INDEX idx_unused ON table_name;
-- Rebuild fragmented indexes
ALTER TABLE users ENGINE=InnoDB; -- MySQL
REINDEX TABLE users; -- PostgreSQLSQL2. Query Optimization
Query Planning
-- Use EXPLAIN to analyze queries
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 5;
-- Optimize with proper indexing
CREATE INDEX idx_user_created ON users(created_at);
CREATE INDEX idx_order_user ON orders(user_id);SQLQuery Rewriting
-- Instead of using OR conditions
SELECT * FROM products WHERE category = 'electronics' OR category = 'computers';
-- Use UNION for better performance
SELECT * FROM products WHERE category = 'electronics'
UNION ALL
SELECT * FROM products WHERE category = 'computers';
-- Instead of correlated subqueries
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);
-- Use JOIN
SELECT DISTINCT u.* FROM users u
INNER JOIN orders o ON u.id = o.user_id;SQL3. Caching Strategies
Application-Level Caching
# Redis caching example
import redis
import json
cache = redis.Redis(host='localhost', port=6379)
def get_user(user_id):
# Try cache first
cached_user = cache.get(f"user:{user_id}")
if cached_user:
return json.loads(cached_user)
# Query database
user = database.query("SELECT * FROM users WHERE id = %s", user_id)
# Cache result
cache.setex(f"user:{user_id}", 3600, json.dumps(user))
return userPythonDatabase-Level Caching
-- MySQL Query Cache (deprecated in 8.0)
SET global query_cache_type = ON;
SET global query_cache_size = 268435456; -- 256MB
-- Buffer Pool optimization
SET innodb_buffer_pool_size = 2G;
-- Connection pooling
SET max_connections = 200;
SET innodb_thread_concurrency = 8;SQL4. Partitioning
Table Partitioning
-- Range Partitioning
CREATE TABLE sales (
id INT,
sale_date DATE,
amount DECIMAL(10,2)
)
PARTITION BY RANGE (YEAR(sale_date)) (
PARTITION p2022 VALUES LESS THAN (2023),
PARTITION p2023 VALUES LESS THAN (2024),
PARTITION p2024 VALUES LESS THAN (2025),
PARTITION p_future VALUES LESS THAN MAXVALUE
);
-- Hash Partitioning
CREATE TABLE users (
id INT,
name VARCHAR(100)
)
PARTITION BY HASH(id)
PARTITIONS 4;
-- List Partitioning
CREATE TABLE orders (
id INT,
region VARCHAR(20)
)
PARTITION BY LIST COLUMNS(region) (
PARTITION p_north VALUES IN ('north', 'northeast'),
PARTITION p_south VALUES IN ('south', 'southeast'),
PARTITION p_west VALUES IN ('west', 'southwest')
);SQL5. Connection Management
Connection Pooling
# Python connection pooling example
from sqlalchemy import create_engine
from sqlalchemy.pool import QueuePool
engine = create_engine(
'mysql://user:password@localhost/database',
poolclass=QueuePool,
pool_size=20,
max_overflow=30,
pool_recycle=3600,
pool_pre_ping=True
)Python6. Monitoring and Profiling
Performance Monitoring
-- MySQL Performance Schema
SELECT * FROM performance_schema.events_statements_summary_by_digest
ORDER BY sum_timer_wait DESC LIMIT 10;
-- PostgreSQL pg_stat_statements
SELECT query, calls, total_time, mean_time
FROM pg_stat_statements
ORDER BY mean_time DESC LIMIT 10;
-- Index usage statistics
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;SQL7. Hardware Optimization
Storage Considerations
- SSD vs HDD: Use SSDs for better I/O performance
- RAID Configuration: RAID 10 for high performance, RAID 5/6 for storage efficiency
- Separate disks: OS, logs, data, and temp files on separate drives
Memory Optimization
-- MySQL memory settings
SET innodb_buffer_pool_size = 8G; -- 70-80% of available RAM
SET innodb_log_buffer_size = 256M;
SET key_buffer_size = 512M;
-- PostgreSQL memory settings
-- In postgresql.conf
shared_buffers = 2GB
effective_cache_size = 6GB
work_mem = 64MB
maintenance_work_mem = 512MBSQLThis comprehensive addition covers essential database design principles and architecture patterns that are crucial for building scalable, maintainable database systems. The content includes both theoretical concepts and practical examples for immediate application.
Discover more from Altgr Blog
Subscribe to get the latest posts sent to your email.
