commit c85b877dc0e60eabf0173a4ea1a7753b7a8eb5c0 Author: Santhosh Janardhanan Date: Tue Jan 27 13:24:03 2026 -0500 Initial Commit diff --git a/.env b/.env new file mode 100644 index 0000000..8d65d4e --- /dev/null +++ b/.env @@ -0,0 +1,22 @@ +# Database +DATABASE_URL=postgresql://postgres:changeme@postgres:5432/privacy_analyzer + +# Redis +REDIS_URL=redis://redis:6379 + +# Meilisearch +MEILISEARCH_URL=http://meilisearch:7700 +MEILISEARCH_API_KEY=your_secure_master_key_here + +# OpenAI +OPENAI_API_KEY=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 +OPENAI_MODEL=gpt-4o-mini + +# Admin Credentials (change these!) +ADMIN_USERNAME=admin +ADMIN_PASSWORD=secure_password_here +SESSION_SECRET=your_random_session_secret_here + +# App +PORT=3000 +NODE_ENV=local diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..87fc8a3 --- /dev/null +++ b/.env.example @@ -0,0 +1,22 @@ +# Database +DATABASE_URL=postgresql://postgres:changeme@postgres:5432/privacy_analyzer + +# Redis +REDIS_URL=redis://redis:6379 + +# Meilisearch +MEILISEARCH_URL=http://meilisearch:7700 +MEILISEARCH_API_KEY=your_secure_master_key_here + +# OpenAI +OPENAI_API_KEY=sk-your-openai-api-key-here +OPENAI_MODEL=gpt-4o + +# Admin Credentials (change these!) +ADMIN_USERNAME=admin +ADMIN_PASSWORD=secure_password_here +SESSION_SECRET=your_random_session_secret_here + +# App +PORT=3000 +NODE_ENV=production diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..e64dd1e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,649 @@ +# AGENTS.md - Privacy Policy Analyzer + +This file provides essential context and guidelines for AI agents working on this project. + +## Project Overview + +**Privacy Policy Analyzer** - A self-hosted web application that analyzes website privacy policies using OpenAI's GPT models. 
Provides easy-to-understand A-E grades and detailed findings about privacy practices. + +**Inspiration**: ToS;DR (Terms of Service; Didn't Read) - but focused specifically on privacy policies. + +**Repository**: Private pet project, no monetization + +## Tech Stack + +- **Runtime**: Bun (JavaScript, NOT TypeScript) +- **Web Framework**: Native Bun HTTP server or Elysia.js (lightweight) +- **Database**: PostgreSQL 15 +- **Search**: Meilisearch v1.6 +- **Cache**: Redis 7 +- **Templating**: EJS +- **AI**: OpenAI API (GPT-4o/GPT-4-turbo) +- **Containerization**: Docker + Docker Compose +- **Hosting**: Self-hosted on Linode + +## Project Structure + +``` +privacy-policy-analyzer/ +├── docker-compose.yml # Service orchestration +├── Dockerfile # Bun app container +├── .env # Environment variables (gitignored) +├── package.json # Bun dependencies +├── src/ +│ ├── app.js # Entry point +│ ├── config/ # Configuration files +│ ├── models/ # Database models +│ ├── routes/ # Route definitions +│ ├── controllers/ # Request handlers +│ ├── services/ # Business logic +│ ├── middleware/ # Express-style middleware +│ ├── views/ # EJS templates +│ └── utils/ # Helper functions +├── migrations/ # SQL migrations +└── public/ # Static assets +``` + +## Progress Tracking + +This project uses a task tracking system to monitor progress. 
Tasks are managed using the todo tool and organized by priority: + +### Priority Levels +- **High**: Critical infrastructure and core functionality +- **Medium**: Essential features and business logic +- **Low**: Enhancements, optimizations, and polish + +### Progress Checklist (48 Tasks Total) + +#### Phase 1: Infrastructure Setup (High Priority) - COMPLETED ✓ +- [x] Create project root files (docker-compose.yml, Dockerfile, .env.example, package.json) +- [x] Create directory structure (src/, migrations/, public/) +- [x] Configure PostgreSQL in docker-compose.yml with persistent volume +- [x] Configure Redis in docker-compose.yml with persistent volume +- [x] Configure Meilisearch in docker-compose.yml with persistent volume +- [x] Create Bun Dockerfile with optimized build +- [x] Set up .env.example with all required environment variables +- [x] Create package.json with dependencies (postgres, ejs, openai, etc.) +- [x] Test Docker Compose setup - verify all services start + +#### Phase 2: Database & Models (Medium Priority) - COMPLETED ✓ +- [x] Create database migration file (001_initial.sql) with schema +- [x] Create src/config/database.js for PostgreSQL connection +- [x] Create src/config/redis.js for Redis connection +- [x] Create src/config/meilisearch.js for Meilisearch client +- [x] Create src/config/openai.js for OpenAI client +- [x] Create database migration runner script +- [x] Create src/models/Service.js +- [x] Create src/models/PolicyVersion.js +- [x] Create src/models/Analysis.js +- [x] Create src/models/AdminSession.js + +#### Phase 3: Middleware & Routes (Medium Priority) - COMPLETED ✓ +- [x] Create src/middleware/auth.js for session authentication +- [x] Create src/middleware/errorHandler.js +- [x] Create src/middleware/security.js for security headers +- [x] Create src/middleware/rateLimiter.js +- [x] Create src/routes/admin.js with authentication routes +- [x] Create src/views/admin/login.ejs +- [x] Create admin dashboard view +- [x] Create 
src/routes/public.js for public pages +- [x] Create main layout EJS template with SEO meta tags +- [x] Create public homepage view with service listing +- [x] Create service detail page view with last analyzed date display + +#### Phase 4: Services & Features (Medium Priority) - COMPLETED ✓ +- [x] Create src/services/policyFetcher.js to fetch policy from URL +- [x] Create src/services/aiAnalyzer.js with OpenAI integration +- [x] Create admin service management forms (add/edit) +- [x] Implement manual analysis trigger in admin panel +- [x] Create src/services/scheduler.js for cron jobs +- [x] Create src/services/searchIndexer.js for Meilisearch + +#### Phase 5: Enhancements (Low Priority) - IN PROGRESS +- [ ] Implement Redis caching for public pages +- [ ] Create sitemap.xml generator +- [ ] Create robots.txt +- [ ] Add structured data (Schema.org) to service pages +- [x] Implement accessibility features (WCAG 2.1 AA) - Already implemented +- [x] Add CSS styling with focus indicators - Already implemented +- [x] Implement skip to main content link - Already implemented +- [ ] Performance testing and optimization +- [ ] Security audit and penetration testing +- [ ] Accessibility audit with axe-core +- [ ] SEO audit and optimization +- [ ] Create comprehensive documentation + +### Working with Tasks +- **ALWAYS** check the current todo list before starting work +- **Update** task status to `in_progress` when starting work +- **Mark complete** immediately after finishing a task +- **Verify** completed tasks using testing checklists in this document +- **Review** progress regularly to maintain momentum + +### Current Phase Focus +We are currently in **Phase 5: Enhancements**. Phases 1-4 are complete. All core functionality is working. Remaining tasks are optimizations, audits, and documentation. + +## Critical Rules + +### 1. JavaScript Only +- NO TypeScript +- Use JSDoc comments for type documentation when helpful +- Bun supports modern JavaScript (ES2023) + +### 2. 
Database Conventions +- Use `postgres` library (Bun-compatible) +- Always use parameterized queries +- Migrations are in `migrations/` folder, numbered sequentially +- Never write raw SQL in routes/controllers + +### 3. Environment Variables +ALL configuration goes in `.env`: +```bash +DATABASE_URL=postgresql://user:pass@postgres:5432/dbname +REDIS_URL=redis://redis:6379 +MEILISEARCH_URL=http://meilisearch:7700 +MEILISEARCH_API_KEY=key +OPENAI_API_KEY=sk-... +OPENAI_MODEL=gpt-4o +ADMIN_USERNAME=admin +ADMIN_PASSWORD=changeme +SESSION_SECRET=random_string +PORT=3000 +NODE_ENV=production +``` + +### 4. AI Analysis Guidelines +- Always use OpenAI's JSON mode for structured output +- Store raw AI response in database (for debugging) +- Implement retry logic with exponential backoff +- Rate limit AI calls (max 10/minute) +- Handle AI failures gracefully - don't crash the app + +### 5. Security Requirements (OWASP Top 10) +- NEVER commit `.env` file +- NEVER log API keys or passwords +- Use bcrypt for password hashing (cost factor 12) +- Session tokens stored in Redis with expiration (24 hours) +- All admin routes require authentication middleware +- Input validation on ALL user inputs with proper sanitization +- SQL injection prevention via parameterized queries ONLY +- XSS prevention via EJS auto-escaping AND Content Security Policy +- Rate limiting: 100 req/15min public, 30 req/15min admin, 10 req/hour AI +- Security headers REQUIRED on all responses: + - Strict-Transport-Security + - Content-Security-Policy + - X-Content-Type-Options: nosniff + - X-Frame-Options: DENY + - X-XSS-Protection: 1; mode=block + - Referrer-Policy: strict-origin-when-cross-origin +- HTTPS only with HSTS +- Secure cookies (HttpOnly, Secure, SameSite=Strict) +- Regular dependency audits (`bun audit`) +- Non-root Docker user +- Log authentication attempts and errors (NEVER log sensitive data) + +### 6. 
Error Handling Pattern +```javascript +try { + // Operation +} catch (error) { + console.error('Context:', error.message); + // Return user-friendly error + return new Response('Error message', { status: 500 }); +} +``` + +### 7. Code Style +- Use single quotes for strings +- 2-space indentation +- Semicolons required +- camelCase for variables/functions +- PascalCase for classes +- No trailing commas +- Max line length: 100 characters + +## Common Commands + +```bash +# Start all services +docker-compose up -d + +# View logs +docker-compose logs -f app + +# Run database migrations +docker-compose exec app bun run migrate + +# Restart app only +docker-compose restart app + +# Shell into app container +docker-compose exec app sh + +# Install new dependency +docker-compose exec app bun add package-name + +# Run tests (when added) +docker-compose exec app bun test +``` + +## Database Schema + +### services +- id (PK, serial) +- name (varchar) +- url (varchar) +- logo_url (varchar, nullable) +- policy_url (varchar) +- created_at (timestamp) +- updated_at (timestamp) + +### policy_versions +- id (PK, serial) +- service_id (FK) +- content (text) +- content_hash (varchar 64) +- fetched_at (timestamp) +- created_at (timestamp) + +### analyses +- id (PK, serial) +- service_id (FK) +- policy_version_id (FK) +- overall_score (char 1: A/B/C/D/E) +- findings (JSONB) +- raw_analysis (text) +- created_at (timestamp) - **This is the "last analyzed" date, must be displayed on all service pages** +- updated_at (timestamp) + +### admin_sessions +- id (PK, serial) +- session_token (varchar, unique) +- created_at (timestamp) +- expires_at (timestamp) + +## AI Prompt Template + +When modifying AI analysis, use this structure: + +```javascript +const prompt = { + model: process.env.OPENAI_MODEL, + messages: [ + { + role: 'system', + content: `You are a privacy policy analyzer. Analyze the following privacy policy and provide a structured assessment. 
+ +Scoring Criteria: +- A: Excellent privacy practices +- B: Good with minor issues +- C: Acceptable but concerns exist +- D: Poor privacy practices +- E: Very invasive, major concerns + +Categories: +1. Data Collection (what's collected) +2. Data Sharing (third parties) +3. User Rights (access, deletion, etc.) +4. Data Retention (how long kept) +5. Tracking & Security + +Respond ONLY with valid JSON matching this schema: +{ + "overall_score": "A|B|C|D|E", + "score_breakdown": { "data_collection": "A|B|C|D|E", ... }, + "findings": { "positive": [...], "negative": [...], "neutral": [...] }, + "data_types_collected": [...], + "third_parties": [...], + "summary": "string" +}` + }, + { + role: 'user', + content: `Analyze this privacy policy:\n\n${policyText}` + } + ], + response_format: { type: 'json_object' } +}; +``` + +## SEO Requirements + +### Meta Tags (All Public Pages) +Every public page MUST include: +```html + +Descriptive Title - Privacy Policy Analyzer + + + + + + + + + + + + + + + +``` + +### Structured Data (Schema.org) +Include JSON-LD structured data on all service pages: +```html + +``` + +### Semantic HTML Requirements +- One `

<h1>` per page with main topic +- Logical heading hierarchy (no skipping levels) +- Use `
`, `