docs: add comprehensive README and module documentation
This commit is contained in:
278
docs/config.md
Normal file
278
docs/config.md
Normal file
@@ -0,0 +1,278 @@
|
||||
# Configuration Reference
|
||||
|
||||
Complete reference for `config.json` configuration options.
|
||||
|
||||
## Overview
|
||||
|
||||
The configuration file uses JSON format with support for:
|
||||
- Path expansion (`~` expands to home directory)
|
||||
- Type validation via Pydantic models
|
||||
- Environment-specific overrides
|
||||
|
||||
## Schema Validation
|
||||
|
||||
Validate your config against the schema:
|
||||
|
||||
```bash
|
||||
python -c "from companion.config import load_config; load_config('config.json')"
|
||||
```
|
||||
|
||||
Or use the JSON Schema directly: [config-schema.json](../config-schema.json)
|
||||
|
||||
## Configuration Sections
|
||||
|
||||
### companion
|
||||
|
||||
Core companion personality and behavior settings.
|
||||
|
||||
```json
|
||||
{
|
||||
"companion": {
|
||||
"name": "SAN",
|
||||
"persona": {
|
||||
"role": "companion",
|
||||
"tone": "reflective",
|
||||
"style": "questioning",
|
||||
"boundaries": [
|
||||
"does_not_impersonate_user",
|
||||
"no_future_predictions",
|
||||
"no_medical_or_legal_advice"
|
||||
]
|
||||
},
|
||||
"memory": {
|
||||
"session_turns": 20,
|
||||
"persistent_store": "~/.companion/memory.db",
|
||||
"summarize_after": 10
|
||||
},
|
||||
"chat": {
|
||||
"streaming": true,
|
||||
"max_response_tokens": 2048,
|
||||
"default_temperature": 0.7,
|
||||
"allow_temperature_override": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Fields
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `name` | string | "SAN" | Display name for the companion |
|
||||
| `persona.role` | string | "companion" | Role description (companion/advisor/reflector) |
|
||||
| `persona.tone` | string | "reflective" | Communication tone (reflective/supportive/analytical) |
|
||||
| `persona.style` | string | "questioning" | Interaction style (questioning/supportive/direct) |
|
||||
| `persona.boundaries` | string[] | [...] | Behavioral guardrails |
|
||||
| `memory.session_turns` | int | 20 | Messages to keep in context |
|
||||
| `memory.persistent_store` | string | "~/.companion/memory.db" | SQLite database path |
|
||||
| `memory.summarize_after` | int | 10 | Summarize history after N turns |
|
||||
| `chat.streaming` | bool | true | Stream responses in real-time |
|
||||
| `chat.max_response_tokens` | int | 2048 | Max tokens per response |
|
||||
| `chat.default_temperature` | float | 0.7 | Creativity (0.0=deterministic, 2.0=creative) |
|
||||
| `chat.allow_temperature_override` | bool | true | Let users adjust temperature |
|
||||
|
||||
---
|
||||
|
||||
### vault
|
||||
|
||||
Obsidian vault indexing configuration.
|
||||
|
||||
```json
|
||||
{
|
||||
"vault": {
|
||||
"path": "~/KnowledgeVault/Default",
|
||||
"indexing": {
|
||||
"auto_sync": true,
|
||||
"auto_sync_interval_minutes": 1440,
|
||||
"watch_fs_events": true,
|
||||
"file_patterns": ["*.md"],
|
||||
"deny_dirs": [".obsidian", ".trash", "zzz-Archive", ".git"],
|
||||
"deny_patterns": ["*.tmp", "*.bak", "*conflict*"]
|
||||
},
|
||||
"chunking_rules": {
|
||||
"default": {
|
||||
"strategy": "sliding_window",
|
||||
"chunk_size": 500,
|
||||
"chunk_overlap": 100
|
||||
},
|
||||
"Journal/**": {
|
||||
"strategy": "section",
|
||||
"section_tags": ["#DayInShort", "#mentalhealth", "#work"],
|
||||
"chunk_size": 300,
|
||||
"chunk_overlap": 50
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### rag
|
||||
|
||||
RAG (Retrieval-Augmented Generation) engine configuration.
|
||||
|
||||
```json
|
||||
{
|
||||
"rag": {
|
||||
"embedding": {
|
||||
"provider": "ollama",
|
||||
"model": "mxbai-embed-large",
|
||||
"base_url": "http://localhost:11434",
|
||||
"dimensions": 1024,
|
||||
"batch_size": 32
|
||||
},
|
||||
"vector_store": {
|
||||
"type": "lancedb",
|
||||
"path": "~/.companion/vectors.lance"
|
||||
},
|
||||
"search": {
|
||||
"default_top_k": 8,
|
||||
"max_top_k": 20,
|
||||
"similarity_threshold": 0.75,
|
||||
"hybrid_search": {
|
||||
"enabled": true,
|
||||
"keyword_weight": 0.3,
|
||||
"semantic_weight": 0.7
|
||||
},
|
||||
"filters": {
|
||||
"date_range_enabled": true,
|
||||
"tag_filter_enabled": true,
|
||||
"directory_filter_enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### model
|
||||
|
||||
LLM configuration for inference and fine-tuning.
|
||||
|
||||
```json
|
||||
{
|
||||
"model": {
|
||||
"inference": {
|
||||
"backend": "llama.cpp",
|
||||
"model_path": "~/.companion/models/companion-7b-q4.gguf",
|
||||
"context_length": 8192,
|
||||
"gpu_layers": 35,
|
||||
"batch_size": 512,
|
||||
"threads": 8
|
||||
},
|
||||
"fine_tuning": {
|
||||
"base_model": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
|
||||
"output_dir": "~/.companion/training",
|
||||
"lora_rank": 16,
|
||||
"lora_alpha": 32,
|
||||
"learning_rate": 0.0002,
|
||||
"batch_size": 4,
|
||||
"gradient_accumulation_steps": 4,
|
||||
"num_epochs": 3,
|
||||
"warmup_steps": 100,
|
||||
"save_steps": 500,
|
||||
"eval_steps": 250,
|
||||
"training_data_path": "~/.companion/training_data/",
|
||||
"validation_split": 0.1
|
||||
},
|
||||
"retrain_schedule": {
|
||||
"auto_reminder": true,
|
||||
"default_interval_days": 90,
|
||||
"reminder_channels": ["chat_stream", "log"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### api
|
||||
|
||||
FastAPI backend configuration.
|
||||
|
||||
```json
|
||||
{
|
||||
"api": {
|
||||
"host": "127.0.0.1",
|
||||
"port": 7373,
|
||||
"cors_origins": ["http://localhost:5173"],
|
||||
"auth": {
|
||||
"enabled": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### ui
|
||||
|
||||
Web UI configuration.
|
||||
|
||||
```json
|
||||
{
|
||||
"ui": {
|
||||
"web": {
|
||||
"enabled": true,
|
||||
"theme": "obsidian",
|
||||
"features": {
|
||||
"streaming": true,
|
||||
"citations": true,
|
||||
"source_preview": true
|
||||
}
|
||||
},
|
||||
"cli": {
|
||||
"enabled": true,
|
||||
"rich_output": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### logging
|
||||
|
||||
Logging configuration.
|
||||
|
||||
```json
|
||||
{
|
||||
"logging": {
|
||||
"level": "INFO",
|
||||
"file": "~/.companion/logs/companion.log",
|
||||
"max_size_mb": 100,
|
||||
"backup_count": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### security
|
||||
|
||||
Security and privacy settings.
|
||||
|
||||
```json
|
||||
{
|
||||
"security": {
|
||||
"local_only": true,
|
||||
"vault_path_traversal_check": true,
|
||||
"sensitive_content_detection": true,
|
||||
"sensitive_patterns": [
|
||||
"#mentalhealth",
|
||||
"#physicalhealth",
|
||||
"#finance",
|
||||
"#Relations"
|
||||
],
|
||||
"require_confirmation_for_external_apis": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Example
|
||||
|
||||
See [config.json](../config.json) for a complete working configuration.
|
||||
288
docs/forge.md
Normal file
288
docs/forge.md
Normal file
@@ -0,0 +1,288 @@
|
||||
# FORGE Module Documentation
|
||||
|
||||
The FORGE module handles fine-tuning of the companion model. It extracts training examples from your vault reflections and trains a custom LoRA adapter using QLoRA on your local GPU.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Vault Reflections
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Extract │ - Scan for #reflection, #insight tags
|
||||
│ (extract.py) │ - Parse reflection patterns
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Curate │ - Manual review (optional)
|
||||
│ (curate.py) │ - Deduplication
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Train │ - QLoRA fine-tuning
|
||||
│ (train.py) │ - Unsloth + transformers
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Export │ - Merge LoRA weights
|
||||
│ (export.py) │ - Convert to GGUF
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Reload │ - Hot-swap in API
|
||||
│ (reload.py) │ - No restart needed
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- **GPU**: RTX 5070 or equivalent (12GB+ VRAM)
|
||||
- **Dependencies**: Install with `pip install -e ".[train]"`
|
||||
- **Time**: 4-6 hours for full training run
|
||||
|
||||
## Workflow
|
||||
|
||||
### 1. Extract Training Data
|
||||
|
||||
Scan your vault for reflection patterns:
|
||||
|
||||
```bash
|
||||
python -m companion.forge.cli extract
|
||||
```
|
||||
|
||||
This scans for:
|
||||
- Tags: `#reflection`, `#insight`, `#learning`, `#decision`, etc.
|
||||
- Patterns: "I think", "I realize", "Looking back", "What if"
|
||||
- Section headers in journal entries
|
||||
|
||||
Output: `~/.companion/training_data/extracted.jsonl`
|
||||
|
||||
**Example extracted data:**
|
||||
|
||||
```json
|
||||
{
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a thoughtful, reflective companion."},
|
||||
{"role": "user", "content": "I'm facing a decision. How should I think through this?"},
|
||||
{"role": "assistant", "content": "#reflection I think I need to slow down..."}
|
||||
],
|
||||
"source_file": "Journal/2026/04/2026-04-12.md",
|
||||
"tags": ["#reflection", "#DayInShort"],
|
||||
"date": "2026-04-12"
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Train Model
|
||||
|
||||
Run QLoRA fine-tuning:
|
||||
|
||||
```bash
|
||||
python -m companion.forge.cli train --epochs 3 --lr 2e-4
|
||||
```
|
||||
|
||||
**Hyperparameters (from config):**
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `lora_rank` | 16 | LoRA rank (8-64) |
|
||||
| `lora_alpha` | 32 | LoRA scaling factor |
|
||||
| `learning_rate` | 2e-4 | Optimizer learning rate |
|
||||
| `num_epochs` | 3 | Training epochs |
|
||||
| `batch_size` | 4 | Per-device batch |
|
||||
| `gradient_accumulation_steps` | 4 | Steps before update |
|
||||
|
||||
**Training Output:**
|
||||
- Checkpoints: `~/.companion/training/checkpoint-*/`
|
||||
- Final model: `~/.companion/training/final/`
|
||||
- Logs: Training loss, eval metrics
|
||||
|
||||
### 3. Reload Model
|
||||
|
||||
Hot-swap without restarting API:
|
||||
|
||||
```bash
|
||||
python -m companion.forge.cli reload ~/.companion/training/final
|
||||
```
|
||||
|
||||
Or via API:
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:7373/admin/reload-model \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model_path": "~/.companion/training/final"}'
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### Extractor (`companion.forge.extract`)
|
||||
|
||||
```python
|
||||
from companion.forge.extract import TrainingDataExtractor, extract_training_data
|
||||
|
||||
# Extract from vault
|
||||
extractor = TrainingDataExtractor(config)
|
||||
examples = extractor.extract()
|
||||
|
||||
# Get statistics
|
||||
stats = extractor.get_stats()
|
||||
print(f"Extracted {stats['total']} examples")
|
||||
|
||||
# Save to JSONL
|
||||
extractor.save_to_jsonl(Path("training.jsonl"))
|
||||
```
|
||||
|
||||
**Reflection Detection:**
|
||||
|
||||
- **Tags**: `#reflection`, `#learning`, `#insight`, `#decision`, `#analysis`, `#takeaway`, `#realization`
|
||||
- **Patterns**: "I think", "I feel", "I realize", "I wonder", "Looking back", "On one hand...", "Ultimately decided"
|
||||
|
||||
### Trainer (`companion.forge.train`)
|
||||
|
||||
```python
|
||||
from companion.forge.train import train
|
||||
|
||||
final_path = train(
|
||||
data_path=Path("training.jsonl"),
|
||||
output_dir=Path("~/.companion/training"),
|
||||
base_model="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
|
||||
lora_rank=16,
|
||||
lora_alpha=32,
|
||||
learning_rate=2e-4,
|
||||
num_epochs=3,
|
||||
)
|
||||
```
|
||||
|
||||
**Base Models:**
|
||||
|
||||
- `unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit` - Recommended
|
||||
- `unsloth/llama-3-8b-bnb-4bit` - Alternative
|
||||
|
||||
**Target Modules:**
|
||||
|
||||
LoRA is applied to: `q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, `down_proj`
|
||||
|
||||
### Exporter (`companion.forge.export`)
|
||||
|
||||
```python
|
||||
from companion.forge.export import merge_only
|
||||
|
||||
# Merge LoRA into base model
|
||||
merged_path = merge_only(
|
||||
checkpoint_path=Path("~/.companion/training/checkpoint-500"),
|
||||
output_path=Path("~/.companion/models/merged"),
|
||||
)
|
||||
```
|
||||
|
||||
### Reloader (`companion.forge.reload`)
|
||||
|
||||
```python
|
||||
from companion.forge.reload import reload_model, get_model_status
|
||||
|
||||
# Check current model
|
||||
status = get_model_status(config)
|
||||
print(f"Model size: {status['size_mb']} MB")
|
||||
|
||||
# Reload with new model
|
||||
new_path = reload_model(
|
||||
config=config,
|
||||
new_model_path=Path("~/.companion/training/final"),
|
||||
backup=True,
|
||||
)
|
||||
```
|
||||
|
||||
## CLI Reference
|
||||
|
||||
```bash
|
||||
# Extract training data
|
||||
python -m companion.forge.cli extract [--output PATH]
|
||||
|
||||
# Train model
|
||||
python -m companion.forge.cli train \
|
||||
[--data PATH] \
|
||||
[--output PATH] \
|
||||
[--epochs N] \
|
||||
[--lr FLOAT]
|
||||
|
||||
# Check model status
|
||||
python -m companion.forge.cli status
|
||||
|
||||
# Reload model
|
||||
python -m companion.forge.cli reload MODEL_PATH [--no-backup]
|
||||
```
|
||||
|
||||
## Training Tips
|
||||
|
||||
**Dataset Size:**
|
||||
- Minimum: 50 examples
|
||||
- Optimal: 100-500 examples
|
||||
- More is not always better - quality over quantity
|
||||
|
||||
**Epochs:**
|
||||
- Start with 3 epochs
|
||||
- Increase if underfitting (high loss)
|
||||
- Decrease if overfitting (loss increases on eval)
|
||||
|
||||
**LoRA Rank:**
|
||||
- `8` - Quick experiments
|
||||
- `16` - Balanced (recommended)
|
||||
- `32-64` - High capacity, more VRAM
|
||||
|
||||
**Overfitting Signs:**
|
||||
- Training loss decreasing, eval loss increasing
|
||||
- Model repeats exact phrases from training data
|
||||
- Responses feel "memorized", not "learned"
|
||||
|
||||
## VRAM Usage (RTX 5070, 12GB)
|
||||
|
||||
| Config | VRAM | Batch Size |
|
||||
|--------|------|------------|
|
||||
| Rank 16, 8-bit Adam | ~10GB | 4 |
|
||||
| Rank 32, 8-bit Adam | ~11GB | 4 |
|
||||
| Rank 64, 8-bit Adam | OOM | - |
|
||||
|
||||
Use `gradient_accumulation_steps` to increase effective batch size.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**CUDA Out of Memory**
|
||||
- Reduce `lora_rank` to 8
|
||||
- Reduce `batch_size` to 2
|
||||
- Increase `gradient_accumulation_steps`
|
||||
|
||||
**Training Loss Not Decreasing**
|
||||
- Check data quality (reflections present?)
|
||||
- Increase learning rate to 5e-4
|
||||
- Check for data formatting issues
|
||||
|
||||
**Model Not Loading After Reload**
|
||||
- Check path exists: `ls -la ~/.companion/models/`
|
||||
- Verify model format (GGUF vs HF)
|
||||
- Check API logs for errors
|
||||
|
||||
**Slow Training**
|
||||
- Expected: ~6 hours for 3 epochs on RTX 5070
|
||||
- Enable gradient checkpointing (enabled by default)
|
||||
- Close other GPU applications
|
||||
|
||||
## Advanced: Custom Training Script
|
||||
|
||||
```python
|
||||
# custom_train.py
|
||||
from companion.forge.train import train
|
||||
from companion.config import load_config
|
||||
|
||||
config = load_config()
|
||||
|
||||
final_path = train(
|
||||
data_path=config.model.fine_tuning.training_data_path / "curated.jsonl",
|
||||
output_dir=config.model.fine_tuning.output_dir,
|
||||
base_model=config.model.fine_tuning.base_model,
|
||||
lora_rank=32, # Higher capacity
|
||||
lora_alpha=64,
|
||||
learning_rate=3e-4, # Slightly higher
|
||||
num_epochs=5, # More epochs
|
||||
batch_size=2, # Smaller batches
|
||||
gradient_accumulation_steps=8, # Effective batch = 16
|
||||
)
|
||||
|
||||
print(f"Model saved to: {final_path}")
|
||||
```
|
||||
269
docs/rag.md
Normal file
269
docs/rag.md
Normal file
@@ -0,0 +1,269 @@
|
||||
# RAG Module Documentation
|
||||
|
||||
The RAG (Retrieval-Augmented Generation) module provides semantic search over your Obsidian vault. It handles document chunking, embedding generation, and vector similarity search.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Vault Markdown Files
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Chunker │ - Split by strategy (sliding window / section)
|
||||
│ (chunker.py) │ - Extract metadata (tags, dates, sections)
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Embedder │ - HTTP client for Ollama API
|
||||
│ (embedder.py) │ - Batch processing with retries
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Vector Store │ - LanceDB persistence
|
||||
│(vector_store.py)│ - Upsert, delete, search
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ Indexer │ - Full/incremental sync
|
||||
│ (indexer.py) │ - File watching
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### Chunker (`companion.rag.chunker`)
|
||||
|
||||
Splits markdown files into searchable chunks.
|
||||
|
||||
```python
|
||||
from companion.rag.chunker import chunk_file, ChunkingRule
|
||||
|
||||
rules = {
|
||||
"default": ChunkingRule(strategy="sliding_window", chunk_size=500, chunk_overlap=100),
|
||||
"Journal/**": ChunkingRule(strategy="section", section_tags=["#DayInShort"], chunk_size=300, chunk_overlap=50),
|
||||
}
|
||||
|
||||
chunks = chunk_file(
|
||||
file_path=Path("journal/2026-04-12.md"),
|
||||
vault_root=Path("~/vault"),
|
||||
rules=rules,
|
||||
modified_at=1234567890.0,
|
||||
)
|
||||
|
||||
for chunk in chunks:
|
||||
print(f"{chunk.source_file}:{chunk.chunk_index}")
|
||||
print(f"Text: {chunk.text[:100]}...")
|
||||
print(f"Tags: {chunk.tags}")
|
||||
print(f"Date: {chunk.date}")
|
||||
```
|
||||
|
||||
#### Chunking Strategies
|
||||
|
||||
**Sliding Window**
|
||||
- Fixed-size chunks with overlap
|
||||
- Best for: Longform text, articles
|
||||
|
||||
```python
|
||||
ChunkingRule(
|
||||
strategy="sliding_window",
|
||||
chunk_size=500, # words per chunk
|
||||
chunk_overlap=100, # words overlap between chunks
|
||||
)
|
||||
```
|
||||
|
||||
**Section-Based**
|
||||
- Split on section headers (tags)
|
||||
- Best for: Structured journals, daily notes
|
||||
|
||||
```python
|
||||
ChunkingRule(
|
||||
strategy="section",
|
||||
section_tags=["#DayInShort", "#mentalhealth", "#work"],
|
||||
chunk_size=300,
|
||||
chunk_overlap=50,
|
||||
)
|
||||
```
|
||||
|
||||
#### Metadata Extraction
|
||||
|
||||
Each chunk includes:
|
||||
- `source_file` - Relative path from vault root
|
||||
- `source_directory` - Top-level directory
|
||||
- `section` - Section header (for section strategy)
|
||||
- `date` - Parsed from filename
|
||||
- `tags` - Hashtags and wikilinks
|
||||
- `chunk_index` - Position in document
|
||||
- `modified_at` - File mtime for sync
|
||||
|
||||
### Embedder (`companion.rag.embedder`)
|
||||
|
||||
Generates embeddings via Ollama API.
|
||||
|
||||
```python
|
||||
from companion.rag.embedder import OllamaEmbedder
|
||||
|
||||
embedder = OllamaEmbedder(
|
||||
base_url="http://localhost:11434",
|
||||
model="mxbai-embed-large",
|
||||
batch_size=32,
|
||||
)
|
||||
|
||||
# Single embedding
|
||||
embeddings = embedder.embed(["Hello world"])
|
||||
print(len(embeddings[0])) # 1024 dimensions
|
||||
|
||||
# Batch embedding (with automatic batching)
|
||||
texts = ["text 1", "text 2", "text 3", ...] # 100 texts
|
||||
embeddings = embedder.embed(texts) # Automatically batches
|
||||
```
|
||||
|
||||
#### Features
|
||||
|
||||
- **Batching**: Automatically splits large requests
|
||||
- **Retries**: Exponential backoff on failures
|
||||
- **Context Manager**: Proper resource cleanup
|
||||
|
||||
```python
|
||||
with OllamaEmbedder(...) as embedder:
|
||||
embeddings = embedder.embed(texts)
|
||||
```
|
||||
|
||||
### Vector Store (`companion.rag.vector_store`)
|
||||
|
||||
LanceDB wrapper for vector storage.
|
||||
|
||||
```python
|
||||
from companion.rag.vector_store import VectorStore
|
||||
|
||||
store = VectorStore(
|
||||
uri="~/.companion/vectors.lance",
|
||||
dimensions=1024,
|
||||
)
|
||||
|
||||
# Upsert chunks
|
||||
store.upsert(
|
||||
ids=["file.md::0", "file.md::1"],
|
||||
texts=["chunk 1", "chunk 2"],
|
||||
embeddings=[[0.1, ...], [0.2, ...]],
|
||||
metadatas=[
|
||||
{"source_file": "file.md", "source_directory": "docs"},
|
||||
{"source_file": "file.md", "source_directory": "docs"},
|
||||
],
|
||||
)
|
||||
|
||||
# Search
|
||||
results = store.search(
|
||||
query_vector=[0.1, ...],
|
||||
top_k=8,
|
||||
filters={"source_directory": "Journal"},
|
||||
)
|
||||
```
|
||||
|
||||
#### Schema
|
||||
|
||||
| Field | Type | Nullable |
|
||||
|-------|------|----------|
|
||||
| id | string | No |
|
||||
| text | string | No |
|
||||
| vector | list[float32] | No |
|
||||
| source_file | string | No |
|
||||
| source_directory | string | No |
|
||||
| section | string | Yes |
|
||||
| date | string | Yes |
|
||||
| tags | list[string] | Yes |
|
||||
| chunk_index | int32 | No |
|
||||
| total_chunks | int32 | No |
|
||||
| modified_at | float64 | Yes |
|
||||
| rule_applied | string | No |
|
||||
|
||||
### Indexer (`companion.rag.indexer`)
|
||||
|
||||
Orchestrates vault indexing.
|
||||
|
||||
```python
|
||||
from companion.config import load_config
|
||||
from companion.rag.indexer import Indexer
|
||||
from companion.rag.vector_store import VectorStore
|
||||
|
||||
config = load_config()
|
||||
store = VectorStore(
|
||||
uri=config.rag.vector_store.path,
|
||||
dimensions=config.rag.embedding.dimensions,
|
||||
)
|
||||
|
||||
indexer = Indexer(config, store)
|
||||
|
||||
# Full reindex (clear + rebuild)
|
||||
indexer.full_index()
|
||||
|
||||
# Incremental sync (only changed files)
|
||||
indexer.sync()
|
||||
|
||||
# Get status
|
||||
status = indexer.status()
|
||||
print(f"Total chunks: {status['total_chunks']}")
|
||||
print(f"Unindexed files: {status['unindexed_files']}")
|
||||
```
|
||||
|
||||
### Search (`companion.rag.search`)
|
||||
|
||||
High-level search interface.
|
||||
|
||||
```python
|
||||
from companion.rag.search import SearchEngine
|
||||
|
||||
engine = SearchEngine(
|
||||
vector_store=store,
|
||||
embedder_base_url="http://localhost:11434",
|
||||
embedder_model="mxbai-embed-large",
|
||||
default_top_k=8,
|
||||
similarity_threshold=0.75,
|
||||
hybrid_search_enabled=False,
|
||||
)
|
||||
|
||||
results = engine.search(
|
||||
query="What did I learn about friendships?",
|
||||
top_k=8,
|
||||
filters={"source_directory": "Journal"},
|
||||
)
|
||||
|
||||
for result in results:
|
||||
print(f"Source: {result['source_file']}")
|
||||
print(f"Relevance: {1 - result['_distance']:.2f}")
|
||||
```
|
||||
|
||||
## CLI Commands
|
||||
|
||||
```bash
|
||||
# Full index
|
||||
python -m companion.indexer_daemon.cli index
|
||||
|
||||
# Incremental sync
|
||||
python -m companion.indexer_daemon.cli sync
|
||||
|
||||
# Check status
|
||||
python -m companion.indexer_daemon.cli status
|
||||
|
||||
# Reindex (same as index)
|
||||
python -m companion.indexer_daemon.cli reindex
|
||||
```
|
||||
|
||||
## Performance Tips
|
||||
|
||||
1. **Chunk Size**: Smaller chunks = better retrieval, larger = more context
|
||||
2. **Batch Size**: 32 is optimal for Ollama embeddings
|
||||
3. **Filters**: Use directory filters to narrow search scope
|
||||
4. **Sync vs Index**: Use `sync` for daily updates, `index` for full rebuilds
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**Slow indexing**
|
||||
- Check Ollama is running: `ollama ps`
|
||||
- Reduce batch size if OOM
|
||||
|
||||
**No results**
|
||||
- Verify vault path in config
|
||||
- Check `indexer.status()` for unindexed files
|
||||
|
||||
**Duplicate chunks**
|
||||
- Each chunk ID is `{source_file}::{chunk_index}`
|
||||
- Use `full_index()` to clear and rebuild
|
||||
408
docs/ui.md
Normal file
408
docs/ui.md
Normal file
@@ -0,0 +1,408 @@
|
||||
# UI Module Documentation
|
||||
|
||||
The UI is a React + Vite frontend for the companion chat interface. It provides real-time streaming chat with a clean, Obsidian-inspired dark theme.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
HTTP/SSE
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ App.tsx │ - State management
|
||||
│ Message state │ - User/assistant messages
|
||||
└────────┬────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ MessageList │ - Render messages
|
||||
│ (components/) │ - User/assistant styling
|
||||
└─────────────────┘
|
||||
┌─────────────────┐
|
||||
│ ChatInput │ - Textarea + send
|
||||
│ (components/) │ - Auto-resize, hotkeys
|
||||
└─────────────────┘
|
||||
↓
|
||||
┌─────────────────┐
|
||||
│ useChatStream │ - SSE streaming
|
||||
│ (hooks/) │ - Session management
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
ui/
|
||||
├── src/
|
||||
│ ├── main.tsx # React entry point
|
||||
│ ├── App.tsx # Main app component
|
||||
│ ├── App.css # App layout styles
|
||||
│ ├── index.css # Global styles
|
||||
│ ├── components/
|
||||
│ │ ├── MessageList.tsx # Message display
|
||||
│ │ ├── MessageList.css # Message styling
|
||||
│ │ ├── ChatInput.tsx # Input textarea
|
||||
│ │ └── ChatInput.css # Input styling
|
||||
│ └── hooks/
|
||||
│ └── useChatStream.ts # SSE streaming hook
|
||||
├── index.html # HTML template
|
||||
├── vite.config.ts # Vite configuration
|
||||
├── tsconfig.json # TypeScript config
|
||||
└── package.json # Dependencies
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### App.tsx
|
||||
|
||||
Main application state management:
|
||||
|
||||
```typescript
|
||||
interface Message {
|
||||
role: 'user' | 'assistant'
|
||||
content: string
|
||||
}
|
||||
|
||||
// State
|
||||
const [messages, setMessages] = useState<Message[]>([])
|
||||
const [input, setInput] = useState('')
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
|
||||
// Handlers
|
||||
const handleSend = async () => { /* ... */ }
|
||||
const handleKeyDown = (e) => { /* Enter to send, Shift+Enter newline */ }
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Auto-scroll to bottom on new messages
|
||||
- Keyboard shortcuts (Enter to send, Shift+Enter for newline)
|
||||
- Loading state with animation
|
||||
- Message streaming in real-time
|
||||
|
||||
### MessageList.tsx
|
||||
|
||||
Renders the chat history:
|
||||
|
||||
```typescript
|
||||
interface MessageListProps {
|
||||
messages: Message[]
|
||||
isLoading: boolean
|
||||
}
|
||||
```
|
||||
|
||||
**Layout:**
|
||||
- User messages: Right-aligned, blue background
|
||||
- Assistant messages: Left-aligned, gray background with border
|
||||
- Loading indicator: Three animated dots
|
||||
- Empty state: Prompt text when no messages
|
||||
|
||||
**Styling:**
|
||||
- Max-width 800px, centered
|
||||
- Smooth scroll behavior
|
||||
- Avatar-less design (clean, text-focused)
|
||||
|
||||
### ChatInput.tsx
|
||||
|
||||
Textarea input with send button:
|
||||
|
||||
```typescript
|
||||
interface ChatInputProps {
|
||||
value: string
|
||||
onChange: (value: string) => void
|
||||
onSend: () => void
|
||||
onKeyDown: (e: KeyboardEvent) => void
|
||||
disabled: boolean
|
||||
}
|
||||
```
|
||||
|
||||
**Features:**
|
||||
- Auto-resizing textarea
|
||||
- Send button with loading state
|
||||
- Placeholder text
|
||||
- Disabled during streaming
|
||||
|
||||
## Hooks
|
||||
|
||||
### useChatStream.ts
|
||||
|
||||
Manages SSE streaming connection:
|
||||
|
||||
```typescript
|
||||
interface UseChatStreamReturn {
|
||||
sendMessage: (
|
||||
message: string,
|
||||
onChunk: (chunk: string) => void
|
||||
) => Promise<void>
|
||||
sessionId: string | null
|
||||
}
|
||||
|
||||
const { sendMessage, sessionId } = useChatStream()
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```typescript
|
||||
await sendMessage("Hello", (chunk) => {
|
||||
// Append chunk to current response
|
||||
setMessages(prev => {
|
||||
const last = prev[prev.length - 1]
|
||||
if (last?.role === 'assistant') {
|
||||
last.content += chunk
|
||||
return [...prev]
|
||||
}
|
||||
return [...prev, { role: 'assistant', content: chunk }]
|
||||
})
|
||||
})
|
||||
```
|
||||
|
||||
**SSE Protocol:**
|
||||
|
||||
The API streams events in this format:
|
||||
|
||||
```
|
||||
data: {"type": "chunk", "content": "Hello"}
|
||||
|
||||
data: {"type": "chunk", "content": " world"}
|
||||
|
||||
data: {"type": "sources", "sources": [{"file": "journal.md"}]}
|
||||
|
||||
data: {"type": "done", "session_id": "uuid"}
|
||||
```
|
||||
|
||||
## Styling
|
||||
|
||||
### Design System
|
||||
|
||||
Based on Obsidian's dark theme:
|
||||
|
||||
```css
|
||||
:root {
|
||||
--bg-primary: #0d1117; /* App background */
|
||||
--bg-secondary: #161b22; /* Header/footer */
|
||||
--bg-tertiary: #21262d; /* Input background */
|
||||
|
||||
--text-primary: #c9d1d9; /* Main text */
|
||||
--text-secondary: #8b949e; /* Placeholder */
|
||||
|
||||
--accent-primary: #58a6ff; /* Primary blue */
|
||||
  --accent-secondary: #79c0ff; /* Lighter blue */
|
||||
|
||||
--border: #30363d; /* Borders */
|
||||
--user-bg: #1f6feb; /* User message */
|
||||
--assistant-bg: #21262d; /* Assistant message */
|
||||
}
|
||||
```
|
||||
|
||||
### Message Styling
|
||||
|
||||
**User Message:**
|
||||
- Blue background (`--user-bg`)
|
||||
- White text
|
||||
- Border radius: 12px (12px 12px 4px 12px)
|
||||
- Max-width: 80%
|
||||
|
||||
**Assistant Message:**
|
||||
- Gray background (`--assistant-bg`)
|
||||
- Light text (`--text-primary`)
|
||||
- Border: 1px solid `--border`
|
||||
- Border radius: 12px (12px 12px 12px 4px)
|
||||
|
||||
### Loading Animation
|
||||
|
||||
Three bouncing dots using CSS keyframes:
|
||||
|
||||
```css
|
||||
@keyframes bounce {
|
||||
0%, 80%, 100% { transform: scale(0.6); }
|
||||
40% { transform: scale(1); }
|
||||
}
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
cd ui
|
||||
npm install
|
||||
```
|
||||
|
||||
### Dev Server
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
# Opens http://localhost:5173
|
||||
```
|
||||
|
||||
### Build
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
# Output: ui/dist/
|
||||
```
|
||||
|
||||
### Preview Production Build
|
||||
|
||||
```bash
|
||||
npm run preview
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Vite Config
|
||||
|
||||
`vite.config.ts`:
|
||||
|
||||
```typescript
|
||||
export default defineConfig({
|
||||
plugins: [react()],
|
||||
server: {
|
||||
port: 5173,
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: 'http://localhost:7373',
|
||||
changeOrigin: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
```
|
||||
|
||||
**Proxy Setup:**
|
||||
- Frontend: `http://localhost:5173`
|
||||
- API: `http://localhost:7373`
|
||||
- `/api/*` → `http://localhost:7373/api/*`
|
||||
|
||||
This allows using relative API paths in the code:
|
||||
|
||||
```typescript
|
||||
const API_BASE = '/api' // Not http://localhost:7373/api
|
||||
```
|
||||
|
||||
## TypeScript
|
||||
|
||||
### Types
|
||||
|
||||
```typescript
|
||||
// Message role
|
||||
type Role = 'user' | 'assistant'
|
||||
|
||||
// Message object
|
||||
interface Message {
|
||||
role: Role
|
||||
content: string
|
||||
}
|
||||
|
||||
// Chat request
|
||||
type ChatRequest = {
|
||||
message: string
|
||||
session_id?: string
|
||||
temperature?: number
|
||||
}
|
||||
|
||||
// SSE chunk
|
||||
type ChunkEvent = {
|
||||
type: 'chunk'
|
||||
content: string
|
||||
}
|
||||
|
||||
type SourcesEvent = {
|
||||
type: 'sources'
|
||||
sources: Array<{
|
||||
file: string
|
||||
section?: string
|
||||
date?: string
|
||||
}>
|
||||
}
|
||||
|
||||
type DoneEvent = {
|
||||
type: 'done'
|
||||
session_id: string
|
||||
}
|
||||
```
|
||||
|
||||
## API Integration
|
||||
|
||||
### Chat Endpoint
|
||||
|
||||
```typescript
|
||||
const response = await fetch('/api/chat', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
message: userInput,
|
||||
session_id: sessionId, // null for new session
|
||||
stream: true,
|
||||
}),
|
||||
})
|
||||
|
||||
// Read SSE stream
|
||||
const reader = response.body?.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
|
||||
const chunk = decoder.decode(value, { stream: true })
|
||||
// Parse SSE lines
|
||||
}
|
||||
```
|
||||
|
||||
### Session Persistence
|
||||
|
||||
The backend maintains conversation history via `session_id`:
|
||||
|
||||
1. First message: `session_id: null` → backend creates UUID
|
||||
2. Response header: `X-Session-ID: <uuid>`
|
||||
3. Subsequent messages: include `session_id: <uuid>`
|
||||
4. History retrieved automatically
|
||||
|
||||
## Customization
|
||||
|
||||
### Themes
|
||||
|
||||
Modify `App.css` and `index.css`:
|
||||
|
||||
```css
|
||||
/* Custom accent color */
|
||||
--accent-primary: #ff6b6b;
|
||||
--user-bg: #ff6b6b;
|
||||
```
|
||||
|
||||
### Fonts
|
||||
|
||||
Update `index.css`:
|
||||
|
||||
```css
|
||||
body {
|
||||
font-family: 'Inter', -apple-system, sans-serif;
|
||||
}
|
||||
```
|
||||
|
||||
### Message Layout
|
||||
|
||||
Modify `MessageList.css`:
|
||||
|
||||
```css
|
||||
.message-content {
|
||||
max-width: 90%; /* Wider messages */
|
||||
font-size: 16px; /* Larger text */
|
||||
}
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**CORS errors**
|
||||
- Check `vite.config.ts` proxy configuration
|
||||
- Verify backend CORS origins include `http://localhost:5173`
|
||||
|
||||
**Stream not updating**
|
||||
- Check browser network tab for SSE events
|
||||
- Verify `EventSourceResponse` from backend
|
||||
|
||||
**Messages not appearing**
|
||||
- Check React DevTools for state updates
|
||||
- Verify the `messages` state array is being updated correctly (new array on each change)
|
||||
|
||||
**Build fails**
|
||||
- Check TypeScript errors: `npx tsc --noEmit`
|
||||
- Update dependencies: `npm update`
|
||||
Reference in New Issue
Block a user