134 lines
3.8 KiB
Python
134 lines
3.8 KiB
Python
"""CLI for model forge operations."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import typer
|
|
|
|
from companion.config import load_config
|
|
from companion.forge.extract import TrainingDataExtractor
|
|
from companion.forge.reload import get_model_status, reload_model
|
|
from companion.forge.train import train as train_model
|
|
|
|
# Root Typer application object; the @app.command() functions in this module
# register themselves on it.
app = typer.Typer(
    help="Companion model forge - training pipeline",
)
|
|
|
|
|
|
# Typer shows the docstring below as this command's --help text, so the longer
# notes live in comments rather than in the docstring.
#
# Flow: load config -> run TrainingDataExtractor -> write JSONL -> print stats.
# Exits with status 1 when the extractor returns no examples.
@app.command()
def extract(
    output: Path = typer.Option(
        Path("~/.companion/training_data/extracted.jsonl"),
        help="Output JSONL file path",
    ),
) -> None:
    """Extract training examples from vault."""
    config = load_config()

    typer.echo("Scanning vault for reflection examples...")

    extractor = TrainingDataExtractor(config)
    examples = extractor.extract()

    if not examples:
        # Failure diagnostics go to stderr so stdout stays clean for piping.
        typer.echo("No reflection examples found in vault.", err=True)
        typer.echo(
            "Try adding tags like #reflection, #insight, or #learning to your notes.",
            err=True,
        )
        raise typer.Exit(1)

    # Save to JSONL. The default path starts with "~", so expand it before
    # creating the parent directory.
    output = output.expanduser()
    output.parent.mkdir(parents=True, exist_ok=True)
    count = extractor.save_to_jsonl(output)

    stats = extractor.get_stats()

    typer.echo(f"\nExtracted {count} training examples:")
    typer.echo(f" - Average length: {stats.get('avg_length', 0)} chars")

    top_tags = stats.get("top_tags")
    if top_tags:
        # Only the first five (tag, count) pairs are shown.
        tag_summary = ", ".join(f"{tag}({cnt})" for tag, cnt in top_tags[:5])
        typer.echo(f" - Top tags: {tag_summary}")

    typer.echo(f"\nSaved to: {output}")
|
|
|
|
|
|
# Typer shows the docstring below as this command's --help text, so the longer
# notes live in comments rather than in the docstring.
#
# Prints the "path" and "exists" entries of the mapping returned by
# get_model_status(config); "type" and "size_mb" are printed only when
# "exists" is truthy.
@app.command()
def status() -> None:
    """Check model status."""
    config = load_config()
    model_status = get_model_status(config)
    exists = model_status["exists"]

    typer.echo("Model Status:")
    typer.echo(f" Path: {model_status['path']}")
    typer.echo(f" Exists: {'Yes' if exists else 'No'}")
    if exists:
        typer.echo(f" Type: {model_status['type']}")
        typer.echo(f" Size: {model_status['size_mb']} MB")
|
|
|
|
|
|
# Typer shows the docstring below as this command's --help text, so the longer
# notes live in comments rather than in the docstring.
#
# Calls reload_model(config, model_path, backup=...) and prints the path it
# returns. The current model is backed up unless --no-backup is given.
# Exits with status 1 when reload_model raises FileNotFoundError.
@app.command()
def reload(
    model_path: Path = typer.Argument(
        ...,
        help="Path to new model directory or GGUF file",
    ),
    no_backup: bool = typer.Option(
        False,
        "--no-backup",
        help="Skip backing up current model",
    ),
) -> None:
    """Reload model with a new fine-tuned version."""
    config = load_config()
    model_path = model_path.expanduser()

    # Keep the try body to the single call that can raise, so an unrelated
    # failure is never reported as a missing model.
    try:
        active_path = reload_model(config, model_path, backup=not no_backup)
    except FileNotFoundError as e:
        # Errors belong on stderr; chain the cause for debuggability.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e

    typer.echo(f"Model reloaded successfully: {active_path}")
|
|
|
|
|
|
# Typer shows the docstring below as this command's --help text, so the longer
# notes live in comments rather than in the docstring.
#
# Validates that the training data file exists, then delegates to
# train_model(data_path, output_dir, num_epochs, learning_rate) and prints the
# path it returns together with the follow-up reload command.
# Exits with status 1 when the data file is missing or training raises.
@app.command()
def train(
    data: Path = typer.Option(
        Path("~/.companion/training_data/extracted.jsonl"),
        help="Path to training data JSONL",
    ),
    output: Path = typer.Option(
        Path("~/.companion/training"),
        help="Output directory for checkpoints",
    ),
    epochs: int = typer.Option(3, help="Number of training epochs"),
    lr: float = typer.Option(2e-4, help="Learning rate"),
) -> None:
    """Train model using QLoRA fine-tuning."""
    # Both defaults start with "~", so expand before touching the filesystem.
    data = data.expanduser()
    output = output.expanduser()

    if not data.exists():
        # Failure diagnostics go to stderr so stdout stays clean for piping.
        typer.echo(f"Training data not found: {data}", err=True)
        typer.echo("Run 'forge extract' first to generate training data.", err=True)
        raise typer.Exit(1)

    # Only the training call sits inside the try: a failure while printing the
    # success message must not be misreported as "Training failed".
    try:
        final_path = train_model(
            data_path=data,
            output_dir=output,
            num_epochs=epochs,
            learning_rate=lr,
        )
    except Exception as e:
        # CLI boundary: turn any training error into a one-line message and a
        # non-zero exit status, keeping the original exception as the cause.
        typer.echo(f"Training failed: {e}", err=True)
        raise typer.Exit(1) from e

    typer.echo(f"\nTraining complete! Model saved to: {final_path}")
    typer.echo("\nTo use this model:")
    typer.echo(f" forge reload {final_path}")
|
|
|
|
|
|
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|