# kv-ai/src/companion/forge/cli.py

"""CLI for model forge operations."""

from __future__ import annotations

from pathlib import Path

import typer

from companion.config import load_config
from companion.forge.extract import TrainingDataExtractor
from companion.forge.reload import get_model_status, reload_model
from companion.forge.train import train as train_model

# Root Typer application; the command functions in this module register
# themselves on it through the @app.command() decorator.
app = typer.Typer(help="Companion model forge - training pipeline")


@app.command()
def extract(
    output: Path = typer.Option(
        Path("~/.companion/training_data/extracted.jsonl"),
        help="Output JSONL file path",
    ),
) -> None:
    """Extract training examples from vault."""
    # NOTE: Typer shows the docstring above as this command's help text, so
    # implementation notes live in comments instead.
    #
    # Flow: load config -> run the extractor -> exit with code 1 (after a
    # tagging hint) if nothing was found -> otherwise write JSONL to the
    # ~-expanded output path and print a short summary.
    settings = load_config()

    typer.echo("Scanning vault for reflection examples...")

    extractor = TrainingDataExtractor(settings)
    found = extractor.extract()

    if not found:
        typer.echo("No reflection examples found in vault.")
        typer.echo(
            "Try adding tags like #reflection, #insight, or #learning to your notes."
        )
        raise typer.Exit(1)

    # Resolve "~" and make sure the destination directory exists before saving.
    target = output.expanduser()
    target.parent.mkdir(parents=True, exist_ok=True)
    written = extractor.save_to_jsonl(target)

    summary = extractor.get_stats()

    typer.echo(f"\nExtracted {written} training examples:")
    typer.echo(f"  - Average length: {summary.get('avg_length', 0)} chars")
    top_tags = summary.get("top_tags")
    if top_tags:
        # Only the first five (tag, count) pairs are shown.
        rendered = ", ".join(f"{tag}({cnt})" for tag, cnt in top_tags[:5])
        typer.echo(f"  - Top tags: {rendered}")
    typer.echo(f"\nSaved to: {target}")


@app.command()
def status() -> None:
    """Check model status."""
    # NOTE: Typer shows the docstring above as this command's help text, so
    # implementation notes live in comments instead.
    #
    # Prints the path and existence flag reported by get_model_status() for
    # the loaded config; type and size are printed only when the model exists.
    report = get_model_status(load_config())

    typer.echo("Model Status:")
    typer.echo(f"  Path: {report['path']}")

    present = report["exists"]
    typer.echo(f"  Exists: {'Yes' if present else 'No'}")
    if not present:
        return

    typer.echo(f"  Type: {report['type']}")
    typer.echo(f"  Size: {report['size_mb']} MB")


@app.command()
def reload(
    model_path: Path = typer.Argument(
        ...,
        help="Path to new model directory or GGUF file",
    ),
    no_backup: bool = typer.Option(
        False,
        "--no-backup",
        help="Skip backing up current model",
    ),
) -> None:
    """Reload model with a new fine-tuned version."""
    # NOTE: Typer shows the docstring above as this command's help text, so
    # implementation notes live in comments instead.
    #
    # model_path: "~" is expanded before it is handed to reload_model().
    # no_backup:  inverted into reload_model()'s ``backup`` keyword.
    # Exits with code 1 when reload_model() raises FileNotFoundError.
    config = load_config()

    model_path = model_path.expanduser()

    # Keep the try body to the one call whose FileNotFoundError we handle.
    try:
        active_path = reload_model(config, model_path, backup=not no_backup)
    except FileNotFoundError as e:
        # Diagnostics belong on stderr so stdout carries only the success
        # report; chain the cause so it is not lost when debugging.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e

    typer.echo(f"Model reloaded successfully: {active_path}")


@app.command()
def train(
    data: Path = typer.Option(
        Path("~/.companion/training_data/extracted.jsonl"),
        help="Path to training data JSONL",
    ),
    output: Path = typer.Option(
        Path("~/.companion/training"),
        help="Output directory for checkpoints",
    ),
    epochs: int = typer.Option(3, help="Number of training epochs"),
    lr: float = typer.Option(2e-4, help="Learning rate"),
) -> None:
    """Train model using QLoRA fine-tuning."""
    # NOTE: Typer shows the docstring above as this command's help text, so
    # implementation notes live in comments instead.
    #
    # data / output: "~" is expanded before use.
    # epochs / lr:   forwarded to train_model() as num_epochs / learning_rate.
    # Exits with code 1 when the data path does not exist or when
    # train_model() raises.
    data = data.expanduser()
    output = output.expanduser()

    if not data.exists():
        # Failure diagnostics go to stderr, keeping stdout for normal output.
        typer.echo(f"Training data not found: {data}", err=True)
        typer.echo("Run 'forge extract' first to generate training data.", err=True)
        raise typer.Exit(1)

    # Only the training call sits inside the try, so an error while printing
    # the success message is never misreported as a training failure.
    try:
        final_path = train_model(
            data_path=data,
            output_dir=output,
            num_epochs=epochs,
            learning_rate=lr,
        )
    except Exception as e:
        # Deliberately broad: this is the CLI boundary, and any training
        # error should become a short message plus a non-zero exit code.
        typer.echo(f"Training failed: {e}", err=True)
        raise typer.Exit(1) from e

    typer.echo(f"\nTraining complete! Model saved to: {final_path}")
    typer.echo("\nTo use this model:")
    typer.echo(f"  forge reload {final_path}")


# Run the Typer application when this module is executed directly as a script.
if __name__ == "__main__":
    app()