Sprint 0-1: Python indexer, TS plugin scaffolding, and test suite
## What's new

**Python indexer (`python/obsidian_rag/`)** — full pipeline from scan to LanceDB:
- `config.py` — JSON config loader with cross-platform path resolution
- `security.py` — path traversal prevention, HTML stripping, sensitive content detection, dir allow/deny lists
- `chunker.py` — section-split for journal entries (date-named files), sliding-window for unstructured notes
- `embedder.py` — Ollama `/api/embeddings` client with batched requests and timeout/error handling
- `vector_store.py` — LanceDB schema, upsert (merge_insert), delete, search with filters, stats
- `indexer.py` — full/sync/reindex pipeline orchestrator with progress yields
- `cli.py` — `index | sync | reindex | status` CLI commands

**TypeScript plugin (`src/`)** — OpenClaw plugin scaffold:
- `utils/` — config loader, TypeScript types, response envelope factory, LanceDB client
- `services/` — health state machine (HEALTHY/DEGRADED/UNAVAILABLE), vault watcher with debounce/batching, indexer bridge (subprocess spawner)
- `tools/` — 4 tool stubs: search, index, status, memory_store (OpenClaw wiring pending)
- `index.ts` — plugin entry point with health probe + vault watcher startup

**Config** (`obsidian-rag/config.json`, `openclaw.plugin.json`):
- 627 files / 3764 chunks indexed in dev vault

**Tests: 76 passing**
- Python: 64 pytest tests (chunker, security, vector_store, config)
- TypeScript: 12 vitest tests (lancedb client, response envelope)

## Bugs fixed

- LanceDB `tags` column filter: `LIKE '%tag%'` → `list_contains(tags, 'tag')` (List<String> column)
- LanceDB JS `db.list_tables()` returns `ListTablesResponse` object, not plain array
- LanceDB JS result score field: `_score` → `_distance`
- TypeScript regex literal with unescaped `/` in path-resolve regex
- Python: `create_table_if_not_exists` identity check → name comparison

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
130
python/tests/unit/test_config.py
Normal file
130
python/tests/unit/test_config.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Tests for obsidian_rag.config — loader, path resolution, defaults."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from obsidian_rag.config import (
|
||||
EmbeddingConfig,
|
||||
ObsidianRagConfig,
|
||||
load_config,
|
||||
resolve_vector_db_path,
|
||||
resolve_vault_path,
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Config loading
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_load_config_parses_valid_json(tmp_path: Path):
    """Values written to a JSON config file are visible on the loaded config."""
    payload = {
        "vault_path": "/path/to/vault",
        "embedding": {"model": "custom-model:tag", "dimensions": 512},
        "vector_store": {"path": "/vectors/db"},
    }
    cfg_file = tmp_path / "config.json"
    cfg_file.write_text(json.dumps(payload))

    loaded = load_config(cfg_file)

    assert loaded.vault_path == "/path/to/vault"
    assert loaded.embedding.model == "custom-model:tag"
    assert loaded.embedding.dimensions == 512  # overridden
|
||||
|
||||
|
||||
def test_load_config_missing_file_raises(tmp_path: Path):
    """Loading a config path that was never created raises FileNotFoundError."""
    missing = tmp_path / "nonexistent.json"
    with pytest.raises(FileNotFoundError):
        load_config(missing)
|
||||
|
||||
|
||||
def test_load_config_merges_partial_json(tmp_path: Path):
    """A file that sets only ``vault_path`` still yields the other expected values."""
    cfg_file = tmp_path / "config.json"
    partial = {"vault_path": "/custom/vault"}
    cfg_file.write_text(json.dumps(partial))

    loaded = load_config(cfg_file)

    # The value supplied in the file is honoured ...
    assert loaded.vault_path == "/custom/vault"
    # ... while fields the file omits fall back to defaults.
    assert loaded.embedding.base_url == "http://localhost:11434"  # default
    assert loaded.indexing.chunk_size == 500  # default
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# resolve_vault_path
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_resolve_vault_path_absolute():
    """An absolute ``vault_path`` resolves to the equivalent ``Path``."""
    resolved = resolve_vault_path(ObsidianRagConfig(vault_path="/absolute/vault"))
    expected = Path("/absolute/vault")
    assert resolved == expected
|
||||
|
||||
|
||||
def test_resolve_vault_path_relative_defaults_to_project_root():
    """A relative ``vault_path`` keeps its last two components once resolved."""
    relative_cfg = ObsidianRagConfig(vault_path="KnowledgeVault/Default")
    resolved = resolve_vault_path(relative_cfg)

    # Should resolve relative to python/obsidian_rag/ → project root.
    # Only the two trailing components are checked, so the test does not
    # depend on where the project is checked out.
    trailing = (resolved.parent.name, resolved.name)
    assert trailing == ("KnowledgeVault", "Default")
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# resolve_vector_db_path
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_resolve_vector_db_path_string_absolute():
    """VectorStoreConfig stores path as a string; an absolute one resolves to the same Path."""
    from obsidian_rag.config import VectorStoreConfig

    # Using a string path (the actual usage)
    store = VectorStoreConfig(path="/my/vectors.lance")
    resolved = resolve_vector_db_path(ObsidianRagConfig(vector_store=store))

    assert resolved == Path("/my/vectors.lance")
|
||||
|
||||
|
||||
def test_resolve_vector_db_path_string_relative(tmp_path: Path):
    """Relative paths are resolved against the data directory."""
    import obsidian_rag.config as cfg_mod

    # Set up data dir + vault marker (required by _resolve_data_dir)
    # Note: the dev data dir is "obsidian-rag" (without leading dot)
    data_dir = tmp_path / "obsidian-rag"
    data_dir.mkdir()
    (tmp_path / "KnowledgeVault").mkdir()
    (data_dir / "vectors.lance").touch()

    relative_cfg = ObsidianRagConfig(
        vector_store=cfg_mod.VectorStoreConfig(path="vectors.lance"),
    )

    # Point the module-level DEFAULT_CONFIG_DIR at the temporary tree for the
    # duration of the call only; the finally clause restores it even if the
    # resolver raises, so other tests see the original value.
    saved_dir = cfg_mod.DEFAULT_CONFIG_DIR
    cfg_mod.DEFAULT_CONFIG_DIR = tmp_path
    try:
        resolved = resolve_vector_db_path(relative_cfg)
    finally:
        cfg_mod.DEFAULT_CONFIG_DIR = saved_dir

    # Resolves to data_dir / vectors.lance
    assert resolved.parent.name == "obsidian-rag"  # dev dir is "obsidian-rag" (no leading dot)
    assert resolved.name == "vectors.lance"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Dataclass defaults
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_embedding_config_defaults():
    """A default-constructed EmbeddingConfig has the expected model, dimensions and batch size."""
    defaults = EmbeddingConfig()
    observed = (defaults.model, defaults.dimensions, defaults.batch_size)
    assert observed == ("mxbai-embed-large", 1024, 64)
|
||||
|
||||
|
||||
def test_security_config_defaults():
    """A default-constructed SecurityConfig includes the expected sensitive entries."""
    from obsidian_rag.config import SecurityConfig

    defaults = SecurityConfig()

    # Membership checks only: the defaults may contain further entries.
    expectations = (
        ("#mentalhealth", defaults.sensitive_sections),
        ("health", defaults.require_confirmation_for),
    )
    for needle, haystack in expectations:
        assert needle in haystack
|
||||
Reference in New Issue
Block a user