169 lines
5.5 KiB
Python
169 lines
5.5 KiB
Python
"""Configuration loader — reads ~/.obsidian-rag/config.json (or ./obsidian-rag/ for dev)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from enum import Enum
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# Directory three levels above this file. The inline note suggests the file
# lives under a python/ tree and that this points at the project root —
# TODO confirm against the actual package layout.
# NOTE(review): no code visible in this module reads this constant.
DEFAULT_CONFIG_DIR = Path(__file__).parent.parent.parent # python/ → project root
|
|
|
|
|
|
@dataclass
class EmbeddingConfig:
    """Settings for the embedding backend (the "embedding" config section).

    Every field has a default, so an instance can be built with no arguments.
    """

    # Identifier of the embedding backend.
    provider: str = "ollama"
    # Name of the embedding model.
    model: str = "mxbai-embed-large"
    # Base URL of the provider's HTTP endpoint.
    base_url: str = "http://localhost:11434"
    # Presumably the embedding vector length for the model — TODO confirm.
    dimensions: int = 1024
    # Presumably the number of texts embedded per request — TODO confirm.
    batch_size: int = 64
|
|
|
|
|
|
@dataclass
class VectorStoreConfig:
    """Settings for the vector store (the "vector_store" config section)."""

    # Identifier of the vector store backend.
    type: str = "lancedb"
    # Store location; relative values are resolved against the data directory
    # by resolve_vector_db_path(), absolute values are used as given.
    path: str = "" # resolved relative to data_dir
|
|
|
|
|
|
def _default_file_patterns() -> list[str]:
    """Return a fresh list of the file patterns indexed by default."""
    return ["*.md"]


def _default_deny_dirs() -> list[str]:
    """Return a fresh list of the directory names denied by default."""
    return [".obsidian", ".trash", "zzz-Archive", ".git", ".logseq"]


@dataclass
class IndexingConfig:
    """Settings for indexing (the "indexing" config section).

    List-valued fields use default factories so that every instance owns its
    own list objects.
    """

    chunk_size: int = 500
    chunk_overlap: int = 100
    # File patterns selecting what gets indexed.
    file_patterns: list[str] = field(default_factory=_default_file_patterns)
    # Directory names on the deny list.
    deny_dirs: list[str] = field(default_factory=_default_deny_dirs)
    # Directory names on the allow list; empty by default.
    allow_dirs: list[str] = field(default_factory=list)
|
|
|
|
|
|
def _default_confirmation_categories() -> list[str]:
    """Return a fresh default list for ``require_confirmation_for``."""
    return ["health", "financial_debt"]


def _default_sensitive_sections() -> list[str]:
    """Return a fresh default list for ``sensitive_sections``."""
    return ["#mentalhealth", "#physicalhealth", "#Relations"]


@dataclass
class SecurityConfig:
    """Settings for the "security" config section.

    List-valued fields use default factories so that every instance owns its
    own list objects.
    """

    # Category names that require confirmation.
    require_confirmation_for: list[str] = field(
        default_factory=_default_confirmation_categories
    )
    # Tags marking sections treated as sensitive.
    sensitive_sections: list[str] = field(
        default_factory=_default_sensitive_sections
    )
    local_only: bool = True
    auto_approve_sensitive: bool = False
|
|
|
|
|
|
def _default_memory_patterns() -> dict[str, list[str]]:
    """Return a fresh mapping of category name to its trigger strings."""
    return {
        "financial": ["owe", "owed", "debt", "paid", "$", "spent", "spend"],
        "health": ["#mentalhealth", "#physicalhealth", "medication", "therapy"],
        "commitments": ["shopping list", "costco", "amazon", "grocery"],
    }


@dataclass
class MemoryConfig:
    """Settings for the "memory" config section."""

    auto_suggest: bool = True
    # Category name -> list of strings associated with that category. Built by
    # a factory so instances never share the mapping or its inner lists.
    patterns: dict[str, list[str]] = field(default_factory=_default_memory_patterns)
|
|
|
|
|
|
@dataclass
class ObsidianRagConfig:
    """Top-level configuration object: the vault path plus one section each.

    Each section field is built by calling its dataclass with no arguments, so
    a bare ``ObsidianRagConfig()`` carries every section's defaults.
    """

    # Vault location as written in the config file; empty string by default.
    # resolve_vault_path() turns it into a Path.
    vault_path: str = ""
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    vector_store: VectorStoreConfig = field(default_factory=VectorStoreConfig)
    indexing: IndexingConfig = field(default_factory=IndexingConfig)
    security: SecurityConfig = field(default_factory=SecurityConfig)
    memory: MemoryConfig = field(default_factory=MemoryConfig)
|
|
|
|
|
|
def _resolve_data_dir() -> Path:
    """Choose the data directory from the current working directory.

    Returns:
        ``<cwd>/obsidian-rag`` when that directory exists or when
        ``<cwd>/KnowledgeVault`` exists (dev layout); otherwise
        ``~/.obsidian-rag`` with the home directory expanded (production).
    """
    # Detection keys off the working directory rather than the package
    # location so that pip-installed copies are handled as well.
    here = Path.cwd()
    dev_candidate = here / "obsidian-rag"
    dev_markers = (dev_candidate, here / "KnowledgeVault")

    if not any(marker.exists() for marker in dev_markers):
        return Path(os.path.expanduser("~/.obsidian-rag"))
    return dev_candidate
|
|
|
|
|
|
def load_config(config_path: str | Path | None = None) -> ObsidianRagConfig:
    """Load the configuration from a JSON file.

    Args:
        config_path: Location of the JSON file. When ``None``, ``config.json``
            inside the directory returned by ``_resolve_data_dir()`` is used.

    Returns:
        An ``ObsidianRagConfig`` in which every section starts from its
        dataclass defaults and is overridden by the values found in the file.
        Sections that are missing or ``null`` keep their defaults.

    Raises:
        FileNotFoundError: If the config file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    if config_path is None:
        config_path = _resolve_data_dir() / "config.json"
    else:
        config_path = Path(config_path)

    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")

    # Decode explicitly as UTF-8: without ``encoding`` the platform locale is
    # used, which can mis-decode non-ASCII values such as vault paths.
    with open(config_path, encoding="utf-8") as f:
        raw = json.load(f)

    if not isinstance(raw, dict):
        raise ValueError(
            f"Config file must contain a JSON object at top level: {config_path}"
        )

    def section(name: str) -> dict[str, Any]:
        # An explicit ``null`` section is treated like an absent one.
        value = raw.get(name)
        return {} if value is None else value

    vault_path = raw.get("vault_path")
    return ObsidianRagConfig(
        vault_path="" if vault_path is None else vault_path,
        embedding=_merge(EmbeddingConfig(), section("embedding")),
        vector_store=_merge(VectorStoreConfig(), section("vector_store")),
        indexing=_merge(IndexingConfig(), section("indexing")),
        security=_merge(SecurityConfig(), section("security")),
        memory=_merge(MemoryConfig(), section("memory")),
    )
|
|
|
|
|
|
def _merge(default: Any, overrides: dict[str, Any]) -> Any:
    """Overlay ``overrides`` on ``default`` and return the combined value.

    Behaviour by type of ``default``:

    * dataclass instance: returns a new instance of the same class. Each key
      in ``overrides`` replaces the field of that name; when the override is
      itself a dict it is merged (by these same rules) with the field's
      current value, so dict-valued fields and nested dataclasses keep the
      entries that the override does not mention.
    * dict (with dict ``overrides``): returns ``{**default, **overrides}``.
    * anything else: returns ``overrides``.

    ``overrides`` of ``None`` always returns ``default`` unchanged.

    Args:
        default: The base value, typically a freshly built config dataclass.
        overrides: Values parsed from the config file for that section.

    Raises:
        TypeError: If ``default`` is a dataclass instance and ``overrides`` is
            not a dict, or names a key that is not a field of the dataclass.
    """
    if overrides is None:
        return default

    # Dataclass *classes* also carry __dataclass_fields__; only instances are
    # merged field by field.
    if hasattr(default, "__dataclass_fields__") and not isinstance(default, type):
        cls_name = type(default).__name__
        if not isinstance(overrides, dict):
            raise TypeError(
                f"{cls_name} overrides must be a dict, "
                f"got {type(overrides).__name__}"
            )

        known = default.__dataclass_fields__
        unknown = [key for key in overrides if key not in known]
        if unknown:
            # Fail here with the offending names instead of inside __init__.
            raise TypeError(
                f"{cls_name} got unexpected config key(s): "
                + ", ".join(repr(key) for key in unknown)
            )

        updates: dict[str, Any] = {}
        for key, val in overrides.items():
            if isinstance(val, dict):
                # Merge against the instance's current value. The Field's
                # ``default`` is MISSING for default_factory fields, so it
                # cannot serve as the merge base.
                updates[key] = _merge(getattr(default, key), val)
            else:
                updates[key] = val
        return type(default)(**{**vars(default), **updates})

    if isinstance(default, dict) and isinstance(overrides, dict):
        return {**default, **overrides}
    return overrides
|
|
|
|
|
|
def resolve_vault_path(config: ObsidianRagConfig) -> Path:
    """Return the vault location described by ``config.vault_path``.

    A leading ``~`` or ``~user`` is expanded to the home directory first, so a
    value such as ``~/Documents/vault`` is not mistaken for a relative path.
    Absolute paths are returned as given; relative paths are joined onto the
    current working directory and resolved.

    Args:
        config: Loaded configuration; only ``vault_path`` is read.

    Returns:
        The vault path as a ``Path``.
    """
    # os.path.expanduser leaves the value untouched when no home directory
    # can be determined, so this never introduces a new failure mode.
    vault = Path(os.path.expanduser(config.vault_path))
    if vault.is_absolute():
        return vault
    return (Path.cwd() / vault).resolve()
|
|
|
|
|
|
def resolve_vector_db_path(config: ObsidianRagConfig) -> Path:
    """Return the vector store location described by the config.

    Absolute paths are returned as given. Relative paths are joined onto the
    data directory chosen by ``_resolve_data_dir()`` and resolved, so the
    store lives next to the config file in both the dev layout
    (``<cwd>/obsidian-rag``) and the production layout (``~/.obsidian-rag``).

    Args:
        config: Loaded configuration; only ``vector_store.path`` is read.

    Returns:
        The vector store path as a ``Path``.
    """
    store_path = Path(config.vector_store.path)
    if store_path.is_absolute():
        return store_path
    # Use the shared data-dir lookup rather than a hard-coded
    # <cwd>/obsidian-rag: creating that directory in production would make
    # the next _resolve_data_dir() call treat the cwd as a dev checkout.
    return (_resolve_data_dir() / store_path).resolve()
|