feat(indexer): hierarchical chunking for large sections

- Structured notes are split by section first
- Large sections (> max_section_chars) are split further with a sliding window
- Small sections stay intact with heading preserved
- Adds max_section_chars config (default 4000)
- 2 new TDD tests for hierarchical chunking
This commit is contained in:
2026-04-11 23:58:05 -04:00
parent a744c0c566
commit 34f3ce97f7
5 changed files with 88 additions and 21 deletions

View File

@@ -3,7 +3,6 @@
from __future__ import annotations
import json
import os
from enum import Enum
from dataclasses import dataclass, field
from pathlib import Path
@@ -32,6 +31,7 @@ class VectorStoreConfig:
class IndexingConfig:
chunk_size: int = 500
chunk_overlap: int = 100
max_section_chars: int = 4000
file_patterns: list[str] = field(default_factory=lambda: ["*.md"])
deny_dirs: list[str] = field(
default_factory=lambda: [