feat(indexer): hierarchical chunking for large sections
- Split structured notes by section first
- Large sections (> max_section_chars) are broken up via a sliding window
- Small sections stay intact with their heading preserved
- Adds max_section_chars config (default 4000)
- Adds 2 new TDD tests for hierarchical chunking
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
@@ -32,6 +31,7 @@ class VectorStoreConfig:
|
||||
class IndexingConfig:
|
||||
chunk_size: int = 500
|
||||
chunk_overlap: int = 100
|
||||
max_section_chars: int = 4000
|
||||
file_patterns: list[str] = field(default_factory=lambda: ["*.md"])
|
||||
deny_dirs: list[str] = field(
|
||||
default_factory=lambda: [
|
||||
|
||||
Reference in New Issue
Block a user