RAG — chunking & overlap
RAGchunk.ts
/**
 * Splits `text` into chunks of roughly `size` characters, breaking only at
 * sentence boundaries, and seeds each new chunk with the tail of the
 * previous one so neighbouring chunks share some context.
 *
 * Notes on the contract:
 * - A sentence is a run of text ending in one or more of `.`, `!`, `?`.
 *   Any unterminated text at the end of the input is kept as a final sentence.
 * - `size` is a soft limit: a chunk is closed when adding the next sentence
 *   would exceed it, so a single long sentence (or overlap tail + sentence)
 *   can still produce a chunk longer than `size`.
 * - The overlap tail is taken by character count, so it may start mid-word.
 *
 * @param text - The input text to split.
 * @param size - Soft maximum chunk length, in characters.
 * @param overlap - Number of trailing characters of a finished chunk to
 *   prepend to the next one. Values that are not greater than zero
 *   (including `NaN`) disable overlap.
 * @returns The trimmed, non-empty chunks in input order.
 */
export function chunk(text: string, size = 300, overlap = 50): string[] {
  // Split on sentence boundaries so we never cut mid-thought. The second
  // alternative captures the whole unterminated tail (not just its last
  // word), and `*` in the first keeps leading punctuation such as "...".
  const sentences = text.match(/[^.!?]*[.!?]+|[^.!?]+$/g) ?? [];
  const chunks: string[] = [];
  let current = "";

  for (const s of sentences) {
    // A whitespace-only tail carries no content; skipping it avoids emitting
    // a chunk that consists solely of the overlap from the previous one.
    if (!s.trim()) continue;

    if (current && (current + s).length > size) {
      chunks.push(current.trim());
      // Carry the tail of this chunk into the next one for context.
      // Guard against `slice(-0)`, which would return the entire chunk.
      const tail = overlap > 0 ? current.slice(-overlap) : "";
      current = tail + s;
    } else {
      current += s;
    }
  }

  const last = current.trim();
  if (last) chunks.push(last);
  return chunks;
}
0