RAG — chunking & overlap

RAG

chunk.ts

/**
 * Splits `text` into sentence-aligned chunks of roughly `size` characters,
 * prefixing every chunk after the first with the last `overlap` characters of
 * the previous one so neighbouring chunks share context.
 *
 * Contract notes:
 * - Sentences are never split, so a chunk can exceed `size` when a single
 *   sentence (plus the carried overlap) is longer than `size`.
 * - The overlap is a raw character slice of the previous chunk and may
 *   therefore begin mid-word.
 * - Returned chunks are trimmed of surrounding whitespace.
 *
 * @param text - Input text to split.
 * @param size - Soft upper bound on chunk length, in characters.
 * @param overlap - Number of trailing characters carried into the next chunk.
 *   Values below 1 (zero, negative, NaN) disable the overlap.
 * @returns The chunks in document order; empty when `text` has no
 *   non-whitespace content.
 */
export function chunk(text: string, size = 300, overlap = 50): string[] {
  // Lossless sentence split: each match is either a run ending in terminal
  // punctuation, or the unterminated remainder at the end of the input.
  // Concatenating the matches reproduces `text` exactly, so no words are
  // dropped when the text does not end in ".", "!" or "?".
  const sentences: string[] = text.match(/[^.!?]*[.!?]+|[^.!?]+$/g) ?? [];

  // `slice(-0)` is `slice(0)` and would return the WHOLE string, so an
  // overlap below 1 has to be handled explicitly rather than passed to slice.
  const carry = Math.trunc(overlap);

  const chunks: string[] = [];
  let current = "";

  for (const s of sentences) {
    if (current && current.length + s.length > size) {
      chunks.push(current.trim());
      // Carry the tail of this chunk into the next one for context.
      const tail = carry > 0 ? current.slice(-carry) : "";
      current = tail + s;
    } else {
      current += s;
    }
  }

  const last = current.trim();
  if (last) chunks.push(last);
  return chunks;
}

0