Back to Journal
AI Architecture

How to Build a RAG Pipeline Using NestJS

Step-by-step tutorial for building a RAG pipeline with NestJS, from project setup through deployment.

Muneer Puthiya Purayil 15 min read

NestJS provides an enterprise-grade framework for building RAG pipeline APIs in TypeScript. Its module system, dependency injection, and decorator-based architecture make it well-suited for the complex service composition that RAG pipelines require. This tutorial covers building a production RAG API with NestJS.

Project Setup

bash
nest new rag-api
cd rag-api
npm install @anthropic-ai/sdk openai @qdrant/js-client-rest pdf-parse uuid
npm install -D @types/uuid

Module Architecture

src/
├── app.module.ts
├── config/
│   └── config.module.ts
├── embedding/
│   ├── embedding.module.ts
│   └── embedding.service.ts
├── chunker/
│   ├── chunker.module.ts
│   └── chunker.service.ts
├── vector-store/
│   ├── vector-store.module.ts
│   └── vector-store.service.ts
├── generator/
│   ├── generator.module.ts
│   └── generator.service.ts
├── rag/
│   ├── rag.module.ts
│   ├── rag.controller.ts
│   ├── rag.service.ts
│   └── dto/
│       ├── query.dto.ts
│       └── ingest.dto.ts
└── main.ts

Configuration Module

typescript
1// src/config/config.module.ts
2import { Module, Global } from '@nestjs/common';
3import { ConfigModule as NestConfigModule, ConfigService } from '@nestjs/config';
4 
5export interface RAGConfig {
6 embeddingModel: string;
7 embeddingDimensions: number;
8 generationModel: string;
9 chunkSize: number;
10 chunkOverlap: number;
11 retrievalTopK: number;
12 maxGenerationTokens: number;
13 qdrantUrl: string;
14 qdrantApiKey: string;
15 collectionName: string;
16}
17 
18@Global()
19@Module({
20 imports: [
21 NestConfigModule.forRoot({
22 load: [() => ({
23 rag: {
24 embeddingModel: process.env.EMBEDDING_MODEL || 'text-embedding-3-small',
25 embeddingDimensions: parseInt(process.env.EMBEDDING_DIMENSIONS || '1536'),
26 generationModel: process.env.GENERATION_MODEL || 'claude-sonnet-4-5-20250514',
27 chunkSize: parseInt(process.env.CHUNK_SIZE || '512'),
28 chunkOverlap: parseInt(process.env.CHUNK_OVERLAP || '50'),
29 retrievalTopK: parseInt(process.env.RETRIEVAL_TOP_K || '5'),
30 maxGenerationTokens: parseInt(process.env.MAX_GENERATION_TOKENS || '1024'),
31 qdrantUrl: process.env.QDRANT_URL,
32 qdrantApiKey: process.env.QDRANT_API_KEY,
33 collectionName: process.env.COLLECTION_NAME || 'knowledge_base',
34 } satisfies RAGConfig,
35 })],
36 }),
37 ],
38 exports: [NestConfigModule],
39})
40export class AppConfigModule {}
41 

Chunker Service

typescript
1// src/chunker/chunker.service.ts
2import { Injectable } from '@nestjs/common';
3import { ConfigService } from '@nestjs/config';
4import { randomUUID } from 'node:crypto';
5 
6export interface Chunk {
7 id: string;
8 text: string;
9 metadata: {
10 documentId: string;
11 source: string;
12 section?: string;
13 chunkIndex: number;
14 };
15}
16 
17@Injectable()
18export class ChunkerService {
19 private maxTokens: number;
20 private overlap: number;
21 
22 constructor(config: ConfigService) {
23 this.maxTokens = config.get<number>('rag.chunkSize', 512);
24 this.overlap = config.get<number>('rag.chunkOverlap', 50);
25 }
26 
27 chunk(text: string, documentId: string, source: string): Chunk[] {
28 const sections = this.extractSections(text);
29 const chunks: Chunk[] = [];
30 
31 for (const section of sections) {
32 const prefix = `Section: ${section.header}\n\n`;
33 const sectionText = prefix + section.content;
34 
35 if (this.tokenCount(sectionText) <= this.maxTokens) {
36 chunks.push({
37 id: randomUUID(),
38 text: sectionText,
39 metadata: { documentId, source, section: section.header, chunkIndex: chunks.length },
40 });
41 } else {
42 const paragraphs = section.content.split('\n\n');
43 let current = prefix;
44 
45 for (const para of paragraphs) {
46 if (this.tokenCount(current + para) > this.maxTokens && current !== prefix) {
47 chunks.push({
48 id: randomUUID(),
49 text: current.trim(),
50 metadata: { documentId, source, section: section.header, chunkIndex: chunks.length },
51 });
52 current = prefix + para + '\n\n';
53 } else {
54 current += para + '\n\n';
55 }
56 }
57 
58 if (current.trim() !== prefix.trim()) {
59 chunks.push({
60 id: randomUUID(),
61 text: current.trim(),
62 metadata: { documentId, source, section: section.header, chunkIndex: chunks.length },
63 });
64 }
65 }
66 }
67 
68 return chunks;
69 }
70 
71 private extractSections(text: string): Array<{ header: string; content: string }> {
72 const sections: Array<{ header: string; content: string }> = [];
73 let currentHeader = 'Introduction';
74 let currentContent = '';
75 
76 for (const line of text.split('\n')) {
77 const match = line.match(/^#{1,3}\s+(.+)$/);
78 if (match) {
79 if (currentContent.trim()) {
80 sections.push({ header: currentHeader, content: currentContent.trim() });
81 }
82 currentHeader = match[1];
83 currentContent = '';
84 } else {
85 currentContent += line + '\n';
86 }
87 }
88 
89 if (currentContent.trim()) {
90 sections.push({ header: currentHeader, content: currentContent.trim() });
91 }
92 
93 return sections;
94 }
95 
96 private tokenCount(text: string): number {
97 return Math.ceil(text.split(/\s+/).length * 1.33);
98 }
99}
100 

Embedding Service

typescript
1// src/embedding/embedding.service.ts
2import { Injectable } from '@nestjs/common';
3import { ConfigService } from '@nestjs/config';
4import OpenAI from 'openai';
5 
6@Injectable()
7export class EmbeddingService {
8 private client: OpenAI;
9 private model: string;
10 
11 constructor(config: ConfigService) {
12 this.client = new OpenAI();
13 this.model = config.get<string>('rag.embeddingModel', 'text-embedding-3-small');
14 }
15 
16 async embed(texts: string[], batchSize = 64): Promise<number[][]> {
17 const embeddings: number[][] = [];
18 
19 for (let i = 0; i < texts.length; i += batchSize) {
20 const batch = texts.slice(i, i + batchSize);
21 const response = await this.client.embeddings.create({
22 model: this.model,
23 input: batch,
24 });
25 embeddings.push(...response.data.map(d => d.embedding));
26 }
27 
28 return embeddings;
29 }
30 
31 async embedQuery(query: string): Promise<number[]> {
32 const [embedding] = await this.embed([query]);
33 return embedding;
34 }
35}
36 

Need a second opinion on your AI systems architecture?

I run free 30-minute strategy calls for engineering teams tackling this exact problem.

Book a Free Call

Vector Store Service

typescript
1// src/vector-store/vector-store.service.ts
2import { Injectable, OnModuleInit } from '@nestjs/common';
3import { ConfigService } from '@nestjs/config';
4import { QdrantClient } from '@qdrant/js-client-rest';
5import type { Chunk } from '../chunker/chunker.service';
6 
7export interface SearchResult {
8 id: string;
9 text: string;
10 score: number;
11 source: string;
12 section?: string;
13}
14 
15@Injectable()
16export class VectorStoreService implements OnModuleInit {
17 private client: QdrantClient;
18 private collection: string;
19 private dimensions: number;
20 
21 constructor(config: ConfigService) {
22 this.client = new QdrantClient({
23 url: config.get<string>('rag.qdrantUrl'),
24 apiKey: config.get<string>('rag.qdrantApiKey'),
25 });
26 this.collection = config.get<string>('rag.collectionName', 'knowledge_base');
27 this.dimensions = config.get<number>('rag.embeddingDimensions', 1536);
28 }
29 
30 async onModuleInit() {
31 const collections = await this.client.getCollections();
32 const exists = collections.collections.some(c => c.name === this.collection);
33 if (!exists) {
34 await this.client.createCollection(this.collection, {
35 vectors: { size: this.dimensions, distance: 'Cosine' },
36 });
37 }
38 }
39 
40 async upsert(chunks: Chunk[], embeddings: number[][]): Promise<void> {
41 const points = chunks.map((chunk, i) => ({
42 id: chunk.id,
43 vector: embeddings[i],
44 payload: { text: chunk.text, ...chunk.metadata },
45 }));
46 
47 await this.client.upsert(this.collection, { points });
48 }
49 
50 async search(queryEmbedding: number[], topK: number, filters?: Record<string, string>): Promise<SearchResult[]> {
51 const queryFilter = filters
52 ? { must: Object.entries(filters).map(([key, value]) => ({ key, match: { value } })) }
53 : undefined;
54 
55 const results = await this.client.search(this.collection, {
56 vector: queryEmbedding,
57 limit: topK,
58 filter: queryFilter,
59 });
60 
61 return results.map(r => ({
62 id: String(r.id),
63 text: String(r.payload?.text ?? ''),
64 score: r.score,
65 source: String(r.payload?.source ?? 'Unknown'),
66 section: r.payload?.section as string | undefined,
67 }));
68 }
69}
70 

Generator Service

typescript
1// src/generator/generator.service.ts
2import { Injectable } from '@nestjs/common';
3import { ConfigService } from '@nestjs/config';
4import Anthropic from '@anthropic-ai/sdk';
5 
6@Injectable()
7export class GeneratorService {
8 private client: Anthropic;
9 private model: string;
10 private maxTokens: number;
11 
12 constructor(config: ConfigService) {
13 this.client = new Anthropic();
14 this.model = config.get<string>('rag.generationModel', 'claude-sonnet-4-5-20250514');
15 this.maxTokens = config.get<number>('rag.maxGenerationTokens', 1024);
16 }
17 
18 async generate(question: string, context: string) {
19 const response = await this.client.messages.create({
20 model: this.model,
21 max_tokens: this.maxTokens,
22 system: 'Answer using only the provided context. Cite your sources.',
23 messages: [
24 { role: 'user', content: `Context:\n${context}\n\nQuestion: ${question}` },
25 ],
26 });
27 
28 return {
29 text: response.content[0].type === 'text' ? response.content[0].text : '',
30 model: this.model,
31 tokens: response.usage.input_tokens + response.usage.output_tokens,
32 };
33 }
34}
35 

RAG Controller and Service

typescript
1// src/rag/rag.service.ts
2import { Injectable } from '@nestjs/common';
3import { ConfigService } from '@nestjs/config';
4import { EmbeddingService } from '../embedding/embedding.service';
5import { VectorStoreService, SearchResult } from '../vector-store/vector-store.service';
6import { GeneratorService } from '../generator/generator.service';
7import { ChunkerService } from '../chunker/chunker.service';
8import { randomUUID } from 'node:crypto';
9 
10@Injectable()
11export class RAGService {
12 private topK: number;
13 
14 constructor(
15 config: ConfigService,
16 private embedder: EmbeddingService,
17 private vectorStore: VectorStoreService,
18 private generator: GeneratorService,
19 private chunker: ChunkerService,
20 ) {
21 this.topK = config.get<number>('rag.retrievalTopK', 5);
22 }
23 
24 async query(question: string, topK?: number, filters?: Record<string, string>) {
25 const startTime = Date.now();
26 
27 const queryEmbedding = await this.embedder.embedQuery(question);
28 const results = await this.vectorStore.search(queryEmbedding, topK ?? this.topK, filters);
29 const retrievalMs = Date.now() - startTime;
30 
31 if (results.length === 0) {
32 return {
33 answer: 'I could not find relevant information to answer this question.',
34 sources: [],
35 model: '',
36 tokens: 0,
37 retrievalTimeMs: retrievalMs,
38 generationTimeMs: 0,
39 };
40 }
41 
42 const context = this.buildContext(results);
43 const genStart = Date.now();
44 const response = await this.generator.generate(question, context);
45 const genMs = Date.now() - genStart;
46 
47 return {
48 answer: response.text,
49 sources: results.map(r => ({
50 text: r.text.slice(0, 200),
51 score: r.score,
52 source: r.source,
53 section: r.section,
54 })),
55 model: response.model,
56 tokens: response.tokens,
57 retrievalTimeMs: retrievalMs,
58 generationTimeMs: genMs,
59 };
60 }
61 
62 async ingest(content: string, source: string) {
63 const documentId = randomUUID();
64 const chunks = this.chunker.chunk(content, documentId, source);
65 const embeddings = await this.embedder.embed(chunks.map(c => c.text));
66 await this.vectorStore.upsert(chunks, embeddings);
67 return { documentId, chunksCreated: chunks.length, source };
68 }
69 
70 private buildContext(results: SearchResult[]): string {
71 return results
72 .map((r, i) => `[Source ${i + 1}: ${r.source}${r.section ? ` - ${r.section}` : ''}]\n${r.text}`)
73 .join('\n\n---\n\n');
74 }
75}
76 
typescript
1// src/rag/rag.controller.ts
2import { Controller, Post, Body } from '@nestjs/common';
3import { RAGService } from './rag.service';
4 
5class QueryDto {
6 question: string;
7 topK?: number;
8 filters?: Record<string, string>;
9}
10 
11class IngestDto {
12 content: string;
13 source: string;
14}
15 
16@Controller('api/rag')
17export class RAGController {
18 constructor(private ragService: RAGService) {}
19 
20 @Post('query')
21 async query(@Body() dto: QueryDto) {
22 return this.ragService.query(dto.question, dto.topK, dto.filters);
23 }
24 
25 @Post('ingest')
26 async ingest(@Body() dto: IngestDto) {
27 return this.ragService.ingest(dto.content, dto.source);
28 }
29}
30 

Module Wiring

typescript
// src/rag/rag.module.ts
import { Module } from '@nestjs/common';
import { RAGController } from './rag.controller';
import { RAGService } from './rag.service';
import { EmbeddingModule } from '../embedding/embedding.module';
import { VectorStoreModule } from '../vector-store/vector-store.module';
import { GeneratorModule } from '../generator/generator.module';
import { ChunkerModule } from '../chunker/chunker.module';

/**
 * Composition root for the RAG feature: imports the pipeline service modules
 * (embedding, vector store, generation, chunking) and exposes them through
 * RAGService behind the HTTP controller.
 */
@Module({
  imports: [EmbeddingModule, VectorStoreModule, GeneratorModule, ChunkerModule],
  controllers: [RAGController],
  providers: [RAGService],
})
export class RAGModule {}

Conclusion

NestJS's module architecture maps naturally to RAG pipeline components. Each service (embedding, chunking, vector store, generation) lives in its own module with clear dependencies. The DI system ensures services are singletons by default, preventing unnecessary client instantiation. Guard decorators can add authentication and rate limiting without modifying business logic.

The type safety extends from DTOs through service interfaces to database payloads. When the retrieval response shape changes, TypeScript catches every downstream consumer that needs updating. This compile-time safety is particularly valuable in RAG pipelines where data flows through multiple transformation stages.

FAQ

Need expert help?

Building with agentic AI?

I help teams ship production-grade systems. From architecture review to hands-on builds.

Muneer Puthiya Purayil

SaaS Architect & AI Systems Engineer. 10+ years shipping production infrastructure across fintech, automotive, e-commerce, and healthcare.

Engage

Start a
Conversation.

For teams building at scale: SaaS platforms, agentic AI systems, and enterprise mobile infrastructure. Scope and fit are evaluated before any engagement begins.

Limited availability · Q3 / Q4 2026