Back to Journal
AI Architecture

Complete Guide to Vector Database Architecture with TypeScript

A comprehensive guide to implementing vector database architecture using TypeScript, covering architecture, code examples, and production-ready patterns.

Muneer Puthiya Purayil 17 min read

TypeScript has become the default language for AI application development — not because it's the fastest for vector computation, but because most AI products are web applications. Your vector search pipeline lives alongside your Next.js routes, Express middleware, and React components. This guide covers building production vector search systems entirely in TypeScript, from embedding generation through semantic search APIs.

Setting Up Your Vector Search Stack

The TypeScript vector search stack typically looks like this:

1. Embedding: OpenAI / Cohere SDK
2. Vector Store: Pinecone / Qdrant / pgvector (via Prisma)
3. API: Next.js App Router / Express / Hono
4. ORM: Prisma / Drizzle

Install the core dependencies:

bash
npm install openai @pinecone-database/pinecone @qdrant/js-client-rest
npm install -D @types/node

Embedding Service

Build a typed embedding service with batching and caching:

typescript
1// lib/embeddings.ts
2import OpenAI from 'openai';
3import crypto from 'crypto';
4 
5const openai = new OpenAI();
6 
7interface EmbeddingOptions {
8 model?: string;
9 dimensions?: number;
10 batchSize?: number;
11}
12 
13const DEFAULT_OPTIONS: Required<EmbeddingOptions> = {
14 model: 'text-embedding-3-small',
15 dimensions: 1536,
16 batchSize: 100,
17};
18 
19// In-memory cache for development. Use Redis in production.
20const cache = new Map<string, number[]>();
21 
22function cacheKey(text: string, model: string): string {
23 return crypto
24 .createHash('sha256')
25 .update(`${model}:${text}`)
26 .digest('hex')
27 .slice(0, 16);
28}
29 
30export async function embed(
31 texts: string[],
32 options: EmbeddingOptions = {}
33): Promise<number[][]> {
34 const opts = { ...DEFAULT_OPTIONS, ...options };
35 const results: (number[] | null)[] = new Array(texts.length).fill(null);
36 const uncachedIndices: number[] = [];
37 const uncachedTexts: string[] = [];
38 
39 // Check cache
40 for (let i = 0; i < texts.length; i++) {
41 const key = cacheKey(texts[i], opts.model);
42 const cached = cache.get(key);
43 if (cached) {
44 results[i] = cached;
45 } else {
46 uncachedIndices.push(i);
47 uncachedTexts.push(texts[i]);
48 }
49 }
50 
51 // Batch embed uncached texts
52 for (let i = 0; i < uncachedTexts.length; i += opts.batchSize) {
53 const batch = uncachedTexts.slice(i, i + opts.batchSize);
54 
55 const response = await openai.embeddings.create({
56 input: batch,
57 model: opts.model,
58 dimensions: opts.dimensions,
59 });
60 
61 for (let j = 0; j < response.data.length; j++) {
62 const embedding = response.data[j].embedding;
63 const originalIndex = uncachedIndices[i + j];
64 results[originalIndex] = embedding;
65 
66 const key = cacheKey(texts[originalIndex], opts.model);
67 cache.set(key, embedding);
68 }
69 }
70 
71 return results as number[][];
72}
73 
74export async function embedSingle(
75 text: string,
76 options?: EmbeddingOptions
77): Promise<number[]> {
78 const [embedding] = await embed([text], options);
79 return embedding;
80}
81 

Document Chunking

typescript
1// lib/chunking.ts
2 
3interface Chunk {
4 text: string;
5 index: number;
6 metadata: Record<string, unknown>;
7}
8 
9interface ChunkOptions {
10 maxTokens?: number;
11 overlapTokens?: number;
12}
13 
14function estimateTokens(text: string): number {
15 return Math.ceil(text.length / 4);
16}
17 
18export function chunkDocument(
19 text: string,
20 docMetadata: Record<string, unknown>,
21 options: ChunkOptions = {}
22): Chunk[] {
23 const { maxTokens = 500, overlapTokens = 50 } = options;
24 
25 const sentences = text.split(/(?<=[.!?])\s+/);
26 const chunks: Chunk[] = [];
27 let currentSentences: string[] = [];
28 let currentTokens = 0;
29 
30 for (const sentence of sentences) {
31 const sentenceTokens = estimateTokens(sentence);
32 
33 if (currentTokens + sentenceTokens > maxTokens && currentSentences.length > 0) {
34 chunks.push({
35 text: currentSentences.join(' '),
36 index: chunks.length,
37 metadata: { ...docMetadata, chunkIndex: chunks.length },
38 });
39 
40 // Compute overlap
41 const overlapSentences: string[] = [];
42 let overlapCount = 0;
43 for (let i = currentSentences.length - 1; i >= 0; i--) {
44 const tokens = estimateTokens(currentSentences[i]);
45 if (overlapCount + tokens > overlapTokens) break;
46 overlapSentences.unshift(currentSentences[i]);
47 overlapCount += tokens;
48 }
49 
50 currentSentences = overlapSentences;
51 currentTokens = overlapCount;
52 }
53 
54 currentSentences.push(sentence);
55 currentTokens += sentenceTokens;
56 }
57 
58 if (currentSentences.length > 0) {
59 chunks.push({
60 text: currentSentences.join(' '),
61 index: chunks.length,
62 metadata: { ...docMetadata, chunkIndex: chunks.length },
63 });
64 }
65 
66 return chunks;
67}
68 

Vector Search with Pinecone

typescript
1// lib/vectordb/pinecone.ts
2import { Pinecone, type ScoredPineconeRecord } from '@pinecone-database/pinecone';
3 
4const pinecone = new Pinecone();
5 
6interface UpsertParams {
7 indexName: string;
8 namespace: string;
9 vectors: {
10 id: string;
11 values: number[];
12 metadata: Record<string, unknown>;
13 }[];
14}
15 
16export async function upsertVectors({
17 indexName,
18 namespace,
19 vectors,
20}: UpsertParams): Promise<void> {
21 const index = pinecone.index(indexName);
22 const ns = index.namespace(namespace);
23 
24 // Pinecone supports max 100 vectors per upsert
25 const BATCH_SIZE = 100;
26 for (let i = 0; i < vectors.length; i += BATCH_SIZE) {
27 const batch = vectors.slice(i, i + BATCH_SIZE);
28 await ns.upsert(batch);
29 }
30}
31 
32interface SearchParams {
33 indexName: string;
34 namespace: string;
35 vector: number[];
36 topK?: number;
37 filter?: Record<string, unknown>;
38}
39 
40interface SearchResult {
41 id: string;
42 score: number;
43 metadata: Record<string, unknown>;
44}
45 
46export async function search({
47 indexName,
48 namespace,
49 vector,
50 topK = 10,
51 filter,
52}: SearchParams): Promise<SearchResult[]> {
53 const index = pinecone.index(indexName);
54 const ns = index.namespace(namespace);
55 
56 const response = await ns.query({
57 vector,
58 topK,
59 filter,
60 includeMetadata: true,
61 });
62 
63 return (response.matches ?? []).map((match) => ({
64 id: match.id,
65 score: match.score ?? 0,
66 metadata: (match.metadata ?? {}) as Record<string, unknown>,
67 }));
68}
69 
70export async function deleteByDocId(
71 indexName: string,
72 namespace: string,
73 docId: string
74): Promise<void> {
75 const index = pinecone.index(indexName);
76 const ns = index.namespace(namespace);
77 
78 // Pinecone requires listing then deleting
79 const results = await ns.query({
80 vector: new Array(1536).fill(0), // dummy vector
81 topK: 10000,
82 filter: { docId: { $eq: docId } },
83 });
84 
85 const ids = (results.matches ?? []).map((m) => m.id);
86 if (ids.length > 0) {
87 await ns.deleteMany(ids);
88 }
89}
90 

Vector Search with pgvector and Prisma

For teams using PostgreSQL:

typescript
1// lib/vectordb/pgvector.ts
2import { PrismaClient } from '@prisma/client';
3 
4const prisma = new PrismaClient();
5 
6interface PgVectorSearchParams {
7 embedding: number[];
8 tenantId: string;
9 topK?: number;
10 threshold?: number;
11 docType?: string;
12}
13 
14interface PgVectorResult {
15 id: string;
16 content: string;
17 docId: string;
18 similarity: number;
19 metadata: Record<string, unknown>;
20}
21 
22export async function searchPgVector({
23 embedding,
24 tenantId,
25 topK = 10,
26 threshold = 0.7,
27 docType,
28}: PgVectorSearchParams): Promise<PgVectorResult[]> {
29 const vectorStr = `[${embedding.join(',')}]`;
30 
31 const docTypeFilter = docType
32 ? `AND doc_type = '${docType}'`
33 : '';
34 
35 const results = await prisma.$queryRawUnsafe<PgVectorResult[]>(`
36 SELECT
37 id,
38 content,
39 doc_id as "docId",
40 metadata,
41 1 - (embedding <=> $1::vector) as similarity
42 FROM document_chunks
43 WHERE tenant_id = $2
44 AND 1 - (embedding <=> $1::vector) > $3
45 ${docTypeFilter}
46 ORDER BY embedding <=> $1::vector
47 LIMIT $4
48 `, vectorStr, tenantId, threshold, topK);
49 
50 return results;
51}
52 
53export async function upsertChunks(
54 chunks: {
55 id: string;
56 docId: string;
57 tenantId: string;
58 content: string;
59 embedding: number[];
60 metadata: Record<string, unknown>;
61 chunkIndex: number;
62 }[]
63): Promise<void> {
64 // Delete existing chunks for the document
65 const docIds = [...new Set(chunks.map((c) => c.docId))];
66 for (const docId of docIds) {
67 await prisma.$executeRawUnsafe(
68 'DELETE FROM document_chunks WHERE doc_id = $1',
69 docId
70 );
71 }
72 
73 // Insert new chunks
74 for (const chunk of chunks) {
75 const vectorStr = `[${chunk.embedding.join(',')}]`;
76 await prisma.$executeRawUnsafe(
77 `INSERT INTO document_chunks
78 (id, doc_id, tenant_id, content, embedding, metadata, chunk_index)
79 VALUES ($1, $2, $3, $4, $5::vector, $6::jsonb, $7)`,
80 chunk.id,
81 chunk.docId,
82 chunk.tenantId,
83 chunk.content,
84 vectorStr,
85 JSON.stringify(chunk.metadata),
86 chunk.chunkIndex
87 );
88 }
89}
90 

Need a second opinion on your AI systems architecture?

I run free 30-minute strategy calls for engineering teams tackling this exact problem.

Book a Free Call

RAG Pipeline

Complete retrieval-augmented generation with streaming:

typescript
1// lib/rag.ts
2import OpenAI from 'openai';
3import { embedSingle } from './embeddings';
4 
5const openai = new OpenAI();
6 
7interface RAGOptions {
8 tenantId: string;
9 topK?: number;
10 model?: string;
11 systemPrompt?: string;
12}
13 
14interface RAGResponse {
15 answer: string;
16 sources: {
17 content: string;
18 score: number;
19 docId: string;
20 }[];
21 usage: {
22 promptTokens: number;
23 completionTokens: number;
24 };
25}
26 
27export async function ragQuery(
28 question: string,
29 searchFn: (embedding: number[], tenantId: string, topK: number) => Promise<any[]>,
30 options: RAGOptions
31): Promise<RAGResponse> {
32 const {
33 tenantId,
34 topK = 5,
35 model = 'gpt-4o-mini',
36 systemPrompt = 'Answer based on the provided context. Cite sources using [Source N]. If unsure, say so.',
37 } = options;
38 
39 const embedding = await embedSingle(question);
40 const chunks = await searchFn(embedding, tenantId, topK);
41 
42 const context = chunks
43 .map(
44 (c, i) => `[Source ${i + 1}]\n${c.content ?? c.text}`
45 )
46 .join('\n\n---\n\n');
47 
48 const completion = await openai.chat.completions.create({
49 model,
50 messages: [
51 { role: 'system', content: systemPrompt },
52 {
53 role: 'user',
54 content: `Context:\n${context}\n\nQuestion: ${question}`,
55 },
56 ],
57 temperature: 0.1,
58 });
59 
60 return {
61 answer: completion.choices[0].message.content ?? '',
62 sources: chunks.map((c) => ({
63 content: (c.content ?? c.text).slice(0, 200),
64 score: c.score ?? c.similarity,
65 docId: c.docId ?? c.doc_id,
66 })),
67 usage: {
68 promptTokens: completion.usage?.prompt_tokens ?? 0,
69 completionTokens: completion.usage?.completion_tokens ?? 0,
70 },
71 };
72}
73 
74export async function* ragQueryStream(
75 question: string,
76 searchFn: (embedding: number[], tenantId: string, topK: number) => Promise<any[]>,
77 options: RAGOptions
78): AsyncGenerator<string> {
79 const { tenantId, topK = 5, model = 'gpt-4o-mini', systemPrompt } = options;
80 
81 const embedding = await embedSingle(question);
82 const chunks = await searchFn(embedding, tenantId, topK);
83 
84 const context = chunks
85 .map((c, i) => `[Source ${i + 1}]\n${c.content ?? c.text}`)
86 .join('\n\n---\n\n');
87 
88 const stream = await openai.chat.completions.create({
89 model,
90 messages: [
91 {
92 role: 'system',
93 content:
94 systemPrompt ??
95 'Answer based on the provided context. Cite sources.',
96 },
97 {
98 role: 'user',
99 content: `Context:\n${context}\n\nQuestion: ${question}`,
100 },
101 ],
102 temperature: 0.1,
103 stream: true,
104 });
105 
106 for await (const chunk of stream) {
107 const content = chunk.choices[0]?.delta?.content;
108 if (content) {
109 yield content;
110 }
111 }
112}
113 

Next.js API Routes

Serve search and RAG through Next.js:

typescript
1// app/api/search/route.ts
2import { NextRequest, NextResponse } from 'next/server';
3import { embedSingle } from '@/lib/embeddings';
4import { search } from '@/lib/vectordb/pinecone';
5 
6export async function POST(request: NextRequest) {
7 const { query, tenantId, topK = 10, docType } =
8 await request.json();
9 
10 if (!query || !tenantId) {
11 return NextResponse.json(
12 { error: 'query and tenantId are required' },
13 { status: 400 }
14 );
15 }
16 
17 const embedding = await embedSingle(query);
18 
19 const filter: Record<string, unknown> = {
20 tenantId: { $eq: tenantId },
21 };
22 if (docType) {
23 filter.docType = { $eq: docType };
24 }
25 
26 const results = await search({
27 indexName: 'documents',
28 namespace: tenantId,
29 vector: embedding,
30 topK,
31 filter,
32 });
33 
34 return NextResponse.json({ results });
35}
36 
typescript
1// app/api/rag/route.ts
2import { NextRequest } from 'next/server';
3import { ragQueryStream } from '@/lib/rag';
4import { search } from '@/lib/vectordb/pinecone';
5 
6export async function POST(request: NextRequest) {
7 const { question, tenantId, stream = false } =
8 await request.json();
9 
10 if (!question || !tenantId) {
11 return Response.json(
12 { error: 'question and tenantId are required' },
13 { status: 400 }
14 );
15 }
16 
17 const searchFn = async (
18 embedding: number[],
19 tid: string,
20 topK: number
21 ) => {
22 return search({
23 indexName: 'documents',
24 namespace: tid,
25 vector: embedding,
26 topK,
27 });
28 };
29 
30 if (stream) {
31 const encoder = new TextEncoder();
32 const readable = new ReadableStream({
33 async start(controller) {
34 for await (const token of ragQueryStream(
35 question,
36 searchFn,
37 { tenantId }
38 )) {
39 controller.enqueue(
40 encoder.encode(`data: ${JSON.stringify({ token })}\n\n`)
41 );
42 }
43 controller.enqueue(encoder.encode('data: [DONE]\n\n'));
44 controller.close();
45 },
46 });
47 
48 return new Response(readable, {
49 headers: {
50 'Content-Type': 'text/event-stream',
51 'Cache-Control': 'no-cache',
52 Connection: 'keep-alive',
53 },
54 });
55 }
56 
57 const { ragQuery: ragQueryFn } = await import('@/lib/rag');
58 const result = await ragQueryFn(question, searchFn, { tenantId });
59 return Response.json(result);
60}
61 

Document Ingestion Pipeline

typescript
1// lib/ingest.ts
2import { randomUUID } from 'crypto';
3import { embed } from './embeddings';
4import { chunkDocument } from './chunking';
5import { upsertVectors } from './vectordb/pinecone';
6 
7interface IngestParams {
8 docId: string;
9 title: string;
10 content: string;
11 tenantId: string;
12 docType?: string;
13}
14 
15export async function ingestDocument({
16 docId,
17 title,
18 content,
19 tenantId,
20 docType = 'document',
21}: IngestParams): Promise<{ chunks: number }> {
22 // Step 1: Chunk the document
23 const chunks = chunkDocument(content, {
24 docId,
25 title,
26 tenantId,
27 docType,
28 });
29 
30 // Step 2: Generate embeddings for all chunks
31 const texts = chunks.map((c) => c.text);
32 const embeddings = await embed(texts);
33 
34 // Step 3: Upsert to vector database
35 const vectors = chunks.map((chunk, i) => ({
36 id: `${docId}_chunk_${chunk.index}`,
37 values: embeddings[i],
38 metadata: {
39 text: chunk.text,
40 docId,
41 title,
42 tenantId,
43 docType,
44 chunkIndex: chunk.index,
45 },
46 }));
47 
48 await upsertVectors({
49 indexName: 'documents',
50 namespace: tenantId,
51 vectors,
52 });
53 
54 return { chunks: chunks.length };
55}
56 
57export async function deleteDocument(
58 docId: string,
59 tenantId: string
60): Promise<void> {
61 const { deleteByDocId } = await import('./vectordb/pinecone');
62 await deleteByDocId('documents', tenantId, docId);
63}
64 
typescript
1// hooks/use-search.ts
2'use client';
3 
4import { useState, useCallback, useRef } from 'react';
5 
6interface SearchResult {
7 id: string;
8 score: number;
9 metadata: Record<string, unknown>;
10}
11 
12interface UseSearchOptions {
13 debounceMs?: number;
14}
15 
16export function useSearch(options: UseSearchOptions = {}) {
17 const { debounceMs = 300 } = options;
18 const [results, setResults] = useState<SearchResult[]>([]);
19 const [loading, setLoading] = useState(false);
20 const [error, setError] = useState<string | null>(null);
21 const debounceRef = useRef<NodeJS.Timeout>();
22 
23 const search = useCallback(
24 (query: string, tenantId: string) => {
25 if (debounceRef.current) {
26 clearTimeout(debounceRef.current);
27 }
28 
29 if (!query.trim()) {
30 setResults([]);
31 return;
32 }
33 
34 debounceRef.current = setTimeout(async () => {
35 setLoading(true);
36 setError(null);
37 
38 try {
39 const res = await fetch('/api/search', {
40 method: 'POST',
41 headers: { 'Content-Type': 'application/json' },
42 body: JSON.stringify({ query, tenantId }),
43 });
44 
45 if (!res.ok) throw new Error('Search failed');
46 
47 const data = await res.json();
48 setResults(data.results);
49 } catch (err) {
50 setError(
51 err instanceof Error ? err.message : 'Search failed'
52 );
53 } finally {
54 setLoading(false);
55 }
56 }, debounceMs);
57 },
58 [debounceMs]
59 );
60 
61 return { results, loading, error, search };
62}
63 
typescript
1// __tests__/search.test.ts
2import { embed, embedSingle } from '@/lib/embeddings';
3import { chunkDocument } from '@/lib/chunking';
4 
5describe('Chunking', () => {
6 it('splits text at sentence boundaries', () => {
7 const text =
8 'First sentence. Second sentence. Third sentence. Fourth sentence.';
9 const chunks = chunkDocument(text, {}, { maxTokens: 10 });
10 
11 expect(chunks.length).toBeGreaterThan(1);
12 chunks.forEach((chunk) => {
13 expect(chunk.text).not.toMatch(/^\s/); // No leading whitespace
14 expect(chunk.text).toMatch(/\.$/); // Ends with period
15 });
16 });
17 
18 it('preserves overlap between chunks', () => {
19 const sentences = Array.from(
20 { length: 20 },
21 (_, i) => `Sentence number ${i + 1}.`
22 );
23 const text = sentences.join(' ');
24 const chunks = chunkDocument(
25 text,
26 {},
27 { maxTokens: 30, overlapTokens: 10 }
28 );
29 
30 // Adjacent chunks should share some content
31 for (let i = 1; i < chunks.length; i++) {
32 const prevWords = new Set(chunks[i - 1].text.split(' '));
33 const currWords = chunks[i].text.split(' ');
34 const overlap = currWords.filter((w) => prevWords.has(w));
35 expect(overlap.length).toBeGreaterThan(0);
36 }
37 });
38});
39 
40describe('Embeddings', () => {
41 it('returns consistent dimensions', async () => {
42 const embedding = await embedSingle('test query');
43 expect(embedding.length).toBe(1536);
44 });
45 
46 it('similar texts have higher cosine similarity', async () => {
47 const [a, b, c] = await embed([
48 'How to deploy a Next.js application',
49 'Deploying Next.js apps to production',
50 'Best recipe for chocolate cake',
51 ]);
52 
53 const simAB = cosineSimilarity(a, b);
54 const simAC = cosineSimilarity(a, c);
55 
56 expect(simAB).toBeGreaterThan(simAC);
57 });
58});
59 
60function cosineSimilarity(a: number[], b: number[]): number {
61 let dot = 0, normA = 0, normB = 0;
62 for (let i = 0; i < a.length; i++) {
63 dot += a[i] * b[i];
64 normA += a[i] * a[i];
65 normB += b[i] * b[i];
66 }
67 return dot / (Math.sqrt(normA) * Math.sqrt(normB));
68}
69 

FAQ

Need expert help?

Building with agentic AI?

I help teams ship production-grade systems. From architecture review to hands-on builds.

Muneer Puthiya Purayil

SaaS Architect & AI Systems Engineer. 10+ years shipping production infrastructure across fintech, automotive, e-commerce, and healthcare.

Engage

Start a
Conversation.

For teams building at scale: SaaS platforms, agentic AI systems, and enterprise mobile infrastructure. Scope and fit are evaluated before any engagement begins.

Limited availability · Q3 / Q4 2026