1
2import OpenAI from 'openai';
3import { embedSingle } from './embeddings';
4
// Shared OpenAI client used by both query functions below.
// NOTE(review): no explicit credentials passed — presumably resolved by the
// SDK from the environment; confirm deployment config.
const openai = new OpenAI();
6
/** Configuration for a RAG query (shared by ragQuery and ragQueryStream). */
interface RAGOptions {
  /** Tenant whose documents the vector search is scoped to. */
  tenantId: string;
  /** Number of chunks to retrieve; defaults to 5 in the query functions. */
  topK?: number;
  /** Chat model name; defaults to 'gpt-4o-mini' in the query functions. */
  model?: string;
  /** Overrides the built-in citation-oriented system prompt. */
  systemPrompt?: string;
}
13
/** Result of a non-streaming RAG query. */
interface RAGResponse {
  /** Model-generated answer text ('' when the model returned no content). */
  answer: string;
  /** Retrieved chunks the answer was grounded on, in retrieval order. */
  sources: {
    /** Chunk text, truncated to 200 characters by ragQuery. */
    content: string;
    /** Similarity score as reported by the search backend. */
    score: number;
    /** Identifier of the document the chunk came from. */
    docId: string;
  }[];
  /** Token accounting from the completion (0 when the API omits usage). */
  usage: {
    promptTokens: number;
    completionTokens: number;
  };
}
26
27export async function ragQuery(
28 question: string,
29 searchFn: (embedding: number[], tenantId: string, topK: number) => Promise<any[]>,
30 options: RAGOptions
31): Promise<RAGResponse> {
32 const {
33 tenantId,
34 topK = 5,
35 model = 'gpt-4o-mini',
36 systemPrompt = 'Answer based on the provided context. Cite sources using [Source N]. If unsure, say so.',
37 } = options;
38
39 const embedding = await embedSingle(question);
40 const chunks = await searchFn(embedding, tenantId, topK);
41
42 const context = chunks
43 .map(
44 (c, i) => `[Source ${i + 1}]\n${c.content ?? c.text}`
45 )
46 .join('\n\n---\n\n');
47
48 const completion = await openai.chat.completions.create({
49 model,
50 messages: [
51 { role: 'system', content: systemPrompt },
52 {
53 role: 'user',
54 content: `Context:\n${context}\n\nQuestion: ${question}`,
55 },
56 ],
57 temperature: 0.1,
58 });
59
60 return {
61 answer: completion.choices[0].message.content ?? '',
62 sources: chunks.map((c) => ({
63 content: (c.content ?? c.text).slice(0, 200),
64 score: c.score ?? c.similarity,
65 docId: c.docId ?? c.doc_id,
66 })),
67 usage: {
68 promptTokens: completion.usage?.prompt_tokens ?? 0,
69 completionTokens: completion.usage?.completion_tokens ?? 0,
70 },
71 };
72}
73
74export async function* ragQueryStream(
75 question: string,
76 searchFn: (embedding: number[], tenantId: string, topK: number) => Promise<any[]>,
77 options: RAGOptions
78): AsyncGenerator<string> {
79 const { tenantId, topK = 5, model = 'gpt-4o-mini', systemPrompt } = options;
80
81 const embedding = await embedSingle(question);
82 const chunks = await searchFn(embedding, tenantId, topK);
83
84 const context = chunks
85 .map((c, i) => `[Source ${i + 1}]\n${c.content ?? c.text}`)
86 .join('\n\n---\n\n');
87
88 const stream = await openai.chat.completions.create({
89 model,
90 messages: [
91 {
92 role: 'system',
93 content:
94 systemPrompt ??
95 'Answer based on the provided context. Cite sources.',
96 },
97 {
98 role: 'user',
99 content: `Context:\n${context}\n\nQuestion: ${question}`,
100 },
101 ],
102 temperature: 0.1,
103 stream: true,
104 });
105
106 for await (const chunk of stream) {
107 const content = chunk.choices[0]?.delta?.content;
108 if (content) {
109 yield content;
110 }
111 }
112}
113