Back to Journal
AI Architecture

Complete Guide to Vector Database Architecture with Rust

A comprehensive guide to implementing Vector Database Architecture using Rust, covering architecture, code examples, and production-ready patterns.

Muneer Puthiya Purayil 20 min read

Rust is the language of choice for building the core of vector databases — Qdrant, Lance, and Meilisearch's vector engine are all written in Rust (Milvus, by contrast, is primarily Go and C++). The language's zero-cost abstractions, memory safety guarantees, and native SIMD support make it ideal for the compute-intensive, latency-sensitive work that vector search demands. This guide covers building production vector search infrastructure in Rust, from HNSW implementation through serving and persistence.

Core Distance Functions with SIMD

Distance computation is the most performance-critical code path. Every query computes thousands to millions of distance calculations. Rust's explicit SIMD support through std::arch gives you direct hardware access:

rust
// SIMD path is x86-64 only; every other target falls through to the
// scalar implementation below. Without these cfg gates the module
// fails to compile on ARM (e.g. Apple Silicon, Graviton).
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Cosine similarity using AVX2 + FMA instructions.
/// Returns a value between -1.0 and 1.0 (0.0 if either vector is all-zero).
///
/// # Safety
/// The caller must guarantee the CPU supports AVX2 and FMA — use
/// `cosine_similarity` below for runtime dispatch. Both slices must
/// have the same length.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "fma")]
pub unsafe fn cosine_similarity_avx2(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let n = a.len();

    let mut dot = _mm256_setzero_ps();
    let mut norm_a = _mm256_setzero_ps();
    let mut norm_b = _mm256_setzero_ps();

    // 8 f32 lanes per iteration; fused multiply-add computes all three
    // accumulators (dot, |a|^2, |b|^2) in one pass over the data.
    let chunks = n / 8;
    for i in 0..chunks {
        let offset = i * 8;
        // SAFETY: offset + 8 <= chunks * 8 <= n, so both loads are in bounds.
        let va = _mm256_loadu_ps(a.as_ptr().add(offset));
        let vb = _mm256_loadu_ps(b.as_ptr().add(offset));

        dot = _mm256_fmadd_ps(va, vb, dot);
        norm_a = _mm256_fmadd_ps(va, va, norm_a);
        norm_b = _mm256_fmadd_ps(vb, vb, norm_b);
    }

    let mut dot_sum = horizontal_sum_avx2(dot);
    let mut norm_a_sum = horizontal_sum_avx2(norm_a);
    let mut norm_b_sum = horizontal_sum_avx2(norm_b);

    // Scalar tail for the final n % 8 elements.
    for i in (chunks * 8)..n {
        dot_sum += a[i] * b[i];
        norm_a_sum += a[i] * a[i];
        norm_b_sum += b[i] * b[i];
    }

    let denom = (norm_a_sum * norm_b_sum).sqrt();
    if denom == 0.0 {
        return 0.0;
    }
    dot_sum / denom
}

/// Reduce the 8 f32 lanes of an AVX register to a single scalar sum
/// (8 -> 4 -> 2 -> 1 lanes via extract/shuffle + add).
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn horizontal_sum_avx2(v: __m256) -> f32 {
    let hi128 = _mm256_extractf128_ps(v, 1);
    let lo128 = _mm256_castps256_ps128(v);
    let sum128 = _mm_add_ps(hi128, lo128);
    let hi64 = _mm_movehl_ps(sum128, sum128);
    let sum64 = _mm_add_ps(sum128, hi64);
    let hi32 = _mm_shuffle_ps(sum64, sum64, 0x1);
    let sum32 = _mm_add_ss(sum64, hi32);
    _mm_cvtss_f32(sum32)
}

/// Cosine similarity with runtime CPU-feature dispatch.
/// Portable: non-x86-64 targets (and x86 CPUs without AVX2/FMA) take
/// the scalar path with identical semantics.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
            // SAFETY: the feature detection above guarantees AVX2 + FMA.
            return unsafe { cosine_similarity_avx2(a, b) };
        }
    }
    cosine_similarity_scalar(a, b)
}

/// Portable scalar fallback; same contract as the SIMD path.
fn cosine_similarity_scalar(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(a.len(), b.len());
    let mut dot = 0.0f32;
    let mut norm_a = 0.0f32;
    let mut norm_b = 0.0f32;
    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += x * y;
        norm_a += x * x;
        norm_b += y * y;
    }
    let denom = (norm_a * norm_b).sqrt();
    if denom == 0.0 { 0.0 } else { dot / denom }
}
76 

HNSW Index Implementation

A concurrency-aware HNSW implementation in Rust. It uses a single index-wide lock for clarity; a production system would add finer-grained locking, deletion support, and persistence:

rust
1use parking_lot::RwLock;
2use rand::Rng;
3use std::collections::{BinaryHeap, HashSet};
4use std::cmp::Reverse;
5 
/// One node in the HNSW graph: the stored vector plus its adjacency
/// lists — one Vec<u32> of node ids per level the node participates in
/// (index 0 = base layer containing every node).
#[derive(Clone)]
struct HnswNode {
    vector: Vec<f32>,
    neighbors: Vec<Vec<u32>>, // neighbors per level
}
11 
/// Hierarchical Navigable Small World index over f32 vectors.
/// Interior locks allow use behind a shared reference (`&self` insert
/// and search); node id = position in `nodes`.
pub struct HnswIndex {
    nodes: RwLock<Vec<HnswNode>>,
    entry_point: RwLock<Option<u32>>, // top-of-hierarchy node; None when empty
    max_level: RwLock<usize>,         // highest level currently occupied
    m: usize,                         // max connections per node on levels > 0
    m_max0: usize,                    // max connections on level 0 (set to 2*m)
    ef_construction: usize,           // candidate-list width during insertion
    ml: f64,                          // level-sampling factor: 1 / ln(m)
    dist_fn: fn(&[f32], &[f32]) -> f32, // distance = 1 - cosine similarity
}
22 
/// Heap entry pairing a node id with its distance to the query.
///
/// Ordering is by distance using `f32::total_cmp`, which is a total
/// order (NaN sorts above every number). The previous
/// `partial_cmp().unwrap()` in `Ord::cmp` panicked the moment a NaN
/// distance entered a heap or sort — possible with malformed input
/// vectors — so `Ord` is now infallible and `PartialOrd` delegates to
/// it, keeping the two impls consistent.
#[derive(Clone, PartialEq)]
struct Candidate {
    id: u32,
    distance: f32,
}

impl Eq for Candidate {}

impl PartialOrd for Candidate {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Candidate {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.distance.total_cmp(&other.distance)
    }
}
42 
43impl HnswIndex {
44 pub fn new(m: usize, ef_construction: usize) -> Self {
45 Self {
46 nodes: RwLock::new(Vec::new()),
47 entry_point: RwLock::new(None),
48 max_level: RwLock::new(0),
49 m,
50 m_max0: m * 2,
51 ef_construction,
52 ml: 1.0 / (m as f64).ln(),
53 dist_fn: |a, b| 1.0 - cosine_similarity(a, b),
54 }
55 }
56 
57 fn random_level(&self) -> usize {
58 let mut rng = rand::thread_rng();
59 let r: f64 = rng.gen();
60 (-r.ln() * self.ml) as usize
61 }
62 
63 pub fn insert(&self, vector: Vec<f32>) -> u32 {
64 let level = self.random_level();
65 let mut node = HnswNode {
66 vector,
67 neighbors: vec![Vec::new(); level + 1],
68 };
69 
70 let mut nodes = self.nodes.write();
71 let id = nodes.len() as u32;
72 nodes.push(node);
73 
74 let entry_point = *self.entry_point.read();
75 if entry_point.is_none() {
76 *self.entry_point.write() = Some(id);
77 *self.max_level.write() = level;
78 return id;
79 }
80 
81 let ep = entry_point.unwrap();
82 let current_max_level = *self.max_level.read();
83 
84 // Greedy search from top to level+1
85 let mut current = ep;
86 let query = &nodes[id as usize].vector;
87 
88 for l in (level + 1..=current_max_level).rev() {
89 loop {
90 let mut changed = false;
91 let neighbors = &nodes[current as usize].neighbors;
92 if l < neighbors.len() {
93 for &neighbor in &neighbors[l] {
94 let dist = (self.dist_fn)(
95 query,
96 &nodes[neighbor as usize].vector,
97 );
98 let current_dist = (self.dist_fn)(
99 query,
100 &nodes[current as usize].vector,
101 );
102 if dist < current_dist {
103 current = neighbor;
104 changed = true;
105 }
106 }
107 }
108 if !changed { break; }
109 }
110 }
111 
112 // Insert at each level
113 let mut ep_list = vec![current];
114 let insert_level = level.min(current_max_level);
115 
116 for l in (0..=insert_level).rev() {
117 let candidates = self.search_layer(
118 &nodes, query, &ep_list, self.ef_construction, l,
119 );
120 
121 let max_conn = if l == 0 { self.m_max0 } else { self.m };
122 let selected: Vec<u32> = candidates
123 .iter()
124 .take(max_conn)
125 .map(|c| c.id)
126 .collect();
127 
128 // Set forward connections
129 nodes[id as usize].neighbors[l] = selected.clone();
130 
131 // Set reverse connections
132 for &neighbor_id in &selected {
133 let neighbor = &mut nodes[neighbor_id as usize];
134 if l < neighbor.neighbors.len() {
135 neighbor.neighbors[l].push(id);
136 if neighbor.neighbors[l].len() > max_conn {
137 // Shrink: keep closest
138 let nv = neighbor.vector.clone();
139 let mut scored: Vec<_> = neighbor.neighbors[l]
140 .iter()
141 .map(|&nid| Candidate {
142 id: nid,
143 distance: (self.dist_fn)(
144 &nv,
145 &nodes[nid as usize].vector,
146 ),
147 })
148 .collect();
149 scored.sort();
150 neighbor.neighbors[l] = scored
151 .into_iter()
152 .take(max_conn)
153 .map(|c| c.id)
154 .collect();
155 }
156 }
157 }
158 
159 ep_list = selected;
160 }
161 
162 if level > current_max_level {
163 *self.max_level.write() = level;
164 *self.entry_point.write() = Some(id);
165 }
166 
167 id
168 }
169 
170 fn search_layer(
171 &self,
172 nodes: &[HnswNode],
173 query: &[f32],
174 entry_points: &[u32],
175 ef: usize,
176 level: usize,
177 ) -> Vec<Candidate> {
178 let mut visited = HashSet::new();
179 let mut candidates = BinaryHeap::new(); // min-heap
180 let mut results = BinaryHeap::new(); // max-heap for top-ef
181 
182 for &ep in entry_points {
183 let dist = (self.dist_fn)(query, &nodes[ep as usize].vector);
184 visited.insert(ep);
185 candidates.push(Reverse(Candidate { id: ep, distance: dist }));
186 results.push(Candidate { id: ep, distance: dist });
187 }
188 
189 while let Some(Reverse(nearest)) = candidates.pop() {
190 let farthest_dist = results.peek().map(|c| c.distance).unwrap_or(f32::MAX);
191 if nearest.distance > farthest_dist {
192 break;
193 }
194 
195 let neighbors = &nodes[nearest.id as usize].neighbors;
196 if level >= neighbors.len() { continue; }
197 
198 for &neighbor_id in &neighbors[level] {
199 if visited.contains(&neighbor_id) { continue; }
200 visited.insert(neighbor_id);
201 
202 let dist = (self.dist_fn)(
203 query,
204 &nodes[neighbor_id as usize].vector,
205 );
206 
207 let farthest_dist = results.peek().map(|c| c.distance).unwrap_or(f32::MAX);
208 
209 if results.len() < ef || dist < farthest_dist {
210 candidates.push(Reverse(Candidate {
211 id: neighbor_id,
212 distance: dist,
213 }));
214 results.push(Candidate {
215 id: neighbor_id,
216 distance: dist,
217 });
218 if results.len() > ef {
219 results.pop();
220 }
221 }
222 }
223 }
224 
225 let mut result_vec: Vec<_> = results.into_vec();
226 result_vec.sort();
227 result_vec
228 }
229 
230 pub fn search(&self, query: &[f32], top_k: usize, ef_search: usize) -> Vec<(u32, f32)> {
231 let nodes = self.nodes.read();
232 let entry_point = *self.entry_point.read();
233 
234 let ep = match entry_point {
235 Some(ep) => ep,
236 None => return Vec::new(),
237 };
238 
239 let max_level = *self.max_level.read();
240 let mut current = ep;
241 
242 // Greedy descent from top level to level 1
243 for l in (1..=max_level).rev() {
244 loop {
245 let mut changed = false;
246 let neighbors = &nodes[current as usize].neighbors;
247 if l < neighbors.len() {
248 for &neighbor in &neighbors[l] {
249 let dist = (self.dist_fn)(
250 query,
251 &nodes[neighbor as usize].vector,
252 );
253 let cur_dist = (self.dist_fn)(
254 query,
255 &nodes[current as usize].vector,
256 );
257 if dist < cur_dist {
258 current = neighbor;
259 changed = true;
260 }
261 }
262 }
263 if !changed { break; }
264 }
265 }
266 
267 // Search layer 0
268 let results = self.search_layer(
269 &nodes, query, &[current], ef_search, 0,
270 );
271 
272 results
273 .into_iter()
274 .take(top_k)
275 .map(|c| (c.id, 1.0 - c.distance)) // Convert distance to similarity
276 .collect()
277 }
278}
279 

Memory-Mapped Vector Storage

For datasets larger than RAM, use memory-mapped files:

rust
1use memmap2::{Mmap, MmapMut, MmapOptions};
2use std::fs::{File, OpenOptions};
3use std::io::Write;
4use std::path::Path;
5 
/// Read-only vector store backed by a memory-mapped file.
/// On-disk layout: `count` vectors of `dimensions` f32s each, packed
/// contiguously in native byte order with no header.
pub struct MmapVectorStore {
    mmap: Mmap,
    dimensions: usize,
    count: usize,
}
11 
/// Append-only writer that fills a fixed-capacity mapped file and then
/// converts into a read-only `MmapVectorStore` via `finish`.
pub struct MmapVectorStoreBuilder {
    file: File, // kept open so `finish` can re-map the file read-only
    mmap: MmapMut,
    dimensions: usize,
    count: usize,    // vectors written so far
    capacity: usize, // maximum vector count, fixed at creation
}
19 
impl MmapVectorStoreBuilder {
    /// Create (or reuse) a file sized for exactly `capacity` vectors of
    /// `dimensions` f32s and map it writable.
    pub fn new(path: &Path, dimensions: usize, capacity: usize) -> std::io::Result<Self> {
        let byte_size = capacity * dimensions * std::mem::size_of::<f32>();

        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .open(path)?;

        // set_len both extends and truncates, so a pre-existing file is
        // resized to exactly the requested capacity.
        file.set_len(byte_size as u64)?;

        // SAFETY: the file was just sized to byte_size, so the mapping
        // covers valid file space. Per memmap2's documented caveat,
        // behavior is undefined only if another process truncates the
        // file while it is mapped.
        let mmap = unsafe { MmapMut::map_mut(&file)? };

        Ok(Self {
            file,
            mmap,
            dimensions,
            count: 0,
            capacity,
        })
    }

    /// Append one vector and return its id (its insertion index).
    /// Panics if the vector has the wrong dimension or the builder is full.
    pub fn add_vector(&mut self, vector: &[f32]) -> u32 {
        assert_eq!(vector.len(), self.dimensions);
        assert!(self.count < self.capacity);

        let offset = self.count * self.dimensions * 4;
        // SAFETY: viewing &[f32] as &[u8] is sound — every byte pattern
        // is a valid u8, and the length is vector.len() * size_of::<f32>().
        let bytes: &[u8] = unsafe {
            std::slice::from_raw_parts(
                vector.as_ptr() as *const u8,
                vector.len() * 4,
            )
        };
        self.mmap[offset..offset + bytes.len()].copy_from_slice(bytes);

        let id = self.count as u32;
        self.count += 1;
        id
    }

    /// Flush pending writes to disk and reopen the mapping read-only.
    pub fn finish(self) -> std::io::Result<MmapVectorStore> {
        self.mmap.flush()?;
        // SAFETY: same file, now accessed only through an immutable map.
        let mmap = unsafe { Mmap::map(&self.file)? };
        Ok(MmapVectorStore {
            mmap,
            dimensions: self.dimensions,
            count: self.count,
        })
    }
}
71 
impl MmapVectorStore {
    /// Map an existing store file read-only. The vector count is
    /// derived from the file size, since the format has no header.
    pub fn open(path: &Path, dimensions: usize) -> std::io::Result<Self> {
        let file = File::open(path)?;
        // SAFETY: read-only mapping; undefined behavior only if the
        // file is truncated externally while mapped (memmap2 caveat).
        let mmap = unsafe { Mmap::map(&file)? };
        let count = mmap.len() / (dimensions * 4);

        Ok(Self {
            mmap,
            dimensions,
            count,
        })
    }

    /// Zero-copy vector access
    ///
    /// Panics (via the byte-slice index) if `id` is out of range.
    pub fn get_vector(&self, id: u32) -> &[f32] {
        let offset = id as usize * self.dimensions;
        let byte_offset = offset * 4;
        let byte_end = byte_offset + self.dimensions * 4;
        // SAFETY: the mmap base is page-aligned and byte_offset is a
        // multiple of 4, so the pointer is aligned for f32; the bounds-
        // checked byte slice above guarantees the range is in-bounds.
        unsafe {
            std::slice::from_raw_parts(
                self.mmap[byte_offset..byte_end].as_ptr() as *const f32,
                self.dimensions,
            )
        }
    }

    /// Number of vectors stored.
    pub fn len(&self) -> usize {
        self.count
    }
}
102 

Need a second opinion on your AI systems architecture?

I run free 30-minute strategy calls for engineering teams tackling this exact problem.

Book a Free Call

Scalar Quantization

Reduce memory usage by 4x while typically maintaining >95% recall (dataset-dependent; rerank the top candidates with full-precision vectors to recover accuracy):

rust
/// Per-dimension min/max scalar quantizer: f32 -> u8, a 4x memory saving.
pub struct ScalarQuantizer {
    min_vals: Vec<f32>,
    max_vals: Vec<f32>,
    dimensions: usize,
}

impl ScalarQuantizer {
    /// Learn per-dimension ranges from a training sample.
    ///
    /// # Panics
    /// Panics if any training vector's length differs from `dimensions`
    /// — a silent mismatch would leave stale MAX/MIN sentinels in some
    /// dimensions and corrupt every later quantization.
    pub fn fit(vectors: &[&[f32]], dimensions: usize) -> Self {
        let mut min_vals = vec![f32::MAX; dimensions];
        let mut max_vals = vec![f32::MIN; dimensions];

        for vec in vectors {
            assert_eq!(vec.len(), dimensions, "training vector has wrong dimension");
            for (i, &val) in vec.iter().enumerate() {
                min_vals[i] = min_vals[i].min(val);
                max_vals[i] = max_vals[i].max(val);
            }
        }

        Self { min_vals, max_vals, dimensions }
    }

    /// Quantize one vector to one byte per dimension: 0..=255 across
    /// the learned range; constant dimensions map to the midpoint 128.
    pub fn quantize(&self, vector: &[f32]) -> Vec<u8> {
        debug_assert_eq!(vector.len(), self.dimensions);
        vector
            .iter()
            .enumerate()
            .map(|(i, &val)| {
                let range = self.max_vals[i] - self.min_vals[i];
                if range == 0.0 {
                    128u8
                } else {
                    let normalized = (val - self.min_vals[i]) / range;
                    // round() instead of truncation halves the maximum
                    // quantization error per dimension.
                    (normalized * 255.0).round().clamp(0.0, 255.0) as u8
                }
            })
            .collect()
    }

    /// Approximate cosine *distance* (1 - cosine similarity) computed
    /// directly on quantized codes, centered around 128.
    pub fn quantized_cosine_distance(a: &[u8], b: &[u8]) -> f32 {
        debug_assert_eq!(a.len(), b.len());
        // i64 accumulators: each term is at most 128^2, so i32 would
        // overflow past ~131k dimensions; i64 cannot overflow here.
        let mut dot: i64 = 0;
        let mut norm_a: i64 = 0;
        let mut norm_b: i64 = 0;

        for (&x, &y) in a.iter().zip(b.iter()) {
            let ai = x as i64 - 128;
            let bi = y as i64 - 128;
            dot += ai * bi;
            norm_a += ai * ai;
            norm_b += bi * bi;
        }

        let denom = ((norm_a as f64) * (norm_b as f64)).sqrt();
        if denom == 0.0 {
            1.0
        } else {
            1.0 - (dot as f64 / denom) as f32
        }
    }
}
59 

gRPC Service with Tonic

Expose your index as a gRPC service:

rust
1use tonic::{transport::Server, Request, Response, Status};
2use std::sync::Arc;
3use std::time::Instant;
4 
/// Protobuf/gRPC types generated at build time by tonic-build from
/// `vectordb.proto`.
pub mod vectordb {
    tonic::include_proto!("vectordb");
}
8 
9use vectordb::vector_search_server::{VectorSearch, VectorSearchServer};
10use vectordb::*;
11 
/// gRPC service wrapping the HNSW index plus a side map of per-vector
/// string metadata (vector id -> key/value pairs), used for filtering.
pub struct VectorSearchService {
    index: Arc<HnswIndex>,
    metadata: Arc<parking_lot::RwLock<std::collections::HashMap<u32, std::collections::HashMap<String, String>>>>,
}
16 
17#[tonic::async_trait]
18impl VectorSearch for VectorSearchService {
19 async fn search(
20 &self,
21 request: Request<SearchRequest>,
22 ) -> Result<Response<SearchResponse>, Status> {
23 let start = Instant::now();
24 let req = request.into_inner();
25 
26 let query: Vec<f32> = req.vector;
27 let top_k = req.top_k as usize;
28 let ef_search = if req.ef_search > 0 {
29 req.ef_search as usize
30 } else {
31 100
32 };
33 
34 let raw_results = self.index.search(&query, top_k, ef_search);
35 
36 let metadata = self.metadata.read();
37 let results: Vec<SearchResultProto> = raw_results
38 .into_iter()
39 .filter(|(id, _)| {
40 // Apply metadata filters
41 if req.filter.is_empty() {
42 return true;
43 }
44 if let Some(meta) = metadata.get(id) {
45 req.filter.iter().all(|(k, v)| {
46 meta.get(k).map_or(false, |mv| mv == v)
47 })
48 } else {
49 false
50 }
51 })
52 .map(|(id, score)| SearchResultProto {
53 id: id.to_string(),
54 score,
55 metadata: metadata
56 .get(&id)
57 .cloned()
58 .unwrap_or_default(),
59 })
60 .collect();
61 
62 Ok(Response::new(SearchResponse {
63 results,
64 latency_us: start.elapsed().as_micros() as i64,
65 }))
66 }
67 
68 async fn upsert(
69 &self,
70 request: Request<UpsertRequest>,
71 ) -> Result<Response<UpsertResponse>, Status> {
72 let req = request.into_inner();
73 let mut ids = Vec::new();
74 
75 for point in req.points {
76 let id = self.index.insert(point.vector);
77 self.metadata
78 .write()
79 .insert(id, point.metadata);
80 ids.push(id.to_string());
81 }
82 
83 Ok(Response::new(UpsertResponse {
84 ids,
85 status: "ok".to_string(),
86 }))
87 }
88}
89 
/// Wire up the index, metadata store, and gRPC server, then serve
/// until the process is terminated.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // m = 16, ef_construction = 200: common HNSW defaults balancing
    // recall against build time.
    let index = Arc::new(HnswIndex::new(16, 200));
    let metadata = Arc::new(parking_lot::RwLock::new(
        std::collections::HashMap::new(),
    ));

    let service = VectorSearchService {
        index: index.clone(),
        metadata: metadata.clone(),
    };

    let addr = "0.0.0.0:50051".parse()?;
    println!("VectorSearch server listening on {}", addr);

    Server::builder()
        .add_service(VectorSearchServer::new(service))
        .serve(addr)
        .await?;

    Ok(())
}
112 

Write-Ahead Log for Durability

rust
1use std::fs::{File, OpenOptions};
2use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
3use std::path::Path;
4use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
5use crc32fast::Hasher;
6 
/// A single logical operation recorded in the write-ahead log.
pub enum WalEntry {
    /// Insert or replace a vector (on-disk tag byte 1).
    Upsert {
        id: u32,
        vector: Vec<f32>,
        metadata: Vec<u8>, // opaque, caller-defined encoding
    },
    /// Remove a vector (on-disk tag byte 2).
    Delete {
        id: u32,
    },
}
17 
/// Append-only log. Record framing on disk:
/// [len: u32 LE][crc32: u32 LE][payload bytes].
pub struct WriteAheadLog {
    writer: BufWriter<File>,
    path: std::path::PathBuf, // retained for future rotate/truncate; unused today
}
22 
23impl WriteAheadLog {
24 pub fn open(path: &Path) -> io::Result<Self> {
25 let file = OpenOptions::new()
26 .create(true)
27 .append(true)
28 .open(path)?;
29 
30 Ok(Self {
31 writer: BufWriter::new(file),
32 path: path.to_path_buf(),
33 })
34 }
35 
36 pub fn append(&mut self, entry: &WalEntry) -> io::Result<()> {
37 let data = Self::encode(entry);
38 
39 let mut hasher = Hasher::new();
40 hasher.update(&data);
41 let checksum = hasher.finalize();
42 
43 self.writer.write_u32::<LittleEndian>(data.len() as u32)?;
44 self.writer.write_u32::<LittleEndian>(checksum)?;
45 self.writer.write_all(&data)?;
46 self.writer.flush()?;
47 
48 Ok(())
49 }
50 
51 pub fn replay(path: &Path) -> io::Result<Vec<WalEntry>> {
52 let mut file = File::open(path)?;
53 let mut entries = Vec::new();
54 
55 loop {
56 let size = match file.read_u32::<LittleEndian>() {
57 Ok(s) => s as usize,
58 Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof => break,
59 Err(e) => return Err(e),
60 };
61 
62 let expected_checksum = file.read_u32::<LittleEndian>()?;
63 let mut data = vec![0u8; size];
64 file.read_exact(&mut data)?;
65 
66 let mut hasher = Hasher::new();
67 hasher.update(&data);
68 let actual_checksum = hasher.finalize();
69 
70 if actual_checksum != expected_checksum {
71 eprintln!("WAL corruption detected, stopping replay");
72 break;
73 }
74 
75 entries.push(Self::decode(&data));
76 }
77 
78 Ok(entries)
79 }
80 
81 fn encode(entry: &WalEntry) -> Vec<u8> {
82 let mut buf = Vec::new();
83 match entry {
84 WalEntry::Upsert { id, vector, metadata } => {
85 buf.push(1u8); // type tag
86 buf.extend_from_slice(&id.to_le_bytes());
87 buf.extend_from_slice(&(vector.len() as u32).to_le_bytes());
88 for &v in vector {
89 buf.extend_from_slice(&v.to_le_bytes());
90 }
91 buf.extend_from_slice(&(metadata.len() as u32).to_le_bytes());
92 buf.extend_from_slice(metadata);
93 }
94 WalEntry::Delete { id } => {
95 buf.push(2u8);
96 buf.extend_from_slice(&id.to_le_bytes());
97 }
98 }
99 buf
100 }
101 
102 fn decode(data: &[u8]) -> WalEntry {
103 match data[0] {
104 1 => {
105 let id = u32::from_le_bytes(data[1..5].try_into().unwrap());
106 let vec_len = u32::from_le_bytes(data[5..9].try_into().unwrap()) as usize;
107 let mut vector = Vec::with_capacity(vec_len);
108 for i in 0..vec_len {
109 let offset = 9 + i * 4;
110 vector.push(f32::from_le_bytes(
111 data[offset..offset + 4].try_into().unwrap(),
112 ));
113 }
114 let meta_offset = 9 + vec_len * 4;
115 let meta_len = u32::from_le_bytes(
116 data[meta_offset..meta_offset + 4].try_into().unwrap(),
117 ) as usize;
118 let metadata = data[meta_offset + 4..meta_offset + 4 + meta_len].to_vec();
119 WalEntry::Upsert { id, vector, metadata }
120 }
121 2 => {
122 let id = u32::from_le_bytes(data[1..5].try_into().unwrap());
123 WalEntry::Delete { id }
124 }
125 _ => panic!("Unknown WAL entry type"),
126 }
127 }
128}
129 

Benchmarking Framework

Systematic benchmarking is essential for tuning:

rust
1use std::time::{Duration, Instant};
2use rand::Rng;
3 
/// Aggregate results of one benchmark run over a query set.
pub struct BenchmarkResult {
    pub qps: f64, // queries per second (single-threaded, sum of latencies)
    pub latency_p50: Duration,
    pub latency_p95: Duration,
    pub latency_p99: Duration,
    pub recall_at_k: f64, // mean fraction of true top-k neighbors found
}
11 
12pub fn benchmark_search(
13 index: &HnswIndex,
14 queries: &[Vec<f32>],
15 ground_truth: &[Vec<u32>], // exact nearest neighbors
16 top_k: usize,
17 ef_search: usize,
18) -> BenchmarkResult {
19 let mut latencies = Vec::with_capacity(queries.len());
20 let mut recall_sum = 0.0;
21 
22 for (query, truth) in queries.iter().zip(ground_truth.iter()) {
23 let start = Instant::now();
24 let results = index.search(query, top_k, ef_search);
25 latencies.push(start.elapsed());
26 
27 // Calculate recall
28 let result_ids: std::collections::HashSet<_> =
29 results.iter().map(|(id, _)| *id).collect();
30 let truth_set: std::collections::HashSet<_> =
31 truth.iter().take(top_k).copied().collect();
32 let intersection = result_ids.intersection(&truth_set).count();
33 recall_sum += intersection as f64 / top_k as f64;
34 }
35 
36 latencies.sort();
37 let total_time: Duration = latencies.iter().sum();
38 
39 BenchmarkResult {
40 qps: queries.len() as f64 / total_time.as_secs_f64(),
41 latency_p50: latencies[latencies.len() / 2],
42 latency_p95: latencies[latencies.len() * 95 / 100],
43 latency_p99: latencies[latencies.len() * 99 / 100],
44 recall_at_k: recall_sum / queries.len() as f64,
45 }
46}
47 

FAQ

Need expert help?

Building with agentic AI?

I help teams ship production-grade systems. From architecture review to hands-on builds.

Muneer Puthiya Purayil

SaaS Architect & AI Systems Engineer. 10+ years shipping production infrastructure across fintech, automotive, e-commerce, and healthcare.

Engage

Start a
Conversation.

For teams building at scale: SaaS platforms, agentic AI systems, and enterprise mobile infrastructure. Scope and fit are evaluated before any engagement begins.

Limited availability · Q3 / Q4 2026