mirror of
https://github.com/kbenestad/mdcms.git
synced 2026-06-18 15:24:32 +00:00
79 lines
1.6 KiB
Markdown
79 lines
1.6 KiB
Markdown
---
|
|
title: Vector Queries
|
|
sort: 110
|
|
section-id: query-language
|
|
keywords: vector queries, NEAREST, SIMILAR, cosine, dot product, euclidean, ANN
|
|
description: Writing vector similarity queries in NQL — distance operators, NEAREST, SIMILAR, recall tuning, exact search, and multi-vector queries
|
|
language: en
|
|
---
|
|
|
|
# Vector Queries
|
|
|
|
## Distance Operators
|
|
|
|
```sql
|
|
embedding <=> query_vector -- cosine distance
|
|
embedding <-> query_vector -- euclidean (L2)
|
|
embedding <#> query_vector -- negative dot product
|
|
```
|
|
|
|
To use the HNSW index, order by a distance operator and always pair the `ORDER BY` with a `LIMIT`:
|
|
|
|
```sql
|
|
SELECT id, content FROM documents
|
|
ORDER BY embedding <=> '[0.1, 0.2, ...]'
|
|
LIMIT 10;
|
|
```
|
|
|
|
## NEAREST Clause
|
|
|
|
```sql
|
|
SELECT id, content, score
|
|
FROM documents
|
|
NEAREST TO embedding = '[0.1, 0.2, ...]' USING COSINE
|
|
TOP 10;
|
|
```
|
|
|
|
## SIMILAR Clause
|
|
|
|
```sql
|
|
SELECT id, content, score
|
|
FROM documents
|
|
SIMILAR TO embedding = $1 USING COSINE THRESHOLD 0.75
|
|
LIMIT 100;
|
|
```
|
|
|
|
## Recall Tuning
|
|
|
|
```sql
|
|
SET hnsw.ef_search = 200; -- higher = better recall, slower
|
|
```
|
|
|
|
| ef_search | Recall@10 | p50 latency | QPS |
|
|
|-----------|-----------|-------------|-----|
|
|
| 20 | 89% | 0.7ms | 12,000 |
|
|
| 40 | 95% | 1.2ms | 8,400 |
|
|
| 80 | 98% | 2.1ms | 4,800 |
|
|
| 200 | 99.5% | 4.8ms | 2,100 |
|
|
|
|
## Exact Search
|
|
|
|
```sql
|
|
SET neuraldb.vector_scan = 'exact';
|
|
SELECT * FROM documents ORDER BY embedding <=> $1 LIMIT 10;
|
|
RESET neuraldb.vector_scan;
|
|
```
|
|
|
|
## Multi-Vector Queries
|
|
|
|
```sql
|
|
WITH queries AS (
|
|
SELECT UNNEST(ARRAY['[...]'::VECTOR(1536), '[...]'::VECTOR(1536)]) AS qv
|
|
),
|
|
ranked AS (
|
|
SELECT d.id, d.content, MIN(d.embedding <=> q.qv) AS best_distance
|
|
FROM documents d, queries q
|
|
GROUP BY d.id, d.content
|
|
)
|
|
SELECT * FROM ranked ORDER BY best_distance LIMIT 20;
|
|
```
|