mirror of
https://github.com/kbenestad/mdcms.git
synced 2026-06-18 15:24:32 +00:00
89 lines
2 KiB
Markdown
89 lines
2 KiB
Markdown
---
|
|
title: Aggregations
|
|
sort: 130
|
|
section-id: query-language
|
|
keywords: aggregations, GROUP BY, COUNT, SUM, vectors, AVG, centroid, analytics
|
|
description: Aggregating data in NQL including GROUP BY, COUNT, SUM, and vector-specific aggregation functions
|
|
language: en
|
|
---
|
|
|
|
# Aggregations
|
|
|
|
NQL supports the full SQL aggregation toolkit, extended with vector-specific aggregate functions.
|
|
|
|
## Standard Aggregations
|
|
|
|
```sql
|
|
-- Count the rows in each category, largest categories first.
SELECT
    category,
    COUNT(*) AS doc_count
FROM documents
GROUP BY category
ORDER BY doc_count DESC;
|
|
|
|
-- Average, minimum and maximum price per category,
-- restricted to rows where available is true.
SELECT
    category,
    AVG(price),
    MIN(price),
    MAX(price)
FROM products
WHERE available = TRUE
GROUP BY category;
|
|
```
|
|
|
|
## Vector Aggregations
|
|
|
|
### `AVG(embedding)` — Centroid
|
|
|
|
```sql
|
|
-- Average the embeddings of all 'technology' rows; per the section heading,
-- AVG over the embedding column yields the centroid.
SELECT
    AVG(embedding) AS centroid
FROM documents
WHERE category = 'technology';
|
|
```
|
|
|
|
Find the ten `technology` documents closest to that category's centroid:
|
|
|
|
```sql
|
|
-- Return the 10 'technology' documents nearest to the centroid of that category.
WITH centroid AS (
    -- Aggregate without GROUP BY: produces exactly one row holding the
    -- average embedding of the category.
    SELECT AVG(embedding) AS c
    FROM documents
    WHERE category = 'technology'
)
SELECT
    d.id,
    d.title,
    -- NOTE(review): assumes <=> is a distance in [0, 1]-style cosine form,
    -- so 1 - distance reads as a similarity -- confirm against the operator docs.
    1 - (d.embedding <=> centroid.c) AS similarity
FROM documents AS d
-- Explicit CROSS JOIN instead of a comma join: the CTE has a single row,
-- so every document is paired with the same centroid.
CROSS JOIN centroid
WHERE d.category = 'technology'
ORDER BY d.embedding <=> centroid.c
LIMIT 10;
|
|
```
|
|
|
|
### `vector_centroid(embedding, weight)`
|
|
|
|
```sql
|
|
-- Centroid of the 'electronics' embeddings, with rating passed as the
-- weight argument of vector_centroid.
SELECT
    vector_centroid(embedding, rating) AS weighted_centroid
FROM products
WHERE category = 'electronics';
|
|
```
|
|
|
|
## GROUP BY with Vector Search
|
|
|
|
```sql
|
|
-- One row per category. With PostgreSQL-style DISTINCT ON, the row kept for
-- each category is the first in ORDER BY order, i.e. the one with the
-- smallest embedding <=> $1 (the query vector is bound as $1).
SELECT DISTINCT ON (category)
    id,
    category,
    title,
    1 - (embedding <=> $1) AS similarity
FROM documents
ORDER BY
    category,
    embedding <=> $1;
|
|
```
|
|
|
|
## Window Functions
|
|
|
|
```sql
|
|
-- Rank documents inside their category by distance to the query vector $1,
-- keeping only rows whose similarity exceeds 0.5.
SELECT
    id,
    title,
    category,
    1 - (embedding <=> $1) AS similarity,
    RANK() OVER (
        PARTITION BY category
        ORDER BY embedding <=> $1
    ) AS rank_in_category
FROM documents
WHERE 1 - (embedding <=> $1) > 0.5
ORDER BY
    category,
    rank_in_category;
|
|
```
|
|
|
|
## Time-Series Semantic Analytics
|
|
|
|
```sql
|
|
-- Compare each week's centroid with the centroid of the calendar week before it.
WITH weekly_centroids AS (
    -- One averaged embedding per week bucket of created_at.
    SELECT
        date_trunc('week', created_at) AS week,
        AVG(embedding) AS centroid
    FROM documents
    GROUP BY week
)
SELECT
    cur.week,
    1 - (cur.centroid <=> prev.centroid) AS similarity_to_prev_week
FROM weekly_centroids AS cur
-- LEFT JOIN keeps weeks that have no bucket exactly one week earlier;
-- presumably similarity_to_prev_week is NULL for those rows.
LEFT JOIN weekly_centroids AS prev
    ON prev.week = cur.week - INTERVAL '1 week'
ORDER BY cur.week;
|
|
```
|