mirror of
https://github.com/kbenestad/mdcms.git
synced 2026-06-18 15:24:32 +00:00
67 lines
2 KiB
Markdown
67 lines
2 KiB
Markdown
---
|
|
title: Migration
|
|
sort: 130
|
|
section-id: operations
|
|
keywords: migration, import, Postgres, Pinecone, Weaviate, data migration, ETL
|
|
description: Migrating data to NeuralDB from PostgreSQL, Pinecone, Weaviate, and other sources
|
|
language: en
|
|
---
|
|
|
|
# Migration
|
|
|
|
## From PostgreSQL
|
|
|
|
```bash
|
|
pg_dump -h source-host -U source-user -d source-db --format=custom > source-backup.dump
|
|
psql -h neuraldb-host -U neuraldb -c "CREATE DATABASE myapp;"
|
|
pg_restore -h neuraldb-host -U neuraldb -d myapp --jobs=8 --no-owner source-backup.dump
|
|
```
|
|
|
|
After the restore completes, add vector columns and build an HNSW index on them:
|
|
|
|
```sql
|
|
ALTER TABLE documents ADD COLUMN embedding VECTOR(1536);
|
|
CREATE INDEX CONCURRENTLY documents_embedding_idx
|
|
ON documents USING hnsw (embedding vector_cosine_ops);
|
|
```
|
|
|
|
## From PostgreSQL + pgvector
|
|
|
|
```bash
|
|
pg_dump -h source-host -U source-user -d source-db --format=custom \
|
|
--exclude-extension=vector > pgvector-backup.dump
|
|
pg_restore -h neuraldb-host -U neuraldb -d myapp --jobs=8 pgvector-backup.dump
|
|
```
|
|
|
|
## From Pinecone
|
|
|
|
```python
|
|
import os
import pinecone
|
|
from neuraldb import NeuralDB, BulkIngestor
|
|
|
|
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
|
|
index = pc.Index("my-index")
|
|
client = NeuralDB(os.environ["NEURALDB_URL"])
|
|
|
|
client.execute("""
|
|
CREATE TABLE IF NOT EXISTS pinecone_migration (
|
|
id TEXT PRIMARY KEY, embedding VECTOR(1536), metadata JSONB,
|
|
migrated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
|
)
|
|
""")
|
|
|
|
ingestor = BulkIngestor(client, table="pinecone_migration", batch_size=500)
|
|
with ingestor as ing:
|
|
    # NOTE: paginate_pinecone_ids is not defined in this snippet; supply your own
    # helper that yields lists of vector IDs from the Pinecone index.
    for ids_batch in paginate_pinecone_ids(index, batch_size=1000):
        fetch_response = index.fetch(ids=ids_batch)
        for vector_id, vector_data in fetch_response.vectors.items():
            ing.add({"id": vector_id, "embedding": vector_data.values, "metadata": vector_data.metadata or {}})
|
|
```
|
|
|
|
## Verifying Migration
|
|
|
|
```sql
|
|
SELECT COUNT(*) FROM documents;
|
|
SELECT COUNT(*) FROM documents WHERE embedding IS NULL;
|
|
SELECT index_name, hnsw_in_memory, estimated_recall FROM neuraldb_stat_vector_indexes;
|
|
```
|