mirror of
https://github.com/kbenestad/mdcms.git
synced 2026-06-18 15:24:32 +00:00
2 KiB
2 KiB
| title | sort | section-id | keywords | description | language |
|---|---|---|---|---|---|
| Migration | 130 | operations | migration, import, Postgres, Pinecone, Weaviate, data migration, ETL | Migrating data to NeuralDB from PostgreSQL, Pinecone, Weaviate, and other sources | en |
Migration
From PostgreSQL
# 1. Dump the source database in pg_dump's custom archive format.
pg_dump \
  --host=source-host \
  --username=source-user \
  --dbname=source-db \
  --format=custom \
  > source-backup.dump

# 2. Create the target database on the NeuralDB host.
psql --host=neuraldb-host --username=neuraldb --command="CREATE DATABASE myapp;"

# 3. Restore using 8 parallel jobs; --no-owner skips restoring the
#    source database's object ownership.
pg_restore \
  --host=neuraldb-host \
  --username=neuraldb \
  --dbname=myapp \
  --jobs=8 \
  --no-owner \
  source-backup.dump
Add vector columns post-migration:
-- Add the embedding column. 1536 is the vector dimension; presumably it must
-- match the output size of the embedding model in use (verify for your model).
ALTER TABLE documents
    ADD COLUMN embedding VECTOR(1536);

-- Build an HNSW index on the new column using the cosine operator class.
-- NOTE: in PostgreSQL, CREATE INDEX CONCURRENTLY cannot run inside a
-- transaction block; presumably the same applies here (confirm).
CREATE INDEX CONCURRENTLY documents_embedding_idx
    ON documents
    USING hnsw (embedding vector_cosine_ops);
From PostgreSQL + pgvector
# Dump the source database, leaving out the pgvector extension itself.
# NOTE: --exclude-extension requires pg_dump from PostgreSQL 17 or newer.
# Presumably the extension is excluded because the target provides the
# VECTOR type itself (confirm against the NeuralDB documentation).
pg_dump -h source-host -U source-user -d source-db --format=custom \
  --exclude-extension=vector > pgvector-backup.dump

# --no-owner: do not try to reassign objects to the source role, which will
# generally not exist on the target (same flag as the plain PostgreSQL restore).
pg_restore -h neuraldb-host -U neuraldb -d myapp --jobs=8 --no-owner pgvector-backup.dump
From Pinecone
import os

import pinecone
from neuraldb import NeuralDB, BulkIngestor

# Credentials and connection details are read from the environment.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("my-index")
client = NeuralDB(os.environ["NEURALDB_URL"])

# Create the target table up front; IF NOT EXISTS keeps re-runs from failing.
client.execute("""
CREATE TABLE IF NOT EXISTS pinecone_migration (
id TEXT PRIMARY KEY, embedding VECTOR(1536), metadata JSONB,
migrated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
)
""")

ingestor = BulkIngestor(client, table="pinecone_migration", batch_size=500)

# NOTE(review): paginate_pinecone_ids is not defined in this snippet. It is
# assumed to be a user-supplied helper that yields lists of vector IDs from
# the index, batch_size at a time; TODO provide or link its definition.
with ingestor as ing:
    for ids_batch in paginate_pinecone_ids(index, batch_size=1000):
        fetch_response = index.fetch(ids=ids_batch)
        for vector_id, vector_data in fetch_response.vectors.items():
            ing.add({
                "id": vector_id,
                "embedding": vector_data.values,
                # Vectors without metadata are stored with an empty object.
                "metadata": vector_data.metadata or {},
            })
Verifying Migration
-- Row count of the migrated table (compare with the count on the source).
SELECT COUNT(*)
FROM documents;

-- Rows that have no embedding value.
SELECT COUNT(*)
FROM documents
WHERE embedding IS NULL;

-- Per-index statistics read from neuraldb_stat_vector_indexes.
SELECT
    index_name,
    hnsw_in_memory,
    estimated_recall
FROM neuraldb_stat_vector_indexes;