mdcms/neuraldb-docs/pages/ops-migration.md

2 KiB

title sort section-id keywords description language
Migration 130 operations migration, import, Postgres, Pinecone, Weaviate, data migration, ETL Migrating data to NeuralDB from PostgreSQL, Pinecone, Weaviate, and other sources en

Migration

From PostgreSQL

# 1. Dump the source database in pg_dump's custom archive format.
pg_dump --host=source-host --username=source-user --dbname=source-db \
  --format=custom > source-backup.dump

# 2. Create the target database on the NeuralDB host.
psql --host=neuraldb-host --username=neuraldb --command="CREATE DATABASE myapp;"

# 3. Restore into it using 8 parallel jobs; --no-owner skips restoring
#    the source database's object ownership.
pg_restore --host=neuraldb-host --username=neuraldb --dbname=myapp \
  --no-owner --jobs=8 source-backup.dump

Add vector columns post-migration:

-- Add an embedding column (VECTOR(1536)) to the migrated table.
ALTER TABLE documents
  ADD COLUMN embedding VECTOR(1536);

-- Build an HNSW index over the new column using the cosine operator class.
-- NOTE(review): assuming PostgreSQL semantics, CONCURRENTLY cannot run inside
-- a transaction block; confirm for NeuralDB.
CREATE INDEX CONCURRENTLY documents_embedding_idx
  ON documents
  USING hnsw (embedding vector_cosine_ops);

From PostgreSQL + pgvector

# Dump the source database in custom format, leaving the pgvector extension
# itself out of the archive (presumably NeuralDB supplies the vector type
# natively; confirm). --exclude-extension needs pg_dump from PostgreSQL 17+.
pg_dump -h source-host -U source-user -d source-db --format=custom \
  --exclude-extension=vector > pgvector-backup.dump

# Restore with 8 parallel jobs. --no-owner matches the plain-PostgreSQL restore
# above: without it pg_restore tries to reassign objects to the source roles,
# which fails when those roles do not exist on the NeuralDB host.
pg_restore -h neuraldb-host -U neuraldb -d myapp --jobs=8 --no-owner pgvector-backup.dump

From Pinecone

import os

import pinecone
from neuraldb import NeuralDB, BulkIngestor

# Credentials and connection strings are read from the environment.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("my-index")
client = NeuralDB(os.environ["NEURALDB_URL"])

# Target table: one row per Pinecone vector. The VECTOR(1536) size must match
# the dimension of the source index.
client.execute("""
    CREATE TABLE IF NOT EXISTS pinecone_migration (
        id TEXT PRIMARY KEY, embedding VECTOR(1536), metadata JSONB,
        migrated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
    )
""")

# NOTE: paginate_pinecone_ids is not defined in this snippet. Supply a helper
# that yields lists of at most `batch_size` vector IDs from `index`.
ingestor = BulkIngestor(client, table="pinecone_migration", batch_size=500)
with ingestor as ing:
    for ids_batch in paginate_pinecone_ids(index, batch_size=1000):
        # Fetch the full vectors (values + metadata) for this batch of IDs.
        fetch_response = index.fetch(ids=ids_batch)
        for vector_id, vector_data in fetch_response.vectors.items():
            # Vectors without metadata are stored with an empty JSON object.
            ing.add({"id": vector_id, "embedding": vector_data.values, "metadata": vector_data.metadata or {}})

Verifying Migration

-- Total migrated rows; compare against the row count on the source.
SELECT COUNT(*)
FROM documents;

-- Rows that still have no embedding.
SELECT COUNT(*)
FROM documents
WHERE embedding IS NULL;

-- Per-index vector statistics reported by NeuralDB.
SELECT
  index_name,
  hnsw_in_memory,
  estimated_recall
FROM neuraldb_stat_vector_indexes;