forked from innovacion/Mayacontigo
ic
This commit is contained in:
127
notebooks/vector-db-migrator/qdrant_to_cosmbosdb_mongo.py
Normal file
127
notebooks/vector-db-migrator/qdrant_to_cosmbosdb_mongo.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = [
|
||||
# "marimo",
|
||||
# "numpy==2.1.0",
|
||||
# "pymongo==4.11",
|
||||
# "qdrant-client==1.11.0",
|
||||
# "scikit-learn==1.6.1",
|
||||
# "umap-learn==0.5.7",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import marimo
|
||||
|
||||
# marimo version string recorded in this notebook file.
__generated_with = "0.11.0"

# Application object that every `@app.cell` function below registers with.
app = marimo.App(width="medium")
|
||||
|
||||
|
||||
@app.cell
def _():
    # Client libraries for both ends of the migration: qdrant_client for the
    # source vector store and pymongo for the destination database.
    from pymongo import MongoClient
    from qdrant_client import QdrantClient, models

    return MongoClient, QdrantClient, models
|
||||
|
||||
|
||||
@app.cell
def _(QdrantClient):
    # Connect to the source Qdrant cluster and print the number of points in
    # the "MayaOCP" collection as a quick connectivity check.
    #
    # The API key is a secret and must not live in the repository: it is read
    # from the QDRANT_API_KEY environment variable, and a missing variable
    # raises KeyError right here instead of a later authentication failure.
    # QDRANT_URL may override the endpoint; it defaults to the cluster this
    # notebook was written against.
    #
    # NOTE: the key that used to be hard-coded in this cell has been exposed
    # in version control and should be rotated.
    import os as _os  # underscore alias keeps the name local to this cell

    qdrant = QdrantClient(
        api_key=_os.environ["QDRANT_API_KEY"],
        location=_os.environ.get(
            "QDRANT_URL",
            "https://82ba8a5d-26e6-41ff-a4f0-ac5e7554ef15.eastus-0.azure.cloud.qdrant.io:6333",
        ),
    )
    print(qdrant.get_collection("MayaOCP").points_count)
    return (qdrant,)
|
||||
|
||||
|
||||
@app.cell
def _(MongoClient):
    # Connect to the destination MongoDB-compatible cluster and print the
    # result of a "ping" admin command as a connectivity check.
    #
    # Credentials are secrets and must not be embedded in the connection
    # string: they are read from COSMOS_MONGO_USERNAME / COSMOS_MONGO_PASSWORD
    # (a missing variable raises KeyError) and passed as keyword arguments,
    # which also avoids having to percent-escape them for the URI.
    #
    # NOTE: the password that used to be hard-coded in this cell has been
    # exposed in version control and should be rotated.
    import os as _os  # underscore alias keeps the name local to this cell

    mongo = MongoClient(
        "mongodb+srv://mayacontigo-mongo.global.mongocluster.cosmos.azure.com/?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000",
        username=_os.environ["COSMOS_MONGO_USERNAME"],
        password=_os.environ["COSMOS_MONGO_PASSWORD"],
    )
    print(mongo.admin.command("ping"))
    return (mongo,)
|
||||
|
||||
|
||||
@app.cell
def _(qdrant):
    # Read every point (vector + payload) of the "MayaOCP" collection.
    #
    # scroll() returns one page plus the offset of the next page, so a single
    # call silently drops everything past its `limit`. Follow the offset
    # until it comes back as None so the whole collection is migrated, and
    # keep pages modest so no single request has to carry every vector.
    points = []
    _next_offset = None
    while True:
        _page, _next_offset = qdrant.scroll(
            collection_name="MayaOCP",
            with_vectors=True,
            with_payload=True,
            limit=1000,
            offset=_next_offset,
        )
        points.extend(_page)
        if _next_offset is None:
            break
    print(len(points))
    return (points,)
|
||||
|
||||
|
||||
@app.cell
def _(mongo):
    # Handles for the destination database ("MayaContigo") and the collection
    # inside it ("MayaOCP") that receives the migrated documents.
    mongodb = mongo.get_database("MayaContigo")
    collection = mongodb.get_collection("MayaOCP")
    return collection, mongodb
|
||||
|
||||
|
||||
@app.cell
def _(points):
    # Convert each Qdrant point into a flat document: the payload fields plus
    # a "vector" field holding the first 2000 components of the embedding
    # (2000 matches the "dimensions" of the vector index this notebook
    # creates on the collection).
    #
    # The payload is spread first so a payload key named "vector" can never
    # overwrite the truncated embedding, and a point without a payload
    # (payload is None) yields a vector-only document instead of a TypeError.
    documents = [
        {**(p.payload or {}), "vector": p.vector[:2000]} for p in points
    ]
    # Preview the first two documents as the cell output.
    documents[:2]
    return (documents,)
|
||||
|
||||
|
||||
@app.cell
def _(collection, documents):
    # Bulk-insert all converted documents into the destination collection.
    collection.insert_many(documents=documents)
    return
|
||||
|
||||
|
||||
@app.cell
def _(mongodb):
    # Create the vector index on the "vector" field of "MayaOCP" by sending a
    # raw createIndexes command to the database.
    _search_options = {
        "kind": "vector-hnsw",
        "similarity": "COS",
        "dimensions": 2000,
    }
    _vector_index = {
        "name": "VectorSearchIndex",
        "key": {"vector": "cosmosSearch"},
        "cosmosSearchOptions": _search_options,
    }
    # "createIndexes" stays the first key of the command document.
    mongodb.command({"createIndexes": "MayaOCP", "indexes": [_vector_index]})
    return
|
||||
|
||||
|
||||
@app.cell
def _(points):
    # Use the embedding of the first migrated point as a sample query vector.
    _first_point = points[0]
    query_vector = _first_point.vector
    # Show the vector as the cell output.
    query_vector
    return (query_vector,)
|
||||
|
||||
|
||||
@app.cell
def _(collection, query_vector):
    # Smoke-test the new index: run a vector search on the "vector" field
    # with the sample query (cut to 2000 components, like the stored vectors)
    # asking for k=5 results, and print each returned document.
    _vector_query = {
        "path": "vector",
        "vector": query_vector[:2000],
        "k": 5,
    }
    pipeline = [{"$search": {"cosmosSearch": _vector_query}}]
    for r in collection.aggregate(pipeline):
        print(r)
    return pipeline, r
|
||||
|
||||
|
||||
@app.cell
def _():
    # Empty trailing cell: it defines nothing and produces no output.
    return
|
||||
|
||||
|
||||
# Run the notebook's cells when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|
||||
Reference in New Issue
Block a user