# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "marimo",
#     "numpy==2.1.0",
#     "pymongo==4.11",
#     "qdrant-client==1.11.0",
#     "scikit-learn==1.6.1",
#     "umap-learn==0.5.7",
# ]
# ///

# Notebook purpose: copy every point of the Qdrant collection "MayaOCP" into
# the MongoDB collection "MayaContigo.MayaOCP", create a cosmosSearch HNSW
# vector index on the copied vectors, and run a sample nearest-neighbour query.
#
# Required environment variables (no credentials are stored in this file):
#   QDRANT_URL      - endpoint of the Qdrant instance
#   QDRANT_API_KEY  - API key for that instance
#   MONGODB_URI     - full MongoDB connection string, including credentials
#
# SECURITY NOTE: earlier revisions of this file embedded the API key and the
# MongoDB password in source. Treat those values as compromised and rotate them.

import marimo

__generated_with = "0.11.0"
app = marimo.App(width="medium")


@app.cell
def _():
    import os

    from qdrant_client import QdrantClient, models
    from pymongo import MongoClient

    return MongoClient, QdrantClient, models, os


@app.cell
def _():
    # Single source of truth for the vector length: vectors are truncated to
    # this many components before insertion, the index is declared with this
    # dimension count, and the query vector is truncated the same way.
    VECTOR_DIM = 2000
    return (VECTOR_DIM,)


@app.cell
def _(QdrantClient, os):
    # os.environ[...] (not .get) so a missing variable fails loudly with a
    # KeyError instead of silently attempting an unauthenticated connection.
    qdrant = QdrantClient(
        api_key=os.environ["QDRANT_API_KEY"],
        location=os.environ["QDRANT_URL"],
    )
    # Sanity check: print how many points the source collection reports.
    print(qdrant.get_collection("MayaOCP").points_count)
    return (qdrant,)


@app.cell
def _(MongoClient, os):
    mongo = MongoClient(os.environ["MONGODB_URI"])
    # Round-trip to the server so connection/auth problems surface here.
    print(mongo.admin.command("ping"))
    return (mongo,)


@app.cell
def _(qdrant):
    # scroll() returns a tuple; only its first element (the list of points)
    # is kept. NOTE(review): collections with more than `limit` points would
    # be truncated here - confirm 100000 is a safe upper bound or paginate.
    points = qdrant.scroll(
        collection_name="MayaOCP",
        with_vectors=True,
        with_payload=True,
        limit=100000,
    )[0]
    print(len(points))
    return (points,)


@app.cell
def _(mongo):
    mongodb = mongo["MayaContigo"]
    collection = mongodb["MayaOCP"]
    return collection, mongodb


@app.cell
def _(VECTOR_DIM, points):
    # One Mongo document per point: the (truncated) vector plus every payload
    # field flattened to the top level. A point without payload contributes
    # only its vector instead of raising on `**None`.
    documents = [
        {"vector": p.vector[:VECTOR_DIM], **(p.payload or {})} for p in points
    ]
    documents[:2]
    return (documents,)


@app.cell
def _(collection, documents):
    # insert_many rejects an empty list, so skip the call when there is
    # nothing to copy.
    if documents:
        collection.insert_many(documents)
    return


@app.cell
def _(VECTOR_DIM, mongodb):
    # Create the HNSW vector index (cosine similarity) over the "vector" field.
    mongodb.command(
        {
            "createIndexes": "MayaOCP",
            "indexes": [
                {
                    "name": "VectorSearchIndex",
                    "key": {"vector": "cosmosSearch"},
                    "cosmosSearchOptions": {
                        "kind": "vector-hnsw",
                        "similarity": "COS",
                        "dimensions": VECTOR_DIM,
                    },
                }
            ],
        }
    )
    return


@app.cell
def _(points):
    # Use the first copied point as the sample query.
    query_vector = points[0].vector
    query_vector
    return (query_vector,)


@app.cell
def _(VECTOR_DIM, collection, query_vector):
    # Top-5 nearest neighbours of the sample vector, truncated the same way
    # as the stored vectors so the dimensions match the index.
    pipeline = [
        {
            "$search": {
                "cosmosSearch": {
                    "path": "vector",
                    "vector": query_vector[:VECTOR_DIM],
                    "k": 5,
                }
            }
        }
    ]
    # Pre-bind `r` so the cell's return does not fail when the aggregation
    # yields no rows.
    r = None
    for r in collection.aggregate(pipeline):
        print(r)
    return pipeline, r


@app.cell
def _():
    return


if __name__ == "__main__":
    app.run()