forked from innovacion/Mayacontigo
128 lines
2.6 KiB
Python
128 lines
2.6 KiB
Python
# /// script
|
|
# requires-python = ">=3.12"
|
|
# dependencies = [
|
|
# "marimo",
|
|
# "numpy==2.1.0",
|
|
# "pymongo==4.11",
|
|
# "qdrant-client==1.11.0",
|
|
# "scikit-learn==1.6.1",
|
|
# "umap-learn==0.5.7",
|
|
# ]
|
|
# ///
|
|
|
|
import marimo
|
|
|
|
# Version stamp recorded by the tool that generated this notebook file
# (presumably the marimo release used to save it -- confirm before bumping).
__generated_with = "0.11.0"
|
|
# Notebook application object; every cell below registers itself on it through
# the @app.cell decorator, and the __main__ guard at the bottom runs it.
app = marimo.App(width="medium")
|
|
|
|
|
|
@app.cell
def _():
    # Third-party clients used by the rest of the notebook: pymongo for the
    # MongoDB target and qdrant_client for the Qdrant source.
    from pymongo import MongoClient
    from qdrant_client import QdrantClient, models

    return MongoClient, QdrantClient, models
|
|
|
|
|
|
@app.cell
def _(QdrantClient):
    # Connect to the Qdrant cluster that holds the source "MayaOCP" collection
    # and print how many points it contains as a quick connectivity check.
    #
    # Configuration comes from the environment so no secret is stored in the
    # notebook source:
    #   QDRANT_API_KEY  (required) API key for the cluster.
    #   QDRANT_URL      (optional) endpoint; defaults to the cluster this
    #                   notebook was originally written against.
    #
    # NOTE(review): a key was previously committed in this cell; treat it as
    # leaked and rotate it.
    #
    # Raises RuntimeError when QDRANT_API_KEY is missing or empty.
    import os as _os  # underscore alias keeps the name local to this cell

    _api_key = _os.environ.get("QDRANT_API_KEY")
    if not _api_key:
        raise RuntimeError(
            "QDRANT_API_KEY is not set; export it before running this notebook."
        )

    qdrant = QdrantClient(
        api_key=_api_key,
        location=_os.environ.get(
            "QDRANT_URL",
            "https://82ba8a5d-26e6-41ff-a4f0-ac5e7554ef15.eastus-0.azure.cloud.qdrant.io:6333",
        ),
    )
    print(qdrant.get_collection("MayaOCP").points_count)
    return (qdrant,)
|
|
|
|
|
|
@app.cell
def _(MongoClient):
    # Connect to the MongoDB cluster that receives the migrated documents and
    # print the reply to an admin "ping" as a quick connectivity check.
    #
    # The full connection string (including credentials) is read from the
    # MONGODB_URI environment variable so that no username or password is
    # stored in the notebook source. The string previously used here had the
    # shape:
    #   mongodb+srv://<user>:<password>@mayacontigo-mongo.global.mongocluster.cosmos.azure.com/?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000
    #
    # NOTE(review): credentials were previously committed in this cell; treat
    # them as leaked and rotate them.
    #
    # Raises RuntimeError when MONGODB_URI is missing or empty.
    import os as _os  # underscore alias keeps the name local to this cell

    _mongo_uri = _os.environ.get("MONGODB_URI")
    if not _mongo_uri:
        raise RuntimeError(
            "MONGODB_URI is not set; export it before running this notebook."
        )

    mongo = MongoClient(_mongo_uri)
    print(mongo.admin.command("ping"))
    return (mongo,)
|
|
|
|
|
|
@app.cell
def _(qdrant):
    # Download every point (vector + payload) of the "MayaOCP" collection.
    #
    # scroll() returns a (records, next_page_offset) pair. Taking only the
    # first page of a single large request silently drops everything past the
    # page limit, so keep requesting pages until the server reports that there
    # is no next offset.
    points = []
    _offset = None
    while True:
        _batch, _offset = qdrant.scroll(
            collection_name="MayaOCP",
            with_vectors=True,
            with_payload=True,
            limit=1000,  # page size; smaller pages keep each response modest
            offset=_offset,
        )
        points.extend(_batch)
        if _offset is None:
            break

    print(len(points))
    return (points,)
|
|
|
|
|
|
@app.cell
def _(mongo):
    # Handles for the target database ("MayaContigo") and, inside it, the
    # target collection ("MayaOCP"). Both are exported for later cells.
    mongodb = mongo["MayaContigo"]
    collection = mongodb["MayaOCP"]
    return collection, mongodb
|
|
|
|
|
|
@app.cell
def _(points):
    # Turn each downloaded point into one flat document: the first 2000
    # elements of its vector under "vector", followed by every payload field.
    # Payload keys are spread last, so a payload field named "vector" would
    # replace the truncated vector.
    documents = [
        {"vector": point.vector[:2000], **point.payload}
        for point in points
    ]
    # Preview the first two documents as the cell output.
    documents[:2]
    return (documents,)
|
|
|
|
|
|
@app.cell
def _(collection, documents):
    # Bulk-insert all prepared documents into the target collection; the
    # driver's result object is left as the cell output.
    _insert_result = collection.insert_many(documents)
    _insert_result
    return
|
|
|
|
|
|
@app.cell
def _(mongodb):
    # Create the vector index "VectorSearchIndex" on the "vector" field of the
    # "MayaOCP" collection through a raw createIndexes command.
    #
    # "dimensions" has to agree with the length of the stored vectors, which
    # the document-building cell cuts to 2000 elements.
    _hnsw_options = {
        "kind": "vector-hnsw",
        "similarity": "COS",
        "dimensions": 2000,
    }
    _vector_index = {
        "name": "VectorSearchIndex",
        "key": {"vector": "cosmosSearch"},
        "cosmosSearchOptions": _hnsw_options,
    }
    # "createIndexes" stays the first key: it is the command name.
    mongodb.command({"createIndexes": "MayaOCP", "indexes": [_vector_index]})
    return
|
|
|
|
|
|
@app.cell
def _(points):
    # Use the vector of the first downloaded point as a sample query.
    _first_point = points[0]
    query_vector = _first_point.vector
    # Show it as the cell output.
    query_vector
    return (query_vector,)
|
|
|
|
|
|
@app.cell
def _(collection, query_vector):
    # Smoke-test the vector index: run a cosmosSearch query for the 5 nearest
    # documents to the sample vector and print each hit.
    _knn_query = {
        "path": "vector",
        # Same 2000-element cut that was applied to the stored vectors.
        "vector": query_vector[:2000],
        "k": 5,
    }
    pipeline = [{"$search": {"cosmosSearch": _knn_query}}]

    for r in collection.aggregate(pipeline):
        print(r)
    return pipeline, r
|
|
|
|
|
|
@app.cell
def _():
    # Empty scratch cell.
    return
|
|
|
|
|
|
# Run the notebook's cells when the file is executed as a script.
if __name__ == "__main__":
    app.run()
|