This commit is contained in:
Rogelio
2025-10-13 18:16:25 +00:00
parent 739f087cef
commit 325f1ef439
415 changed files with 46870 additions and 0 deletions

View File

@@ -0,0 +1,127 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "marimo",
# "numpy==2.1.0",
# "pymongo==4.11",
# "qdrant-client==1.11.0",
# "scikit-learn==1.6.1",
# "umap-learn==0.5.7",
# ]
# ///
import marimo
# Version string recorded in the notebook file (presumably the marimo release
# that generated it - confirm against marimo's file format docs).
__generated_with = "0.11.0"
# Notebook application object; every cell below registers itself on it through
# the @app.cell decorator, and app.run() at the bottom of the file executes it.
app = marimo.App(width="medium")
@app.cell
def _():
    # Import cell: exposes the database client classes (and Qdrant's `models`
    # namespace) to the other cells of the notebook.
    from pymongo import MongoClient
    from qdrant_client import QdrantClient
    from qdrant_client import models

    return MongoClient, QdrantClient, models
@app.cell
def _(QdrantClient):
    # Connect to the Qdrant cluster that holds the source collection.
    #
    # Connection settings are read from the environment so that no secret is
    # committed with the notebook:
    #   QDRANT_URL      - endpoint URL of the Qdrant cluster
    #   QDRANT_API_KEY  - API key for that cluster
    # A missing variable raises KeyError naming the variable, which fails fast
    # before any network call is attempted.
    #
    # NOTE(review): any key that was previously committed in this file should
    # be considered leaked and rotated.
    #
    # The alias is underscore-prefixed so the import stays private to this
    # cell and cannot clash with another cell importing `os`.
    import os as _os

    qdrant = QdrantClient(
        api_key=_os.environ["QDRANT_API_KEY"],
        location=_os.environ["QDRANT_URL"],
    )
    # Sanity check: print the point count reported for the "MayaOCP" collection.
    print(qdrant.get_collection("MayaOCP").points_count)
    return (qdrant,)
@app.cell
def _(MongoClient):
    # Connect to the MongoDB-compatible target cluster.
    #
    # The full connection string (host, credentials and options such as
    # tls / authMechanism / retrywrites / maxIdleTimeMS) is read from the
    # MONGODB_URI environment variable so that no username or password is
    # committed with the notebook. A missing variable raises KeyError.
    #
    # NOTE(review): any password that was previously committed in this file
    # should be considered leaked and rotated.
    #
    # The alias is underscore-prefixed so the import stays private to this
    # cell and cannot clash with another cell importing `os`.
    import os as _os

    mongo = MongoClient(_os.environ["MONGODB_URI"])
    # Sanity check: a successful "ping" confirms the connection works.
    print(mongo.admin.command("ping"))
    return (mongo,)
@app.cell
def _(qdrant):
    # Download every point (vector + payload) of the "MayaOCP" collection.
    #
    # scroll() returns a (records, next_page_offset) pair. Pages are requested
    # until the offset comes back as None, so the result is complete no matter
    # how large the collection is, and no single response has to carry the
    # whole collection. Underscore-prefixed names are cell-private helpers.
    _page_size = 1000
    _offset = None
    points = []
    while True:
        _batch, _offset = qdrant.scroll(
            collection_name="MayaOCP",
            with_vectors=True,
            with_payload=True,
            limit=_page_size,
            offset=_offset,
        )
        points.extend(_batch)
        if _offset is None:
            # No further page: everything has been collected.
            break
    print(len(points))
    return (points,)
@app.cell
def _(mongo):
    # Resolve the target database and collection handles used by later cells.
    _database_name = "MayaContigo"
    _collection_name = "MayaOCP"
    mongodb = mongo[_database_name]
    collection = mongodb[_collection_name]
    return collection, mongodb
@app.cell
def _(points):
    # Turn each Qdrant point into a flat document: the first 2000 entries of
    # its vector under "vector", plus every payload field at the top level.
    # Payload fields are merged last, so a payload key named "vector" would
    # replace the truncated vector.
    # NOTE(review): assumes each point carries a single unnamed vector (a
    # sliceable sequence) and a non-None payload - confirm against the
    # collection's configuration.
    documents = []
    for _point in points:
        _document = {"vector": _point.vector[:2000]}
        _document.update(_point.payload)
        documents.append(_document)
    # Preview the first two documents as the cell output.
    documents[:2]
    return (documents,)
@app.cell
def _(collection, documents):
    # Bulk-load all prepared documents into the target collection with one
    # insert_many call; the call is left as the final expression of the cell.
    collection.insert_many(documents=documents)
    return
@app.cell
def _(mongodb):
    # Create the vector search index on the "vector" field of "MayaOCP" by
    # sending a raw createIndexes command to the database.
    # The 2000 dimensions match the [:2000] truncation applied to vectors in
    # the document-building and query cells of this notebook.
    # NOTE(review): "COS" presumably selects cosine similarity - confirm
    # against the service documentation.
    _search_options = {
        "kind": "vector-hnsw",
        "similarity": "COS",
        "dimensions": 2000,
    }
    _index_spec = {
        "name": "VectorSearchIndex",
        "key": {"vector": "cosmosSearch"},
        "cosmosSearchOptions": _search_options,
    }
    # The command name must stay the first key of the command document.
    mongodb.command({"createIndexes": "MayaOCP", "indexes": [_index_spec]})
    return
@app.cell
def _(points):
    # Use the vector of the first downloaded point as a sample query.
    # Raises IndexError when `points` is empty.
    _first_point = points[0]
    query_vector = _first_point.vector
    # Show the vector as the cell output.
    query_vector
    return (query_vector,)
@app.cell
def _(collection, query_vector):
    # Run a vector search against the "vector" field and print each hit.
    # The query vector is cut to its first 2000 entries, the same length the
    # stored vectors and the index definition in this notebook use; `k` asks
    # for 5 results.
    _vector_query = {
        "path": "vector",
        "vector": query_vector[:2000],
        "k": 5,
    }
    pipeline = [{"$search": {"cosmosSearch": _vector_query}}]
    # `r` is kept as the loop variable because the cell exports it.
    for r in collection.aggregate(pipeline):
        print(r)
    return pipeline, r
@app.cell
def _():
    # Empty cell: it defines nothing and exports nothing.
    return
if __name__ == "__main__":
    # Execute the notebook's cells when the file is run directly as a script.
    app.run()