code-shredding

2026-02-24 03:50:34 +00:00
parent 98a1b5939e
commit e8f407d313
71 changed files with 1025 additions and 2417 deletions
--- a/README.md
+++ b/README.md
@@ -260,30 +260,18 @@ embeddings = embedder.generate_embeddings_batch(texts, batch_size=10)
 ### **Opción 4: Almacenar en GCS**

 ```python
-from file_storage.google_cloud import GoogleCloudFileStorage
+import gcsfs

-storage = GoogleCloudFileStorage(bucket="mi-bucket")
+fs = gcsfs.GCSFileSystem()

 # Subir archivo
-storage.upload_file(
-    file_path="local_file.md",
-    destination_blob_name="chunks/documento_0.md",
-    content_type="text/markdown"
-)
+fs.put("local_file.md", "mi-bucket/chunks/documento_0.md")

 # Listar archivos
-files = storage.list_files(path="chunks/")
+files = fs.ls("mi-bucket/chunks/")

 # Descargar archivo
-file_stream = storage.get_file_stream("chunks/documento_0.md")
-content = file_stream.read().decode("utf-8")
-```
-
-**CLI:**
-```bash
-file-storage upload local_file.md chunks/documento_0.md
-file-storage list chunks/
-file-storage download chunks/documento_0.md
+content = fs.cat_file("mi-bucket/chunks/documento_0.md").decode("utf-8")
 ```

 ---
@@ -340,10 +328,10 @@ vector-search delete mi-indice
 ## 🔄 Flujo Completo de Ejemplo

 ```python
+import gcsfs
 from pathlib import Path
 from chunker.contextual_chunker import ContextualChunker
 from embedder.vertex_ai import VertexAIEmbedder
-from file_storage.google_cloud import GoogleCloudFileStorage
 from llm.vertex_ai import VertexAILLM

 # 1. Setup
@@ -354,7 +342,7 @@ embedder = VertexAIEmbedder(
    project="mi-proyecto",
    location="us-central1"
 )
-storage = GoogleCloudFileStorage(bucket="mi-bucket")
+fs = gcsfs.GCSFileSystem()

 # 2. Chunking
 documents = chunker.process_path(Path("documento.pdf"))
@@ -368,10 +356,7 @@ for i, doc in enumerate(documents):
    embedding = embedder.generate_embedding(doc["page_content"])

    # Guardar contenido en GCS
-    storage.upload_file(
-        file_path=f"temp_{chunk_id}.md",
-        destination_blob_name=f"contents/{chunk_id}.md"
-    )
+    fs.put(f"temp_{chunk_id}.md", f"mi-bucket/contents/{chunk_id}.md")

    # Guardar vector (escribir a JSONL localmente, luego subir)
    print(f"Chunk {chunk_id}: {len(embedding)} dimensiones")