import json
import os
from pathlib import Path
from typing import Annotated, List

import chonkie
import typer

from .base_chunker import BaseChunker, Document


class RecursiveChunker(BaseChunker):
    """A chunker that uses the chonkie RecursiveChunker."""

    def __init__(self) -> None:
        """Initializes the RecursiveChunker."""
        # Underlying chonkie splitter; called directly on raw text.
        self.processor = chonkie.RecursiveChunker()

    def process_text(self, text: str) -> List[Document]:
        """
        Processes a string of text into a list of Document chunks.

        Args:
            text: The input string to process.

        Returns:
            A list of Document objects, one per chunk, each carrying its
            positional index in ``metadata["chunk_index"]``.
        """
        chunks = self.processor(text)
        # Build one Document per chunk; the index records original order.
        return [
            {
                "page_content": chunk.text,
                "metadata": {"chunk_index": i},
            }
            for i, chunk in enumerate(chunks)
        ]


app = typer.Typer()


@app.command()
def main(
    input_file_path: Annotated[
        str, typer.Argument(help="Path to the input text file.")
    ],
    output_dir: Annotated[
        str, typer.Argument(help="Directory to save the output file.")
    ],
):
    """
    Processes a text file using RecursiveChunker and saves the output
    to a JSONL file.
    """
    print(f"Starting to process {input_file_path}...")

    # 1. Instantiate chunker and process the file using the inherited method
    chunker = RecursiveChunker()
    documents = chunker.process_path(Path(input_file_path))
    print(f"Successfully created {len(documents)} chunks.")

    # 2. Prepare and save the output.
    # exist_ok=True avoids the check-then-create race of the previous
    # os.path.exists() guard and is idempotent across concurrent runs.
    out_dir = Path(output_dir)
    if not out_dir.exists():
        print(f"Created output directory: {output_dir}")
    out_dir.mkdir(parents=True, exist_ok=True)

    output_file_path = out_dir / "chunked_documents.jsonl"
    with open(output_file_path, "w", encoding="utf-8") as f:
        for doc in documents:
            # Add source file info to metadata before writing
            doc["metadata"]["source_file"] = os.path.basename(input_file_path)
            f.write(json.dumps(doc, ensure_ascii=False) + "\n")

    print(f"Successfully saved {len(documents)} chunks to {output_file_path}")


if __name__ == "__main__":
    app()