Spaces:

ric912
/

customer-feedback-intelligence-demo

Sleeping

File size: 12,023 Bytes

73b0303

"""CLI entrypoints for the project reboot."""

from __future__ import annotations

import json
from pathlib import Path

import typer

from feedback_intelligence.app.gradio_app import create_demo
from feedback_intelligence.benchmarks.tfidf_logreg import run_tfidf_logreg_baseline
from feedback_intelligence.config import (
    AmazonTransferEvaluationConfig,
    BaselineExperimentConfig,
    LocalEvaluationConfig,
    ReviewAnalysisConfig,
    TransformerTrainingConfig,
)
from feedback_intelligence.data.amazon_reviews import (
    load_amazon_polarity_reviews,
    summarize_reviews as summarize_amazon_reviews,
)
from feedback_intelligence.data.imdb import load_local_imdb_reviews, summarize_reviews
from feedback_intelligence.data.local_reviews import load_local_labeled_reviews
from feedback_intelligence.inference.sentiment import load_sentiment_predictor
from feedback_intelligence.pipeline.review_analysis import analyze_reviews_with_predictor
from feedback_intelligence.pipeline.transfer_evaluation import evaluate_reviews_with_predictor
from feedback_intelligence.training.transformer import train_transformer_model
from feedback_intelligence.utils.io import write_json

# Root Typer application. Functions decorated with ``@app.command`` in this
# module register themselves as its sub-commands.
app = typer.Typer(
    no_args_is_help=True,
    help="Feedback Intelligence project commands.",
)


def _json_config_option(help_text: str):
    """Return an option that defaults to ``None`` and only accepts an existing file.

    All ``*_CONFIG_PATH_OPTION`` constants share these path constraints and
    differ only in their help text.
    """
    return typer.Option(
        None,
        help=help_text,
        exists=True,
        file_okay=True,
        dir_okay=False,
    )


# --- Dataset location and sampling ------------------------------------------
# Must name an existing directory (files are rejected).
BASE_PATH_OPTION = typer.Option(
    Path("aclImdb"),
    help="Path to the local IMDb dataset root.",
    exists=True,
    dir_okay=True,
    file_okay=False,
)
SAMPLE_SIZE_OPTION = typer.Option(
    2_000,
    help="Balanced number of train rows to sample for inspection.",
    min=2,
)
SEED_OPTION = typer.Option(42, help="Deterministic sampling seed.")

# --- Baseline benchmark ------------------------------------------------------
OUTPUT_PATH_OPTION = typer.Option(
    Path("artifacts/benchmarks/tfidf_logreg_imdb.json"),
    help="Where to write the benchmark artifact.",
)
CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for the baseline experiment.",
)

# --- Review analysis (also reused by the demo launcher) ----------------------
ANALYSIS_OUTPUT_OPTION = typer.Option(
    Path("artifacts/analysis/review_analysis_imdb.json"),
    help="Where to write the review analysis artifact.",
)
ANALYSIS_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for the review analysis workflow.",
)

# --- Demo server -------------------------------------------------------------
HOST_OPTION = typer.Option("127.0.0.1", help="Host interface for the demo server.")
PORT_OPTION = typer.Option(7860, help="Port for the demo server.", min=1, max=65535)
SHARE_OPTION = typer.Option(False, help="Create a public Gradio share link.")

# --- Transformer training ----------------------------------------------------
TRAINER_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for transformer training.",
)

# --- Amazon transfer evaluation ----------------------------------------------
TRANSFER_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for Amazon transfer evaluation.",
)
TRANSFER_OUTPUT_OPTION = typer.Option(
    Path("artifacts/evaluations/amazon_transfer_tfidf_imdb.json"),
    help="Where to write the Amazon transfer evaluation artifact.",
)

# --- Local labeled-feedback evaluation ---------------------------------------
LOCAL_EVAL_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for evaluating a labeled local feedback CSV.",
)
LOCAL_EVAL_OUTPUT_OPTION = typer.Option(
    Path("artifacts/evaluations/customer_feedback_eval_200.json"),
    help="Where to write the local customer-feedback evaluation artifact.",
)


@app.callback()
def main() -> None:
    """Top-level CLI group."""
    # Deliberately has no body beyond the docstring: this function exists only
    # to be registered as the callback of ``app``; the actual work lives in the
    # ``@app.command`` functions of this module.


@app.command("status")
def status(message: str | None = None) -> None:
    """Show the current reboot status."""
    # A missing *or empty* message falls back to the stock confirmation line.
    typer.echo(message or "Feedback Intelligence environment is configured.")


@app.command("describe-dataset")
def describe_dataset(
    base_path: Path = BASE_PATH_OPTION,
    sample_size: int = SAMPLE_SIZE_OPTION,
    seed: int = SEED_OPTION,
) -> None:
    """Print a compact summary of the local IMDb dataset."""
    # base_path:   IMDb dataset root handed to the loader.
    # sample_size: rows requested for the train split; the test split asks for
    #              half as many, but never fewer than two.
    # seed:        sampling seed shared by both splits.
    rows_per_split = {
        "train": sample_size,
        "test": max(sample_size // 2, 2),
    }
    # Load every split first, then summarize, keeping the train -> test order.
    records_by_split = {
        split_name: load_local_imdb_reviews(
            base_path=base_path,
            split=split_name,
            sample_size=row_count,
            seed=seed,
        )
        for split_name, row_count in rows_per_split.items()
    }
    payload = {
        split_name: summarize_reviews(records)
        for split_name, records in records_by_split.items()
    }
    # Emit the summary as pretty-printed JSON on stdout.
    typer.echo(json.dumps(payload, indent=2))


@app.command("describe-amazon-dataset")
def describe_amazon_dataset(
    sample_size: int = SAMPLE_SIZE_OPTION,
    seed: int = SEED_OPTION,
) -> None:
    """Print a compact summary of a sampled Amazon polarity dataset slice."""
    # sample_size: rows requested for the train split; the test split asks for
    #              half as many, but never fewer than two.
    # seed:        sampling seed shared by both splits.
    rows_per_split = {
        "train": sample_size,
        "test": max(sample_size // 2, 2),
    }
    # Load every split first, then summarize, keeping the train -> test order.
    records_by_split = {
        split_name: load_amazon_polarity_reviews(
            split=split_name,
            sample_size=row_count,
            seed=seed,
        )
        for split_name, row_count in rows_per_split.items()
    }
    payload = {
        split_name: summarize_amazon_reviews(records)
        for split_name, records in records_by_split.items()
    }
    # Emit the summary as pretty-printed JSON on stdout.
    typer.echo(json.dumps(payload, indent=2))


@app.command("run-baseline")
def run_baseline(
    base_path: Path = BASE_PATH_OPTION,
    output_path: Path = OUTPUT_PATH_OPTION,
    config_path: Path | None = CONFIG_PATH_OPTION,
) -> None:
    """Run the first reproducible benchmark on the local IMDb dataset."""
    # base_path:   IMDb dataset root handed to the loader.
    # output_path: destination of the JSON benchmark artifact.
    # config_path: optional JSON config; its option defaults to None, so the
    #              annotation is ``Path | None`` and the check below is real.
    if config_path is None:
        config = BaselineExperimentConfig()
    else:
        config = BaselineExperimentConfig.from_json(config_path)

    train_records = load_local_imdb_reviews(
        base_path=base_path,
        split="train",
        sample_size=config.sample_size,
        seed=config.seed,
    )
    # The test split asks for half the train sample, but never fewer than two rows.
    test_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=max(config.sample_size // 2, 2),
        seed=config.seed,
    )
    result = run_tfidf_logreg_baseline(
        train_records=train_records,
        test_records=test_records,
        config=config,
    )
    write_json(output_path, result.to_dict())
    typer.echo(f"Wrote benchmark artifact to {output_path}")
    typer.echo(f"Saved baseline model to {config.model_output_path}")


@app.command("train-transformer")
def train_transformer(
    base_path: Path = BASE_PATH_OPTION,
    config_path: Path | None = TRAINER_CONFIG_PATH_OPTION,
) -> None:
    """Fine-tune a transformer sentiment model and save it for inference."""
    # base_path:   IMDb dataset root handed to the loader.
    # config_path: optional JSON config; its option defaults to None, so the
    #              annotation is ``Path | None`` and the check below is real.
    if config_path is None:
        config = TransformerTrainingConfig()
    else:
        config = TransformerTrainingConfig.from_json(config_path)

    # Train and test sample sizes are configured independently; both share the seed.
    train_records = load_local_imdb_reviews(
        base_path=base_path,
        split="train",
        sample_size=config.train_sample_size,
        seed=config.seed,
    )
    test_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=config.test_sample_size,
        seed=config.seed,
    )
    result = train_transformer_model(
        train_records=train_records,
        test_records=test_records,
        config=config,
    )
    typer.echo(f"Saved transformer model to {result.output_dir}")
    typer.echo(f"Best validation checkpoint came from epoch {result.best_epoch}")
    typer.echo(f"Wrote transformer metrics to {config.metrics_output_path}")


@app.command("analyze-reviews")
def analyze_reviews_command(
    base_path: Path = BASE_PATH_OPTION,
    output_path: Path = ANALYSIS_OUTPUT_OPTION,
    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
) -> None:
    """Generate clustered review insights and review priorities."""
    # base_path:   IMDb dataset root handed to the loader.
    # output_path: destination of the JSON analysis artifact.
    # config_path: optional JSON config; its option defaults to None, so the
    #              annotation is ``Path | None`` and the check below is real.
    if config_path is None:
        analysis_config = ReviewAnalysisConfig()
    else:
        analysis_config = ReviewAnalysisConfig.from_json(config_path)

    # The analysis always samples from the "test" split.
    analysis_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=analysis_config.analysis_sample_size,
        seed=analysis_config.seed,
    )
    # The model path is resolved to an absolute path before it is handed over.
    predictor = load_sentiment_predictor(
        model_path=Path(analysis_config.sentiment_model_path).resolve(),
        backend=analysis_config.sentiment_backend,
        max_length=analysis_config.sentiment_max_length,
    )
    artifact = analyze_reviews_with_predictor(
        review_records=analysis_records,
        predictor=predictor,
        analysis_config=analysis_config,
        sentiment_model_info=predictor.describe(),
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote review analysis artifact to {output_path}")


@app.command("evaluate-amazon-transfer")
def evaluate_amazon_transfer(
    output_path: Path = TRANSFER_OUTPUT_OPTION,
    config_path: Path | None = TRANSFER_CONFIG_PATH_OPTION,
) -> None:
    """Evaluate a saved sentiment model on Amazon polarity reviews."""
    # output_path: destination of the JSON evaluation artifact.
    # config_path: optional JSON config; its option defaults to None, so the
    #              annotation is ``Path | None`` and the check below is real.
    if config_path is None:
        config = AmazonTransferEvaluationConfig()
    else:
        config = AmazonTransferEvaluationConfig.from_json(config_path)

    amazon_records = load_amazon_polarity_reviews(
        split=str(config.dataset_split),
        sample_size=config.dataset_sample_size,
        seed=config.seed,
        include_title=config.include_title,
        dataset_name=config.dataset_name,
    )
    # The model path is resolved to an absolute path before it is handed over.
    predictor = load_sentiment_predictor(
        model_path=Path(config.sentiment_model_path).resolve(),
        backend=config.sentiment_backend,
        max_length=config.sentiment_max_length,
    )
    # Record the sampling settings alongside the metrics in the artifact.
    dataset_info = {
        "dataset_name": config.dataset_name,
        "split": config.dataset_split,
        "sample_size": config.dataset_sample_size,
        "include_title": config.include_title,
        "seed": config.seed,
    }
    artifact = evaluate_reviews_with_predictor(
        review_records=amazon_records,
        predictor=predictor,
        dataset_info=dataset_info,
        max_error_examples=config.max_error_examples,
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote Amazon transfer evaluation artifact to {output_path}")


@app.command("evaluate-local-feedback")
def evaluate_local_feedback(
    output_path: Path = LOCAL_EVAL_OUTPUT_OPTION,
    config_path: Path | None = LOCAL_EVAL_CONFIG_PATH_OPTION,
) -> None:
    """Evaluate a saved model on a fixed local labeled customer-feedback CSV."""
    # output_path: destination of the JSON evaluation artifact.
    # config_path: optional JSON config; its option defaults to None, so the
    #              annotation is ``Path | None`` and the check below is real.
    if config_path is None:
        config = LocalEvaluationConfig()
    else:
        config = LocalEvaluationConfig.from_json(config_path)

    # Column names and dataset labels all come from the config.
    local_records = load_local_labeled_reviews(
        dataset_path=Path(config.dataset_path),
        text_column=config.text_column,
        title_column=config.title_column,
        label_column=config.label_column,
        review_id_column=config.review_id_column,
        split_name=config.split_name,
        source_name=config.source_name,
    )
    # The model path is resolved to an absolute path before it is handed over.
    predictor = load_sentiment_predictor(
        model_path=Path(config.sentiment_model_path).resolve(),
        backend=config.sentiment_backend,
        max_length=config.sentiment_max_length,
    )
    # Record where the evaluated data came from alongside the metrics.
    dataset_info = {
        "dataset_name": config.source_name,
        "split": config.split_name,
        "dataset_path": config.dataset_path,
    }
    artifact = evaluate_reviews_with_predictor(
        review_records=local_records,
        predictor=predictor,
        dataset_info=dataset_info,
        max_error_examples=config.max_error_examples,
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote local feedback evaluation artifact to {output_path}")


@app.command("launch-demo")
def launch_demo(
    base_path: Path = BASE_PATH_OPTION,
    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
    host: str = HOST_OPTION,
    port: int = PORT_OPTION,
    share: bool = SHARE_OPTION,
) -> None:
    """Launch the Gradio feedback-intelligence demo."""
    # base_path:   IMDb dataset root passed through to the demo factory.
    # config_path: optional review-analysis JSON config; its option defaults to
    #              None, so the annotation is ``Path | None``.
    # host / port: interface and port passed to ``demo.launch``.
    # share:       forwarded to ``demo.launch(share=...)``.
    if config_path is None:
        analysis_config = ReviewAnalysisConfig()
    else:
        analysis_config = ReviewAnalysisConfig.from_json(config_path)

    demo = create_demo(base_path=base_path, analysis_config=analysis_config)
    demo.launch(server_name=host, server_port=port, share=share)