Richard CHEAM
Deploy customer feedback intelligence demo
73b0303
"""CLI entrypoints for the project reboot."""
from __future__ import annotations
import json
from pathlib import Path
import typer
from feedback_intelligence.app.gradio_app import create_demo
from feedback_intelligence.benchmarks.tfidf_logreg import run_tfidf_logreg_baseline
from feedback_intelligence.config import (
AmazonTransferEvaluationConfig,
BaselineExperimentConfig,
LocalEvaluationConfig,
ReviewAnalysisConfig,
TransformerTrainingConfig,
)
from feedback_intelligence.data.amazon_reviews import (
load_amazon_polarity_reviews,
summarize_reviews as summarize_amazon_reviews,
)
from feedback_intelligence.data.imdb import load_local_imdb_reviews, summarize_reviews
from feedback_intelligence.data.local_reviews import load_local_labeled_reviews
from feedback_intelligence.inference.sentiment import load_sentiment_predictor
from feedback_intelligence.pipeline.review_analysis import analyze_reviews_with_predictor
from feedback_intelligence.pipeline.transfer_evaluation import evaluate_reviews_with_predictor
from feedback_intelligence.training.transformer import train_transformer_model
from feedback_intelligence.utils.io import write_json
# Root Typer application that every command below registers on.
# `no_args_is_help=True` is passed so a bare invocation shows help.
app = typer.Typer(no_args_is_help=True, help="Feedback Intelligence project commands.")
# Shared option declarations. Each one is built once at import time and then
# used as a parameter default by one or more commands in this module.

# --- IMDb dataset location and sampling -------------------------------------
BASE_PATH_OPTION = typer.Option(
    Path("aclImdb"),
    help="Path to the local IMDb dataset root.",
    exists=True,
    dir_okay=True,
    file_okay=False,
)
SAMPLE_SIZE_OPTION = typer.Option(
    2_000,
    help="Balanced number of train rows to sample for inspection.",
    min=2,
)
SEED_OPTION = typer.Option(42, help="Deterministic sampling seed.")

# --- Baseline benchmark ------------------------------------------------------
OUTPUT_PATH_OPTION = typer.Option(
    Path("artifacts/benchmarks/tfidf_logreg_imdb.json"),
    help="Where to write the benchmark artifact.",
)
CONFIG_PATH_OPTION = typer.Option(
    None,
    help="Optional JSON config for the baseline experiment.",
    exists=True,
    dir_okay=False,
    file_okay=True,
)

# --- Review analysis ---------------------------------------------------------
ANALYSIS_OUTPUT_OPTION = typer.Option(
    Path("artifacts/analysis/review_analysis_imdb.json"),
    help="Where to write the review analysis artifact.",
)
ANALYSIS_CONFIG_PATH_OPTION = typer.Option(
    None,
    help="Optional JSON config for the review analysis workflow.",
    exists=True,
    dir_okay=False,
    file_okay=True,
)

# --- Demo server -------------------------------------------------------------
HOST_OPTION = typer.Option("127.0.0.1", help="Host interface for the demo server.")
PORT_OPTION = typer.Option(7860, help="Port for the demo server.", min=1, max=65535)
SHARE_OPTION = typer.Option(False, help="Create a public Gradio share link.")

# --- Transformer training ----------------------------------------------------
TRAINER_CONFIG_PATH_OPTION = typer.Option(
    None,
    help="Optional JSON config for transformer training.",
    exists=True,
    dir_okay=False,
    file_okay=True,
)

# --- Amazon transfer evaluation ----------------------------------------------
TRANSFER_CONFIG_PATH_OPTION = typer.Option(
    None,
    help="Optional JSON config for Amazon transfer evaluation.",
    exists=True,
    dir_okay=False,
    file_okay=True,
)
TRANSFER_OUTPUT_OPTION = typer.Option(
    Path("artifacts/evaluations/amazon_transfer_tfidf_imdb.json"),
    help="Where to write the Amazon transfer evaluation artifact.",
)

# --- Local labeled-feedback evaluation ---------------------------------------
LOCAL_EVAL_CONFIG_PATH_OPTION = typer.Option(
    None,
    help="Optional JSON config for evaluating a labeled local feedback CSV.",
    exists=True,
    dir_okay=False,
    file_okay=True,
)
LOCAL_EVAL_OUTPUT_OPTION = typer.Option(
    Path("artifacts/evaluations/customer_feedback_eval_200.json"),
    help="Where to write the local customer-feedback evaluation artifact.",
)
# Registered as the application callback; the body is intentionally empty and
# consists of the docstring only.
@app.callback()
def main() -> None:
    """Top-level CLI group."""
@app.command("status")
def status(message: str | None = None) -> None:
    """Show the current reboot status."""
    # A missing or empty message falls back to the default status line.
    typer.echo(message or "Feedback Intelligence environment is configured.")
@app.command("describe-dataset")
def describe_dataset(
    base_path: Path = BASE_PATH_OPTION,
    sample_size: int = SAMPLE_SIZE_OPTION,
    seed: int = SEED_OPTION,
) -> None:
    """Print a compact summary of the local IMDb dataset."""
    # The test split is sampled at half the requested size, never below 2.
    rows_per_split = {"train": sample_size, "test": max(sample_size // 2, 2)}

    # Load both splits first (train, then test) and summarize afterwards.
    records_by_split = {
        split_name: load_local_imdb_reviews(
            base_path=base_path,
            split=split_name,
            sample_size=row_count,
            seed=seed,
        )
        for split_name, row_count in rows_per_split.items()
    }
    summary = {
        split_name: summarize_reviews(records)
        for split_name, records in records_by_split.items()
    }
    typer.echo(json.dumps(summary, indent=2))
@app.command("describe-amazon-dataset")
def describe_amazon_dataset(
    sample_size: int = SAMPLE_SIZE_OPTION,
    seed: int = SEED_OPTION,
) -> None:
    """Print a compact summary of a sampled Amazon polarity dataset slice."""
    # The test split is sampled at half the requested size, never below 2.
    rows_per_split = {"train": sample_size, "test": max(sample_size // 2, 2)}

    # Load both splits first (train, then test) and summarize afterwards.
    records_by_split = {
        split_name: load_amazon_polarity_reviews(
            split=split_name,
            sample_size=row_count,
            seed=seed,
        )
        for split_name, row_count in rows_per_split.items()
    }
    summary = {
        split_name: summarize_amazon_reviews(records)
        for split_name, records in records_by_split.items()
    }
    typer.echo(json.dumps(summary, indent=2))
@app.command("run-baseline")
def run_baseline(
    base_path: Path = BASE_PATH_OPTION,
    output_path: Path = OUTPUT_PATH_OPTION,
    config_path: Path | None = CONFIG_PATH_OPTION,
) -> None:
    """Run the first reproducible benchmark on the local IMDb dataset."""
    # Parameters:
    #   base_path:   root directory of the local IMDb dataset.
    #   output_path: destination of the JSON benchmark artifact.
    #   config_path: optional JSON experiment config; the option defaults to
    #                None, in which case BaselineExperimentConfig() is used.
    if config_path is None:
        config = BaselineExperimentConfig()
    else:
        config = BaselineExperimentConfig.from_json(config_path)

    train_records = load_local_imdb_reviews(
        base_path=base_path,
        split="train",
        sample_size=config.sample_size,
        seed=config.seed,
    )
    # The test split is sampled at half the train size, never below 2 rows.
    test_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=max(config.sample_size // 2, 2),
        seed=config.seed,
    )

    result = run_tfidf_logreg_baseline(
        train_records=train_records,
        test_records=test_records,
        config=config,
    )
    write_json(output_path, result.to_dict())

    typer.echo(f"Wrote benchmark artifact to {output_path}")
    # NOTE(review): the model is presumably persisted by the baseline runner
    # itself; this command only reports the configured path.
    typer.echo(f"Saved baseline model to {config.model_output_path}")
@app.command("train-transformer")
def train_transformer(
    base_path: Path = BASE_PATH_OPTION,
    config_path: Path | None = TRAINER_CONFIG_PATH_OPTION,
) -> None:
    """Fine-tune a transformer sentiment model and save it for inference."""
    # Parameters:
    #   base_path:   root directory of the local IMDb dataset.
    #   config_path: optional JSON training config; the option defaults to
    #                None, in which case TransformerTrainingConfig() is used.
    if config_path is None:
        config = TransformerTrainingConfig()
    else:
        config = TransformerTrainingConfig.from_json(config_path)

    # Train and test sample sizes are configured independently here.
    train_records = load_local_imdb_reviews(
        base_path=base_path,
        split="train",
        sample_size=config.train_sample_size,
        seed=config.seed,
    )
    test_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=config.test_sample_size,
        seed=config.seed,
    )

    result = train_transformer_model(
        train_records=train_records,
        test_records=test_records,
        config=config,
    )

    typer.echo(f"Saved transformer model to {result.output_dir}")
    typer.echo(f"Best validation checkpoint came from epoch {result.best_epoch}")
    # NOTE(review): metrics are presumably written by the trainer; this command
    # only reports the configured path.
    typer.echo(f"Wrote transformer metrics to {config.metrics_output_path}")
@app.command("analyze-reviews")
def analyze_reviews_command(
    base_path: Path = BASE_PATH_OPTION,
    output_path: Path = ANALYSIS_OUTPUT_OPTION,
    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
) -> None:
    """Generate clustered review insights and review priorities."""
    # Parameters:
    #   base_path:   root directory of the local IMDb dataset.
    #   output_path: destination of the JSON review-analysis artifact.
    #   config_path: optional JSON analysis config; the option defaults to
    #                None, in which case ReviewAnalysisConfig() is used.
    if config_path is None:
        analysis_config = ReviewAnalysisConfig()
    else:
        analysis_config = ReviewAnalysisConfig.from_json(config_path)

    # The analysis runs on a sample of the IMDb "test" split.
    analysis_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=analysis_config.analysis_sample_size,
        seed=analysis_config.seed,
    )

    # The model path is resolved to an absolute path before loading.
    predictor = load_sentiment_predictor(
        model_path=Path(analysis_config.sentiment_model_path).resolve(),
        backend=analysis_config.sentiment_backend,
        max_length=analysis_config.sentiment_max_length,
    )

    artifact = analyze_reviews_with_predictor(
        review_records=analysis_records,
        predictor=predictor,
        analysis_config=analysis_config,
        sentiment_model_info=predictor.describe(),
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote review analysis artifact to {output_path}")
@app.command("evaluate-amazon-transfer")
def evaluate_amazon_transfer(
    output_path: Path = TRANSFER_OUTPUT_OPTION,
    config_path: Path | None = TRANSFER_CONFIG_PATH_OPTION,
) -> None:
    """Evaluate a saved sentiment model on Amazon polarity reviews."""
    # Parameters:
    #   output_path: destination of the JSON evaluation artifact.
    #   config_path: optional JSON evaluation config; the option defaults to
    #                None, in which case AmazonTransferEvaluationConfig() is used.
    if config_path is None:
        config = AmazonTransferEvaluationConfig()
    else:
        config = AmazonTransferEvaluationConfig.from_json(config_path)

    amazon_records = load_amazon_polarity_reviews(
        split=str(config.dataset_split),
        sample_size=config.dataset_sample_size,
        seed=config.seed,
        include_title=config.include_title,
        dataset_name=config.dataset_name,
    )

    # The model path is resolved to an absolute path before loading.
    predictor = load_sentiment_predictor(
        model_path=Path(config.sentiment_model_path).resolve(),
        backend=config.sentiment_backend,
        max_length=config.sentiment_max_length,
    )

    # Dataset settings passed to the evaluator alongside the records.
    dataset_info = {
        "dataset_name": config.dataset_name,
        "split": config.dataset_split,
        "sample_size": config.dataset_sample_size,
        "include_title": config.include_title,
        "seed": config.seed,
    }
    artifact = evaluate_reviews_with_predictor(
        review_records=amazon_records,
        predictor=predictor,
        dataset_info=dataset_info,
        max_error_examples=config.max_error_examples,
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote Amazon transfer evaluation artifact to {output_path}")
@app.command("evaluate-local-feedback")
def evaluate_local_feedback(
    output_path: Path = LOCAL_EVAL_OUTPUT_OPTION,
    config_path: Path | None = LOCAL_EVAL_CONFIG_PATH_OPTION,
) -> None:
    """Evaluate a saved model on a fixed local labeled customer-feedback CSV."""
    # Parameters:
    #   output_path: destination of the JSON evaluation artifact.
    #   config_path: optional JSON evaluation config; the option defaults to
    #                None, in which case LocalEvaluationConfig() is used.
    if config_path is None:
        config = LocalEvaluationConfig()
    else:
        config = LocalEvaluationConfig.from_json(config_path)

    # Column names and dataset labels all come from the config.
    local_records = load_local_labeled_reviews(
        dataset_path=Path(config.dataset_path),
        text_column=config.text_column,
        title_column=config.title_column,
        label_column=config.label_column,
        review_id_column=config.review_id_column,
        split_name=config.split_name,
        source_name=config.source_name,
    )

    # The model path is resolved to an absolute path before loading.
    predictor = load_sentiment_predictor(
        model_path=Path(config.sentiment_model_path).resolve(),
        backend=config.sentiment_backend,
        max_length=config.sentiment_max_length,
    )

    # Dataset settings passed to the evaluator alongside the records.
    # NOTE(review): dataset_path is passed through exactly as configured;
    # confirm it is JSON-serializable (e.g. a str) for write_json.
    dataset_info = {
        "dataset_name": config.source_name,
        "split": config.split_name,
        "dataset_path": config.dataset_path,
    }
    artifact = evaluate_reviews_with_predictor(
        review_records=local_records,
        predictor=predictor,
        dataset_info=dataset_info,
        max_error_examples=config.max_error_examples,
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote local feedback evaluation artifact to {output_path}")
@app.command("launch-demo")
def launch_demo(
    base_path: Path = BASE_PATH_OPTION,
    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
    host: str = HOST_OPTION,
    port: int = PORT_OPTION,
    share: bool = SHARE_OPTION,
) -> None:
    """Launch the Gradio feedback-intelligence demo."""
    # Parameters:
    #   base_path:   root directory of the local IMDb dataset.
    #   config_path: optional JSON analysis config; the option defaults to
    #                None, in which case ReviewAnalysisConfig() is used.
    #   host, port:  forwarded to launch() as server_name / server_port.
    #   share:       forwarded to launch() as share.
    if config_path is None:
        analysis_config = ReviewAnalysisConfig()
    else:
        analysis_config = ReviewAnalysisConfig.from_json(config_path)

    demo = create_demo(base_path=base_path, analysis_config=analysis_config)
    demo.launch(server_name=host, server_port=port, share=share)