Spaces:

ric912
/

customer-feedback-intelligence-demo

Sleeping

customer-feedback-intelligence-demo / src /feedback_intelligence /cli.py

Richard CHEAM

Deploy customer feedback intelligence demo

73b0303 about 2 months ago

12 kB

	"""CLI entrypoints for the project reboot."""

	from __future__ import annotations

	import json
	from pathlib import Path

	import typer

	from feedback_intelligence.app.gradio_app import create_demo
	from feedback_intelligence.benchmarks.tfidf_logreg import run_tfidf_logreg_baseline
	from feedback_intelligence.config import (
	AmazonTransferEvaluationConfig,
	BaselineExperimentConfig,
	LocalEvaluationConfig,
	ReviewAnalysisConfig,
	TransformerTrainingConfig,
	)
	from feedback_intelligence.data.amazon_reviews import (
	load_amazon_polarity_reviews,
	summarize_reviews as summarize_amazon_reviews,
	)
	from feedback_intelligence.data.imdb import load_local_imdb_reviews, summarize_reviews
	from feedback_intelligence.data.local_reviews import load_local_labeled_reviews
	from feedback_intelligence.inference.sentiment import load_sentiment_predictor
	from feedback_intelligence.pipeline.review_analysis import analyze_reviews_with_predictor
	from feedback_intelligence.pipeline.transfer_evaluation import evaluate_reviews_with_predictor
	from feedback_intelligence.training.transformer import train_transformer_model
	from feedback_intelligence.utils.io import write_json

	# Root Typer application; every command below registers itself on it.
	# With no arguments the CLI prints its help text instead of erroring out.
	app = typer.Typer(
	    no_args_is_help=True,
	    help="Feedback Intelligence project commands.",
	)
	def _json_config_option(help_text: str):
	    """Declare an optional option that, when supplied, must name an existing file."""
	    return typer.Option(
	        None,
	        exists=True,
	        file_okay=True,
	        dir_okay=False,
	        help=help_text,
	    )


	# --- Dataset sampling options ---
	BASE_PATH_OPTION = typer.Option(
	    Path("aclImdb"),
	    exists=True,
	    file_okay=False,
	    dir_okay=True,
	    help="Path to the local IMDb dataset root.",
	)
	SAMPLE_SIZE_OPTION = typer.Option(
	    2_000,
	    min=2,
	    help="Balanced number of train rows to sample for inspection.",
	)
	SEED_OPTION = typer.Option(42, help="Deterministic sampling seed.")

	# --- Baseline benchmark options ---
	OUTPUT_PATH_OPTION = typer.Option(
	    Path("artifacts/benchmarks/tfidf_logreg_imdb.json"),
	    help="Where to write the benchmark artifact.",
	)
	CONFIG_PATH_OPTION = _json_config_option(
	    "Optional JSON config for the baseline experiment."
	)

	# --- Review analysis options (the config option is also used by launch-demo) ---
	ANALYSIS_OUTPUT_OPTION = typer.Option(
	    Path("artifacts/analysis/review_analysis_imdb.json"),
	    help="Where to write the review analysis artifact.",
	)
	ANALYSIS_CONFIG_PATH_OPTION = _json_config_option(
	    "Optional JSON config for the review analysis workflow."
	)

	# --- Demo server options ---
	HOST_OPTION = typer.Option("127.0.0.1", help="Host interface for the demo server.")
	PORT_OPTION = typer.Option(7860, min=1, max=65535, help="Port for the demo server.")
	SHARE_OPTION = typer.Option(False, help="Create a public Gradio share link.")

	# --- Transformer training options ---
	TRAINER_CONFIG_PATH_OPTION = _json_config_option(
	    "Optional JSON config for transformer training."
	)

	# --- Amazon transfer evaluation options ---
	TRANSFER_CONFIG_PATH_OPTION = _json_config_option(
	    "Optional JSON config for Amazon transfer evaluation."
	)
	TRANSFER_OUTPUT_OPTION = typer.Option(
	    Path("artifacts/evaluations/amazon_transfer_tfidf_imdb.json"),
	    help="Where to write the Amazon transfer evaluation artifact.",
	)

	# --- Local labeled-feedback evaluation options ---
	LOCAL_EVAL_CONFIG_PATH_OPTION = _json_config_option(
	    "Optional JSON config for evaluating a labeled local feedback CSV."
	)
	LOCAL_EVAL_OUTPUT_OPTION = typer.Option(
	    Path("artifacts/evaluations/customer_feedback_eval_200.json"),
	    help="Where to write the local customer-feedback evaluation artifact.",
	)


	# Registered as the app-level callback; it has no parameters and no body
	# beyond its docstring, so it performs no work of its own.
	@app.callback()
	def main() -> None:
	    """Top-level CLI group."""


	@app.command("status")
	def status(message: str | None = None) -> None:
	    """Show the current reboot status."""
	    # `message`: optional text to print instead of the default status line.
	    # A missing or empty message falls back to the default (truthiness
	    # check), so exactly one line is echoed either way.
	    typer.echo(message or "Feedback Intelligence environment is configured.")


	@app.command("describe-dataset")
	def describe_dataset(
	    base_path: Path = BASE_PATH_OPTION,
	    sample_size: int = SAMPLE_SIZE_OPTION,
	    seed: int = SEED_OPTION,
	) -> None:
	    """Print a compact summary of the local IMDb dataset."""
	    # The test split is sampled at half the train size, but never below 2 rows.
	    rows_per_split = {
	        "train": sample_size,
	        "test": max(sample_size // 2, 2),
	    }
	    # Load both splits before summarizing either, train first.
	    sampled = {
	        split_name: load_local_imdb_reviews(
	            base_path=base_path,
	            split=split_name,
	            sample_size=row_count,
	            seed=seed,
	        )
	        for split_name, row_count in rows_per_split.items()
	    }
	    summary = {
	        split_name: summarize_reviews(records)
	        for split_name, records in sampled.items()
	    }
	    typer.echo(json.dumps(summary, indent=2))


	@app.command("describe-amazon-dataset")
	def describe_amazon_dataset(
	    sample_size: int = SAMPLE_SIZE_OPTION,
	    seed: int = SEED_OPTION,
	) -> None:
	    """Print a compact summary of a sampled Amazon polarity dataset slice."""
	    # The test split is sampled at half the train size, but never below 2 rows.
	    rows_per_split = {
	        "train": sample_size,
	        "test": max(sample_size // 2, 2),
	    }
	    # Load both splits before summarizing either, train first.
	    sampled = {
	        split_name: load_amazon_polarity_reviews(
	            split=split_name,
	            sample_size=row_count,
	            seed=seed,
	        )
	        for split_name, row_count in rows_per_split.items()
	    }
	    summary = {
	        split_name: summarize_amazon_reviews(records)
	        for split_name, records in sampled.items()
	    }
	    typer.echo(json.dumps(summary, indent=2))


	@app.command("run-baseline")
	def run_baseline(
	    base_path: Path = BASE_PATH_OPTION,
	    output_path: Path = OUTPUT_PATH_OPTION,
	    config_path: Path | None = CONFIG_PATH_OPTION,
	) -> None:
	    """Run the first reproducible benchmark on the local IMDb dataset."""
	    # Parameters:
	    #   base_path   - root directory of the local IMDb dataset.
	    #   output_path - destination of the JSON benchmark artifact.
	    #   config_path - optional JSON config; when omitted (None) the default
	    #                 BaselineExperimentConfig() is used.
	    if config_path is None:
	        config = BaselineExperimentConfig()
	    else:
	        config = BaselineExperimentConfig.from_json(config_path)

	    train_records = load_local_imdb_reviews(
	        base_path=base_path,
	        split="train",
	        sample_size=config.sample_size,
	        seed=config.seed,
	    )
	    # The test split is sampled at half the train size, but never below 2 rows.
	    test_records = load_local_imdb_reviews(
	        base_path=base_path,
	        split="test",
	        sample_size=max(config.sample_size // 2, 2),
	        seed=config.seed,
	    )
	    result = run_tfidf_logreg_baseline(
	        train_records=train_records,
	        test_records=test_records,
	        config=config,
	    )
	    write_json(output_path, result.to_dict())
	    typer.echo(f"Wrote benchmark artifact to {output_path}")
	    # NOTE(review): the model is presumably persisted inside
	    # run_tfidf_logreg_baseline; this command only reports the configured path.
	    typer.echo(f"Saved baseline model to {config.model_output_path}")


	@app.command("train-transformer")
	def train_transformer(
	    base_path: Path = BASE_PATH_OPTION,
	    config_path: Path | None = TRAINER_CONFIG_PATH_OPTION,
	) -> None:
	    """Fine-tune a transformer sentiment model and save it for inference."""
	    # Parameters:
	    #   base_path   - root directory of the local IMDb dataset.
	    #   config_path - optional JSON config; when omitted (None) the default
	    #                 TransformerTrainingConfig() is used.
	    if config_path is None:
	        config = TransformerTrainingConfig()
	    else:
	        config = TransformerTrainingConfig.from_json(config_path)

	    # Train and test sample sizes are configured independently; both share the seed.
	    train_records = load_local_imdb_reviews(
	        base_path=base_path,
	        split="train",
	        sample_size=config.train_sample_size,
	        seed=config.seed,
	    )
	    test_records = load_local_imdb_reviews(
	        base_path=base_path,
	        split="test",
	        sample_size=config.test_sample_size,
	        seed=config.seed,
	    )
	    result = train_transformer_model(
	        train_records=train_records,
	        test_records=test_records,
	        config=config,
	    )
	    typer.echo(f"Saved transformer model to {result.output_dir}")
	    typer.echo(f"Best validation checkpoint came from epoch {result.best_epoch}")
	    # NOTE(review): metrics are presumably written by train_transformer_model;
	    # this command only reports the configured path.
	    typer.echo(f"Wrote transformer metrics to {config.metrics_output_path}")


	@app.command("analyze-reviews")
	def analyze_reviews_command(
	    base_path: Path = BASE_PATH_OPTION,
	    output_path: Path = ANALYSIS_OUTPUT_OPTION,
	    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
	) -> None:
	    """Generate clustered review insights and review priorities."""
	    # Parameters:
	    #   base_path   - root directory of the local IMDb dataset.
	    #   output_path - destination of the JSON review-analysis artifact.
	    #   config_path - optional JSON config; when omitted (None) the default
	    #                 ReviewAnalysisConfig() is used.
	    if config_path is None:
	        analysis_config = ReviewAnalysisConfig()
	    else:
	        analysis_config = ReviewAnalysisConfig.from_json(config_path)

	    # The analysis runs on a sample of the IMDb "test" split.
	    analysis_records = load_local_imdb_reviews(
	        base_path=base_path,
	        split="test",
	        sample_size=analysis_config.analysis_sample_size,
	        seed=analysis_config.seed,
	    )
	    # The configured model path is made absolute before the predictor is loaded.
	    predictor = load_sentiment_predictor(
	        model_path=Path(analysis_config.sentiment_model_path).resolve(),
	        backend=analysis_config.sentiment_backend,
	        max_length=analysis_config.sentiment_max_length,
	    )
	    artifact = analyze_reviews_with_predictor(
	        review_records=analysis_records,
	        predictor=predictor,
	        analysis_config=analysis_config,
	        sentiment_model_info=predictor.describe(),
	    )
	    write_json(output_path, artifact.to_dict())
	    typer.echo(f"Wrote review analysis artifact to {output_path}")


	@app.command("evaluate-amazon-transfer")
	def evaluate_amazon_transfer(
	    output_path: Path = TRANSFER_OUTPUT_OPTION,
	    config_path: Path | None = TRANSFER_CONFIG_PATH_OPTION,
	) -> None:
	    """Evaluate a saved sentiment model on Amazon polarity reviews."""
	    # Parameters:
	    #   output_path - destination of the JSON evaluation artifact.
	    #   config_path - optional JSON config; when omitted (None) the default
	    #                 AmazonTransferEvaluationConfig() is used.
	    if config_path is None:
	        config = AmazonTransferEvaluationConfig()
	    else:
	        config = AmazonTransferEvaluationConfig.from_json(config_path)

	    amazon_records = load_amazon_polarity_reviews(
	        # The loader receives the split as a plain string.
	        split=str(config.dataset_split),
	        sample_size=config.dataset_sample_size,
	        seed=config.seed,
	        include_title=config.include_title,
	        dataset_name=config.dataset_name,
	    )
	    # The configured model path is made absolute before the predictor is loaded.
	    predictor = load_sentiment_predictor(
	        model_path=Path(config.sentiment_model_path).resolve(),
	        backend=config.sentiment_backend,
	        max_length=config.sentiment_max_length,
	    )
	    artifact = evaluate_reviews_with_predictor(
	        review_records=amazon_records,
	        predictor=predictor,
	        # The sampling settings are passed along with the records being evaluated.
	        dataset_info={
	            "dataset_name": config.dataset_name,
	            "split": config.dataset_split,
	            "sample_size": config.dataset_sample_size,
	            "include_title": config.include_title,
	            "seed": config.seed,
	        },
	        max_error_examples=config.max_error_examples,
	    )
	    write_json(output_path, artifact.to_dict())
	    typer.echo(f"Wrote Amazon transfer evaluation artifact to {output_path}")


	@app.command("evaluate-local-feedback")
	def evaluate_local_feedback(
	    output_path: Path = LOCAL_EVAL_OUTPUT_OPTION,
	    config_path: Path | None = LOCAL_EVAL_CONFIG_PATH_OPTION,
	) -> None:
	    """Evaluate a saved model on a fixed local labeled customer-feedback CSV."""
	    # Parameters:
	    #   output_path - destination of the JSON evaluation artifact.
	    #   config_path - optional JSON config; when omitted (None) the default
	    #                 LocalEvaluationConfig() is used.
	    if config_path is None:
	        config = LocalEvaluationConfig()
	    else:
	        config = LocalEvaluationConfig.from_json(config_path)

	    # The dataset location and all column names come from the config.
	    local_records = load_local_labeled_reviews(
	        dataset_path=Path(config.dataset_path),
	        text_column=config.text_column,
	        title_column=config.title_column,
	        label_column=config.label_column,
	        review_id_column=config.review_id_column,
	        split_name=config.split_name,
	        source_name=config.source_name,
	    )
	    # The configured model path is made absolute before the predictor is loaded.
	    predictor = load_sentiment_predictor(
	        model_path=Path(config.sentiment_model_path).resolve(),
	        backend=config.sentiment_backend,
	        max_length=config.sentiment_max_length,
	    )
	    artifact = evaluate_reviews_with_predictor(
	        review_records=local_records,
	        predictor=predictor,
	        dataset_info={
	            "dataset_name": config.source_name,
	            "split": config.split_name,
	            # Passed as configured, not wrapped in Path like the loader argument.
	            "dataset_path": config.dataset_path,
	        },
	        max_error_examples=config.max_error_examples,
	    )
	    write_json(output_path, artifact.to_dict())
	    typer.echo(f"Wrote local feedback evaluation artifact to {output_path}")


	@app.command("launch-demo")
	def launch_demo(
	    base_path: Path = BASE_PATH_OPTION,
	    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
	    host: str = HOST_OPTION,
	    port: int = PORT_OPTION,
	    share: bool = SHARE_OPTION,
	) -> None:
	    """Launch the Gradio feedback-intelligence demo."""
	    # Parameters:
	    #   base_path   - root directory of the local IMDb dataset, handed to create_demo.
	    #   config_path - optional JSON config; when omitted (None) the default
	    #                 ReviewAnalysisConfig() is used.
	    #   host, port  - forwarded to demo.launch as server_name / server_port.
	    #   share       - forwarded to demo.launch unchanged.
	    if config_path is None:
	        analysis_config = ReviewAnalysisConfig()
	    else:
	        analysis_config = ReviewAnalysisConfig.from_json(config_path)

	    demo = create_demo(base_path=base_path, analysis_config=analysis_config)
	    demo.launch(server_name=host, server_port=port, share=share)