| """CLI entrypoints for the project reboot.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import typer |
|
|
| from feedback_intelligence.app.gradio_app import create_demo |
| from feedback_intelligence.benchmarks.tfidf_logreg import run_tfidf_logreg_baseline |
| from feedback_intelligence.config import ( |
| AmazonTransferEvaluationConfig, |
| BaselineExperimentConfig, |
| LocalEvaluationConfig, |
| ReviewAnalysisConfig, |
| TransformerTrainingConfig, |
| ) |
| from feedback_intelligence.data.amazon_reviews import ( |
| load_amazon_polarity_reviews, |
| summarize_reviews as summarize_amazon_reviews, |
| ) |
| from feedback_intelligence.data.imdb import load_local_imdb_reviews, summarize_reviews |
| from feedback_intelligence.data.local_reviews import load_local_labeled_reviews |
| from feedback_intelligence.inference.sentiment import load_sentiment_predictor |
| from feedback_intelligence.pipeline.review_analysis import analyze_reviews_with_predictor |
| from feedback_intelligence.pipeline.transfer_evaluation import evaluate_reviews_with_predictor |
| from feedback_intelligence.training.transformer import train_transformer_model |
| from feedback_intelligence.utils.io import write_json |
|
|
# Root Typer application; every command below registers itself on this object.
app = typer.Typer(no_args_is_help=True, help="Feedback Intelligence project commands.")
def _json_config_option(help_text: str):
    """Return a ``typer.Option`` that defaults to ``None`` and validates a file path.

    The option is declared with ``exists=True``, ``file_okay=True`` and
    ``dir_okay=False``; only the help text differs between the config options.
    """
    return typer.Option(
        None,
        exists=True,
        file_okay=True,
        dir_okay=False,
        help=help_text,
    )


# --- Dataset sampling options -------------------------------------------------
BASE_PATH_OPTION = typer.Option(
    Path("aclImdb"),
    help="Path to the local IMDb dataset root.",
    exists=True,
    dir_okay=True,
    file_okay=False,
)
SAMPLE_SIZE_OPTION = typer.Option(
    2_000,
    help="Balanced number of train rows to sample for inspection.",
    min=2,
)
SEED_OPTION = typer.Option(42, help="Deterministic sampling seed.")

# --- Baseline benchmark options -----------------------------------------------
OUTPUT_PATH_OPTION = typer.Option(
    Path("artifacts/benchmarks/tfidf_logreg_imdb.json"),
    help="Where to write the benchmark artifact.",
)
CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for the baseline experiment.",
)

# --- Review analysis options --------------------------------------------------
ANALYSIS_OUTPUT_OPTION = typer.Option(
    Path("artifacts/analysis/review_analysis_imdb.json"),
    help="Where to write the review analysis artifact.",
)
ANALYSIS_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for the review analysis workflow.",
)

# --- Demo server options ------------------------------------------------------
HOST_OPTION = typer.Option("127.0.0.1", help="Host interface for the demo server.")
PORT_OPTION = typer.Option(7860, help="Port for the demo server.", min=1, max=65535)
SHARE_OPTION = typer.Option(False, help="Create a public Gradio share link.")

# --- Transformer training options ---------------------------------------------
TRAINER_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for transformer training.",
)

# --- Amazon transfer evaluation options ---------------------------------------
TRANSFER_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for Amazon transfer evaluation.",
)
TRANSFER_OUTPUT_OPTION = typer.Option(
    Path("artifacts/evaluations/amazon_transfer_tfidf_imdb.json"),
    help="Where to write the Amazon transfer evaluation artifact.",
)

# --- Local labeled-feedback evaluation options --------------------------------
LOCAL_EVAL_CONFIG_PATH_OPTION = _json_config_option(
    "Optional JSON config for evaluating a labeled local feedback CSV.",
)
LOCAL_EVAL_OUTPUT_OPTION = typer.Option(
    Path("artifacts/evaluations/customer_feedback_eval_200.json"),
    help="Where to write the local customer-feedback evaluation artifact.",
)
|
|
|
|
# Registered as the application-level callback; it performs no work of its own.
@app.callback()
def main() -> None:
    """Top-level CLI group."""
|
|
|
|
@app.command("status")
def status(message: str | None = None) -> None:
    """Show the current reboot status."""
    # A missing or empty message falls back to the fixed status line.
    typer.echo(message or "Feedback Intelligence environment is configured.")
|
|
|
|
@app.command("describe-dataset")
def describe_dataset(
    base_path: Path = BASE_PATH_OPTION,
    sample_size: int = SAMPLE_SIZE_OPTION,
    seed: int = SEED_OPTION,
) -> None:
    """Print a compact summary of the local IMDb dataset."""
    # The test split is sampled at half the train size, but never below 2 rows.
    rows_per_split = {
        "train": sample_size,
        "test": max(sample_size // 2, 2),
    }
    # Load both splits first (train, then test) before summarising either one.
    records_by_split = {
        split_name: load_local_imdb_reviews(
            base_path=base_path,
            split=split_name,
            sample_size=row_count,
            seed=seed,
        )
        for split_name, row_count in rows_per_split.items()
    }
    summary = {
        split_name: summarize_reviews(records)
        for split_name, records in records_by_split.items()
    }
    typer.echo(json.dumps(summary, indent=2))
|
|
|
|
@app.command("describe-amazon-dataset")
def describe_amazon_dataset(
    sample_size: int = SAMPLE_SIZE_OPTION,
    seed: int = SEED_OPTION,
) -> None:
    """Print a compact summary of a sampled Amazon polarity dataset slice."""
    half_sample = max(sample_size // 2, 2)  # test slice: half of train, floor of 2

    # Fetch train and then test before any summarising happens.
    loaded = {}
    for split_name, row_count in (("train", sample_size), ("test", half_sample)):
        loaded[split_name] = load_amazon_polarity_reviews(
            split=split_name,
            sample_size=row_count,
            seed=seed,
        )

    report = {}
    for split_name, records in loaded.items():
        report[split_name] = summarize_amazon_reviews(records)

    typer.echo(json.dumps(report, indent=2))
|
|
|
|
@app.command("run-baseline")
def run_baseline(
    base_path: Path = BASE_PATH_OPTION,
    output_path: Path = OUTPUT_PATH_OPTION,
    config_path: Path | None = CONFIG_PATH_OPTION,
) -> None:
    """Run the first reproducible benchmark on the local IMDb dataset."""
    # base_path:   IMDb dataset root that both splits are loaded from.
    # output_path: destination handed to write_json for the benchmark result.
    # config_path: optional JSON config; the option defaults to None, in which
    #              case BaselineExperimentConfig() is built with its defaults.
    if config_path is None:
        config = BaselineExperimentConfig()
    else:
        config = BaselineExperimentConfig.from_json(config_path)

    train_records = load_local_imdb_reviews(
        base_path=base_path,
        split="train",
        sample_size=config.sample_size,
        seed=config.seed,
    )
    # The test split uses half the configured sample size, with a floor of 2.
    test_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=max(config.sample_size // 2, 2),
        seed=config.seed,
    )
    result = run_tfidf_logreg_baseline(
        train_records=train_records,
        test_records=test_records,
        config=config,
    )
    write_json(output_path, result.to_dict())
    typer.echo(f"Wrote benchmark artifact to {output_path}")
    # Only the path is reported here; presumably run_tfidf_logreg_baseline
    # performs the actual model save — confirm against that function.
    typer.echo(f"Saved baseline model to {config.model_output_path}")
|
|
|
|
@app.command("train-transformer")
def train_transformer(
    base_path: Path = BASE_PATH_OPTION,
    config_path: Path | None = TRAINER_CONFIG_PATH_OPTION,
) -> None:
    """Fine-tune a transformer sentiment model and save it for inference."""
    # base_path:   IMDb dataset root that both splits are loaded from.
    # config_path: optional JSON config; the option defaults to None, in which
    #              case TransformerTrainingConfig() is built with its defaults.
    if config_path is None:
        config = TransformerTrainingConfig()
    else:
        config = TransformerTrainingConfig.from_json(config_path)

    # Train and test sample sizes are configured independently here.
    train_records = load_local_imdb_reviews(
        base_path=base_path,
        split="train",
        sample_size=config.train_sample_size,
        seed=config.seed,
    )
    test_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=config.test_sample_size,
        seed=config.seed,
    )
    result = train_transformer_model(
        train_records=train_records,
        test_records=test_records,
        config=config,
    )
    typer.echo(f"Saved transformer model to {result.output_dir}")
    typer.echo(f"Best validation checkpoint came from epoch {result.best_epoch}")
    # Nothing is written by this command directly; presumably
    # train_transformer_model writes the metrics file — confirm.
    typer.echo(f"Wrote transformer metrics to {config.metrics_output_path}")
|
|
|
|
@app.command("analyze-reviews")
def analyze_reviews_command(
    base_path: Path = BASE_PATH_OPTION,
    output_path: Path = ANALYSIS_OUTPUT_OPTION,
    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
) -> None:
    """Generate clustered review insights and review priorities."""
    # base_path:   IMDb dataset root; only the "test" split is analysed.
    # output_path: destination handed to write_json for the analysis artifact.
    # config_path: optional JSON config; the option defaults to None, in which
    #              case ReviewAnalysisConfig() is built with its defaults.
    if config_path is None:
        analysis_config = ReviewAnalysisConfig()
    else:
        analysis_config = ReviewAnalysisConfig.from_json(config_path)

    analysis_records = load_local_imdb_reviews(
        base_path=base_path,
        split="test",
        sample_size=analysis_config.analysis_sample_size,
        seed=analysis_config.seed,
    )
    # The configured model path is made absolute before being handed to the loader.
    predictor = load_sentiment_predictor(
        model_path=Path(analysis_config.sentiment_model_path).resolve(),
        backend=analysis_config.sentiment_backend,
        max_length=analysis_config.sentiment_max_length,
    )
    artifact = analyze_reviews_with_predictor(
        review_records=analysis_records,
        predictor=predictor,
        analysis_config=analysis_config,
        sentiment_model_info=predictor.describe(),
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote review analysis artifact to {output_path}")
|
|
|
|
@app.command("evaluate-amazon-transfer")
def evaluate_amazon_transfer(
    output_path: Path = TRANSFER_OUTPUT_OPTION,
    config_path: Path | None = TRANSFER_CONFIG_PATH_OPTION,
) -> None:
    """Evaluate a saved sentiment model on Amazon polarity reviews."""
    # output_path: destination handed to write_json for the evaluation artifact.
    # config_path: optional JSON config; the option defaults to None, in which
    #              case AmazonTransferEvaluationConfig() is built with its defaults.
    if config_path is None:
        config = AmazonTransferEvaluationConfig()
    else:
        config = AmazonTransferEvaluationConfig.from_json(config_path)

    amazon_records = load_amazon_polarity_reviews(
        split=str(config.dataset_split),
        sample_size=config.dataset_sample_size,
        seed=config.seed,
        include_title=config.include_title,
        dataset_name=config.dataset_name,
    )
    # The configured model path is made absolute before being handed to the loader.
    predictor = load_sentiment_predictor(
        model_path=Path(config.sentiment_model_path).resolve(),
        backend=config.sentiment_backend,
        max_length=config.sentiment_max_length,
    )
    # NOTE(review): the loader receives str(config.dataset_split) while the
    # artifact records the raw config value — confirm both serialize the same.
    dataset_info = {
        "dataset_name": config.dataset_name,
        "split": config.dataset_split,
        "sample_size": config.dataset_sample_size,
        "include_title": config.include_title,
        "seed": config.seed,
    }
    artifact = evaluate_reviews_with_predictor(
        review_records=amazon_records,
        predictor=predictor,
        dataset_info=dataset_info,
        max_error_examples=config.max_error_examples,
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote Amazon transfer evaluation artifact to {output_path}")
|
|
|
|
@app.command("evaluate-local-feedback")
def evaluate_local_feedback(
    output_path: Path = LOCAL_EVAL_OUTPUT_OPTION,
    config_path: Path | None = LOCAL_EVAL_CONFIG_PATH_OPTION,
) -> None:
    """Evaluate a saved model on a fixed local labeled customer-feedback CSV."""
    # output_path: destination handed to write_json for the evaluation artifact.
    # config_path: optional JSON config; the option defaults to None, in which
    #              case LocalEvaluationConfig() is built with its defaults.
    if config_path is None:
        config = LocalEvaluationConfig()
    else:
        config = LocalEvaluationConfig.from_json(config_path)

    # Column names, split name and source name all come from the config.
    local_records = load_local_labeled_reviews(
        dataset_path=Path(config.dataset_path),
        text_column=config.text_column,
        title_column=config.title_column,
        label_column=config.label_column,
        review_id_column=config.review_id_column,
        split_name=config.split_name,
        source_name=config.source_name,
    )
    # The configured model path is made absolute before being handed to the loader.
    predictor = load_sentiment_predictor(
        model_path=Path(config.sentiment_model_path).resolve(),
        backend=config.sentiment_backend,
        max_length=config.sentiment_max_length,
    )
    # NOTE(review): dataset_path is recorded exactly as configured (not wrapped
    # in Path) — presumably a plain string that write_json can serialize; confirm.
    dataset_info = {
        "dataset_name": config.source_name,
        "split": config.split_name,
        "dataset_path": config.dataset_path,
    }
    artifact = evaluate_reviews_with_predictor(
        review_records=local_records,
        predictor=predictor,
        dataset_info=dataset_info,
        max_error_examples=config.max_error_examples,
    )
    write_json(output_path, artifact.to_dict())
    typer.echo(f"Wrote local feedback evaluation artifact to {output_path}")
|
|
|
|
@app.command("launch-demo")
def launch_demo(
    base_path: Path = BASE_PATH_OPTION,
    config_path: Path | None = ANALYSIS_CONFIG_PATH_OPTION,
    host: str = HOST_OPTION,
    port: int = PORT_OPTION,
    share: bool = SHARE_OPTION,
) -> None:
    """Launch the Gradio feedback-intelligence demo."""
    # base_path:   IMDb dataset root forwarded to create_demo.
    # config_path: optional JSON config; the option defaults to None, in which
    #              case ReviewAnalysisConfig() is built with its defaults.
    # host/port/share: forwarded to demo.launch as server_name, server_port, share.
    if config_path is None:
        analysis_config = ReviewAnalysisConfig()
    else:
        analysis_config = ReviewAnalysisConfig.from_json(config_path)

    demo = create_demo(base_path=base_path, analysis_config=analysis_config)
    demo.launch(server_name=host, server_port=port, share=share)
|
|