Spaces:
Running
Running
| """Run the full support agent pipeline on the test set and save results.""" | |
| import json | |
| import sys | |
| from pathlib import Path | |
| import yaml | |
| from loguru import logger | |
| from tqdm import tqdm | |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) | |
| from src.data.dataset import load_splits | |
| from src.pipeline.agent import build_agent | |
def main() -> None:
    """Run the generation pipeline on the test set and save all results.

    Steps:
      1. Load ``config/config.yaml`` and the test split found under
         ``cfg["paths"]["data_processed"]``.
      2. If the test split has more rows than
         ``cfg["evaluation"]["ragas_sample_size"]``, draw a label-stratified
         subsample of exactly that many rows (fixed seed 42).
      3. Call ``agent.resolve`` on each row's ``text`` and attach the row's
         ``label`` to the result as ``true_label``. A failure on one row is
         logged and counted; it does not stop the run.
      4. Write the collected results to
         ``<cfg["paths"]["results"]>/generation_results.json``.

    The ``logs`` directory and ``config/config.yaml`` are resolved relative
    to the current working directory.
    """
    Path("logs").mkdir(exist_ok=True)
    logger.add("logs/run_generation.log", rotation="10 MB")

    # Explicit UTF-8 so the config is read the same way on every platform,
    # instead of depending on the locale's default encoding.
    with open("config/config.yaml", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    _, _, test_df = load_splits(cfg["paths"]["data_processed"])

    # Subsample to RAGAS target size — no need to call API on all 4k examples
    n_generate = cfg["evaluation"]["ragas_sample_size"]
    if len(test_df) > n_generate:
        from sklearn.model_selection import train_test_split as _tts

        # Pass the absolute row count rather than a fraction. With a float,
        # scikit-learn takes ceil(fraction * n_samples), and the product can
        # round just above the intended integer (7 / 100 * 100 ==
        # 7.000000000000001), yielding one row too many. The guard above
        # guarantees the count is smaller than the number of rows.
        _, test_df = _tts(
            test_df,
            test_size=int(n_generate),
            stratify=test_df["label"],
            random_state=42,
        )
        test_df = test_df.reset_index(drop=True)
        logger.info(f"Subsampled test set to {len(test_df)} examples for generation.")

    agent = build_agent(cfg)
    results = []
    errors = 0

    for _, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Generating responses"):
        try:
            result = agent.resolve(row["text"])
            result["true_label"] = row["label"]
            results.append(result)
        except Exception as e:
            # Best-effort per row: log and keep going. str() guards the
            # preview so that a non-string text value (e.g. NaN) cannot make
            # this handler raise and abort the whole run.
            logger.error(f"Error processing query '{str(row['text'])[:60]}': {e}")
            errors += 1

    logger.info(f"Generated {len(results)} responses. Errors: {errors}")

    # Save results
    # NOTE(review): every value in `results` must be JSON-serializable; this
    # depends on what agent.resolve returns and on the label dtype — confirm.
    results_dir = Path(cfg["paths"]["results"])
    results_dir.mkdir(parents=True, exist_ok=True)
    out_path = results_dir / "generation_results.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    logger.info(f"Saved generation results → {out_path}")
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()