Spaces: Running on CPU Upgrade
| import yaml | |
| from yourbench_space.utils import CONFIG_PATH | |
def generate_base_config(
    hf_org,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    hf_dataset_prefix,
    private_dataset,
    ingestion_model,
    summarization_model,
    single_shot_question_generation_model,
    multi_hop_question_generation_model,
    answer_generation_model,
    judge_answers_model,
):
    """Build the YourBench pipeline configuration and return it as a YAML string.

    Args:
        hf_org: Hugging Face organization that owns the produced datasets.
        model_name: Model used for the single ``model_list`` entry and for the
            answer-generation strategies.
        provider: Inference provider for the model.
        base_url: Base URL of the inference endpoint.
        model_api_key: Accepted for interface compatibility; the emitted config
            references the ``$MODEL_API_KEY`` environment placeholder instead of
            embedding the key, so this value is intentionally not written out.
        max_concurrent_requests: Concurrency limit for the model entry.
        hf_dataset_prefix: Prefix for every dataset name produced by the pipeline.
        private_dataset: Whether the Hub datasets are created as private.
        ingestion_model, summarization_model, single_shot_question_generation_model,
        multi_hop_question_generation_model, answer_generation_model,
        judge_answers_model: Model assigned to each respective pipeline role.

    Returns:
        The configuration serialized with ``yaml.dump`` (insertion order kept).
    """
    config = {
        "hf_configuration": {
            # "$HF_TOKEN" is an env-var placeholder resolved at run time;
            # the secret is never embedded in the config file.
            "token": "$HF_TOKEN",
            "private": private_dataset,
            "hf_organization": hf_org,
        },
        "model_list": [
            {
                "model_name": model_name,
                "provider": provider,
                "base_url": base_url,
                "api_key": "$MODEL_API_KEY",
                "max_concurrent_requests": max_concurrent_requests,
            }
        ],
        # Assign each role the model the caller selected for it. Previously
        # every role was hard-wired to `model_name`, silently ignoring the
        # six per-role parameters.
        "model_roles": {
            "ingestion": [ingestion_model],
            "summarization": [summarization_model],
            "single_shot_question_generation": [single_shot_question_generation_model],
            "multi_hop_question_generation": [multi_hop_question_generation_model],
            "answer_generation": [answer_generation_model],
            "judge_answers": [judge_answers_model],
        },
        # NOTE(review): fixed at 16 independently of `max_concurrent_requests`
        # (which applies to the model entry) — confirm this split is intended.
        "inference_config": {"max_concurrent_requests": 16},
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/uploaded_files",
                "output_dir": "/app/ingested",
                "run": True,
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": "/app/ingested",
                "hub_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
                "run": True,
            },
            "summarization": {
                "source_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
                "output_dataset_name": f"{hf_dataset_prefix}_summaries",
                "concat_existing_dataset": False,
                "run": True,
            },
            "chunking": {
                "source_dataset_name": f"{hf_dataset_prefix}_summaries",
                "output_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
                "concat_existing_dataset": False,
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4,
                },
                "run": True,
            },
            "single_shot_question_generation": {
                "source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
                "output_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
                "diversification_seed": "24 year old adult",
                "concat_existing_dataset": False,
                "run": True,
            },
            "multi_hop_question_generation": {
                "source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
                "output_dataset_name": f"{hf_dataset_prefix}_multi_hop_questions",
                "concat_existing_dataset": False,
                "run": True,
            },
            "answer_generation": {
                "question_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
                "output_dataset_name": f"{hf_dataset_prefix}_answered_questions",
                "concat_existing_dataset": False,
                "strategies": [
                    {
                        "name": "zeroshot",
                        "prompt": "ZEROSHOT_QA_USER_PROMPT",
                        "model_name": model_name,
                    },
                    {
                        "name": "gold",
                        "prompt": "GOLD_QA_USER_PROMPT",
                        "model_name": model_name,
                    },
                ],
                "run": True,
            },
            "judge_answers": {
                "source_judge_dataset_name": f"{hf_dataset_prefix}_answered_questions",
                "output_judged_dataset_name": f"{hf_dataset_prefix}_judged_comparisons",
                "concat_existing_dataset": False,
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42,
                "run": True,
            },
        },
    }
    # sort_keys=False keeps the sections in the order defined above.
    return yaml.dump(config, sort_keys=False)
def save_config(yaml_text):
    """Write *yaml_text* to ``CONFIG_PATH``, replacing any existing file.

    Args:
        yaml_text: The YAML document to persist.

    Returns:
        A short status message for display in the UI.
    """
    # Explicit UTF-8: the platform-default encoding can raise
    # UnicodeEncodeError on non-ASCII config content (e.g. on Windows).
    with open(CONFIG_PATH, "w", encoding="utf-8") as file:
        file.write(yaml_text)
    return "✅ Config saved!"