Spaces:

saravanatanjiro
/

Openenv

Paused

kavin57447 committed on Apr 25

Commit

8d95050

1 Parent(s): 27c9425

Switch to Llama 3.1 8B + fix low-timestep crash (min 5000)

Files changed (2) hide show

app.py CHANGED Viewed

@@ -18,7 +18,10 @@ os.makedirs("./outputs", exist_ok=True)
 def run_math_training(timesteps):
     from cloud_arena.training import train_model
     try:
-        model, callback, _ = train_model(total_timesteps=int(timesteps))
         from cloud_arena.visualization import generate_dashboard
         img_path = generate_dashboard(callback, "outputs/dashboard.png")
         summary = (
@@ -94,9 +97,9 @@ with gr.Blocks(title="Cloud Arena RL") as demo:
         eval_btn.click(run_math_evaluation, outputs=eval_output)
     with gr.Tab("🧠 LLM RL"):
-        gr.Markdown("### LLM Model — Gemma 7B + REINFORCE + LoRA")
         gr.Markdown("> ⚠️ Requires `HF_TOKEN` secret set in Space settings + accepted model license")
-        llm_model = gr.Textbox(value="google/gemma-7b-it", label="Model Name")
         llm_iters = gr.Number(value=10, label="Training Iterations")
         llm_steps = gr.Number(value=5, label="Steps per Episode")
         llm_btn = gr.Button("🚀 Start LLM Training", variant="primary")

 def run_math_training(timesteps):
     from cloud_arena.training import train_model
     try:
+        ts = max(int(timesteps), 5000)  # minimum 5000 to avoid sampling errors
+        if int(timesteps) < 5000:
+            print(f"⚠️ Timesteps too low ({int(timesteps)}), using minimum 5000")
+        model, callback, _ = train_model(total_timesteps=ts)
         from cloud_arena.visualization import generate_dashboard
         img_path = generate_dashboard(callback, "outputs/dashboard.png")
         summary = (
         eval_btn.click(run_math_evaluation, outputs=eval_output)
     with gr.Tab("🧠 LLM RL"):
+        gr.Markdown("### LLM Model — Llama 3.1 8B + REINFORCE + LoRA")
         gr.Markdown("> ⚠️ Requires `HF_TOKEN` secret set in Space settings + accepted model license")
+        llm_model = gr.Textbox(value="meta-llama/Llama-3.1-8B", label="Model Name")
         llm_iters = gr.Number(value=10, label="Training Iterations")
         llm_steps = gr.Number(value=5, label="Steps per Episode")
         llm_btn = gr.Button("🚀 Start LLM Training", variant="primary")

cloud_arena/llm_training.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ============================================================
-# LLM RL Training — LLaMA 3.1 8B + REINFORCE + LoRA
 # This is the LLM model, SEPARATE from the mathematical model.
 # Uses AWSCostEnv (llm_environment.py), NOT CloudArenaEnv.
 # ============================================================
@@ -147,7 +147,7 @@ def run_episode(model, tokenizer, env, is_training=False, optimizer=None,
     return episode_reward, reasoning_log
-def train_llm(model_name="google/gemma-7b-it",
               num_iterations=10, steps_per_episode=5, learning_rate=5e-5,
               progress_callback=None):
     """

 # ============================================================
+# LLM RL Training — Llama 3.1 8B + REINFORCE + LoRA
 # This is the LLM model, SEPARATE from the mathematical model.
 # Uses AWSCostEnv (llm_environment.py), NOT CloudArenaEnv.
 # ============================================================
     return episode_reward, reasoning_log
+def train_llm(model_name="meta-llama/Llama-3.1-8B",
               num_iterations=10, steps_per_episode=5, learning_rate=5e-5,
               progress_callback=None):
     """