Spaces:

Percy3822
/

Python_ai_attempt2

Sleeping

App Files Files Community

Percy3822 committed on Aug 10, 2025

Commit

69cca4e

verified ·

1 Parent(s): 81255b6

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -48

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-import os, shutil, subprocess, zipfile
 from pathlib import Path
 from datetime import datetime
 import gradio as gr
@@ -9,33 +10,43 @@ LOG  = ROOT / "train.log"
 RUNS = ROOT / "runs"
 RUNS.mkdir(exist_ok=True)
-# ---------- helpers ----------
 def ls_workspace() -> str:
     rows = []
     for p in sorted(ROOT.iterdir(), key=lambda x: (x.is_file(), x.name.lower())):
-        try: size = p.stat().st_size
-        except Exception: size = 0
         rows.append(f"{'[DIR]' if p.is_dir() else '     '}\t{size:>10}\t{p.name}")
     return "\n".join(rows) or "(empty)"
 def list_models():
     out = []
     for base in [ROOT, RUNS]:
-        if not base.exists():
             continue
         for p in base.iterdir():
             if p.is_dir() and (p / "config.json").exists() and (
                 (p / "tokenizer.json").exists() or (p / "tokenizer_config.json").exists()
             ):
                 out.append(str(p))
-    # ensure uniqueness & sorted
     return sorted(set(out))
 def dropdown_update_safe(models, prefer=None):
     val = prefer if (prefer and prefer in models) else (models[0] if models else None)
     return gr.update(choices=models, value=val)
-# ---------- training ----------
 def upload_dataset(file):
     if not file:
         return "❌ No file selected.", ls_workspace()
@@ -44,13 +55,13 @@ def upload_dataset(file):
         return f"✅ Uploaded → {DATA.name}", ls_workspace()
     return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
 def start_training(run_name):
-    # Unique run folder and zip
     run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
     out_dir  = RUNS / run_id
     zip_path = RUNS / f"{run_id}.zip"
-    # Clean previous artifacts only for this run
     if out_dir.exists():
         shutil.rmtree(out_dir, ignore_errors=True)
     if zip_path.exists():
@@ -69,6 +80,7 @@ def start_training(run_name):
         "--block_size", "256",
         "--learning_rate", "5e-5",
     ]
     with open(LOG, "a", encoding="utf-8") as lf:
         code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
@@ -82,13 +94,10 @@ def start_training(run_name):
         info = f"❌ Training failed (exit {code}). Check logs below."
         dl_update = gr.update(value=None, visible=False)
     return info, dl_update, ls_workspace(), read_logs(), model_update
-def read_logs():
-    return LOG.read_text(encoding="utf-8")[-20000:] if LOG.exists() else "⏳ Waiting…"
 def refresh_download():
-    # We don’t know which run user wants; show the newest zip if any
     zips = sorted(RUNS.glob("*.zip"), key=lambda p: p.stat().st_mtime, reverse=True)
     latest = zips[0] if zips else None
     models = list_models()
@@ -98,7 +107,7 @@ def refresh_download():
         dropdown_update_safe(models)
     )
-# ---------- testing ----------
 def import_zip(zfile):
     if not zfile:
         return "❌ No zip selected.", list_models()
@@ -110,38 +119,61 @@ def import_zip(zfile):
         z.extractall(dest)
     return f"✅ Imported to {dest.name}", list_models()
 def generate(model_path, prompt):
-    # 1) Validate inputs
     if not model_path:
         return "❌ Select a model from the dropdown first."
     if not Path(model_path).exists():
         return f"❌ Model folder not found: {model_path}"
     if not prompt or not prompt.strip():
         return "❌ Enter a prompt."
     try:
-        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-        import torch
-        tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
-        # ensure pad token
-        if tok.pad_token_id is None:
-            if tok.eos_token_id is not None:
-                tok.pad_token = tok.eos_token
-            else:
-                tok.add_special_tokens({"pad_token": "[PAD]"})
-        model = AutoModelForCausalLM.from_pretrained(model_path)
-        # align embeddings if we added tokens
-        if getattr(model, "config", None) and getattr(model.config, "vocab_size", None) and len(tok) > model.config.vocab_size:
-            model.resize_token_embeddings(len(tok))
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tok,
-            device_map="auto" if torch.cuda.is_available() else None,
-        )
         out = pipe(
             prompt.strip(),
             max_new_tokens=120,
@@ -150,29 +182,29 @@ def generate(model_path, prompt):
             top_p=0.9,
             repetition_penalty=1.15,
             no_repeat_ngram_size=4,
-            eos_token_id=tok.eos_token_id,
-            pad_token_id=tok.pad_token_id,
             truncation=True
         )[0]["generated_text"]
         return out
     except Exception as e:
-        import traceback
         return "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
-# ---------- UI ----------
 with gr.Blocks(title="Python AI — Train & Test") as app:
-    gr.Markdown("## 🧠 Python AI — Train & Test\n• Unique run folders • Safe download • Reliable generation\n")
-    # ---- Test tab first so Train can target its dropdown
     with gr.Tab("Test"):
-        gr.Markdown("### Pick a model folder or upload a .zip, then prompt it")
         refresh_btn = gr.Button("↻ Refresh Model List")
         model_list = gr.Dropdown(
             choices=list_models(),
             label="Available AIs",
             interactive=True,
-            allow_custom_value=True  # no warnings when empty
         )
         zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
         import_status = gr.Textbox(label="Import Status", interactive=False)
@@ -180,7 +212,6 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
         go = gr.Button("Generate")
         out = gr.Textbox(label="AI Response", lines=20)
-    # ---- Train tab
     with gr.Tab("Train"):
         with gr.Row():
             ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl"])
@@ -204,7 +235,6 @@ with gr.Blocks(title="Python AI — Train & Test") as app:
         refresh_download,
         outputs=[download_file, ws, model_list]
     )
     refresh_btn.click(lambda: dropdown_update_safe(list_models()), outputs=model_list)
     zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
     go.click(generate, inputs=[model_list, prompt], outputs=out)

+# app.py
+import os, shutil, subprocess, zipfile, traceback
 from pathlib import Path
 from datetime import datetime
 import gradio as gr
 RUNS = ROOT / "runs"
 RUNS.mkdir(exist_ok=True)
+# -------- logging helpers --------
def append_log(msg: str):
    """Append *msg* to the log file as a newline-terminated entry.

    Trailing whitespace is stripped from *msg* first, so the written text
    always ends with exactly one newline.
    """
    entry = f"{msg.rstrip()}\n"
    with open(LOG, "a", encoding="utf-8") as log_file:
        log_file.write(entry)
def read_logs():
    """Return the tail of the log file, or a placeholder when it is missing.

    Only the last 20000 characters of the log are returned.
    """
    if not LOG.exists():
        return "⏳ Waiting…"
    contents = LOG.read_text(encoding="utf-8")
    return contents[-20000:]
+# -------- workspace + models --------
 def ls_workspace() -> str:
     """Return a tab-separated listing of the entries directly under ROOT.

     Directories are listed before files, each group ordered by
     case-insensitive name. Every row holds a ``[DIR]`` marker (blank for
     non-directories), the size right-aligned to 10 columns, and the name.
     An entry whose size cannot be read is shown with size 0. An empty
     workspace yields ``"(empty)"``.
     """
     def _order(entry):
         # False sorts before True, so directories come first.
         return (entry.is_file(), entry.name.lower())

     lines = []
     for entry in sorted(ROOT.iterdir(), key=_order):
         try:
             nbytes = entry.stat().st_size
         except Exception:
             nbytes = 0
         tag = "[DIR]" if entry.is_dir() else "     "
         lines.append(f"{tag}\t{nbytes:>10}\t{entry.name}")
     if not lines:
         return "(empty)"
     return "\n".join(lines)
 def list_models():
     """Return the sorted, de-duplicated paths of model folders in ROOT and RUNS.

     A sub-directory counts as a model folder when it contains
     ``config.json`` together with either ``tokenizer.json`` or
     ``tokenizer_config.json``. Base directories that do not exist are
     skipped.
     """
     def _looks_like_model(folder):
         if not folder.is_dir():
             return False
         if not (folder / "config.json").exists():
             return False
         return (folder / "tokenizer.json").exists() or (folder / "tokenizer_config.json").exists()

     found = set()
     for base in (ROOT, RUNS):
         if base.exists():
             found.update(str(child) for child in base.iterdir() if _looks_like_model(child))
     return sorted(found)
 def dropdown_update_safe(models, prefer=None):
     """Build a Gradio update that sets the dropdown choices and a valid value.

     The selected value is *prefer* when it is truthy and present in
     *models*; otherwise the first entry of *models*; otherwise ``None``
     when *models* is empty.
     """
     if prefer and prefer in models:
         selected = prefer
     elif models:
         selected = models[0]
     else:
         selected = None
     return gr.update(choices=models, value=selected)
+# -------- dataset upload --------
 def upload_dataset(file):
     if not file:
         return "❌ No file selected.", ls_workspace()
         return f"✅ Uploaded → {DATA.name}", ls_workspace()
     return "⚠ Unexpected item; please upload a .jsonl file.", ls_workspace()
+# -------- training --------
 def start_training(run_name):
     run_id = (run_name or "").strip() or datetime.now().strftime("run_%Y%m%d_%H%M%S")
     out_dir  = RUNS / run_id
     zip_path = RUNS / f"{run_id}.zip"
+    # clean only this run
     if out_dir.exists():
         shutil.rmtree(out_dir, ignore_errors=True)
     if zip_path.exists():
         "--block_size", "256",
         "--learning_rate", "5e-5",
     ]
+    append_log("▶ " + " ".join(cmd))
     with open(LOG, "a", encoding="utf-8") as lf:
         code = subprocess.Popen(cmd, stdout=lf, stderr=subprocess.STDOUT).wait()
         info = f"❌ Training failed (exit {code}). Check logs below."
         dl_update = gr.update(value=None, visible=False)
+    append_log(info)
     return info, dl_update, ls_workspace(), read_logs(), model_update
 def refresh_download():
     zips = sorted(RUNS.glob("*.zip"), key=lambda p: p.stat().st_mtime, reverse=True)
     latest = zips[0] if zips else None
     models = list_models()
         dropdown_update_safe(models)
     )
+# -------- import a zip as a model folder --------
 def import_zip(zfile):
     if not zfile:
         return "❌ No zip selected.", list_models()
         z.extractall(dest)
     return f"✅ Imported to {dest.name}", list_models()
+# -------- generation: cached pipeline --------
# Single-entry cache: the most recently loaded model path and its pipeline.
_GEN_CACHE = {"path": None, "pipe": None}
def get_generation_pipeline(model_path: str):
    """Return a text-generation pipeline for *model_path*, reusing the cached one.

    The cache holds one pipeline at a time; asking for a different path
    loads a new pipeline and replaces the cached entry.

    Args:
        model_path: Folder passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        The ``transformers`` text-generation pipeline for that folder.
    """
    # Check the cache before the heavy imports so a cache hit costs nothing
    # and cannot fail on them.
    if _GEN_CACHE["path"] == model_path and _GEN_CACHE["pipe"] is not None:
        return _GEN_CACHE["pipe"]

    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
    import torch

    append_log(f"🧩 Loading pipeline from: {model_path}")
    tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
    # Make sure the tokenizer has a pad token: reuse EOS when there is one,
    # otherwise add a dedicated [PAD] token.
    if tok.pad_token_id is None:
        if tok.eos_token_id is not None:
            tok.pad_token = tok.eos_token
            append_log("ℹ No pad_token; using eos_token as pad_token.")
        else:
            tok.add_special_tokens({"pad_token": "[PAD]"})
            append_log("ℹ Added [PAD] token to tokenizer.")
    model = AutoModelForCausalLM.from_pretrained(model_path)
    # Grow the embedding matrix when the tokenizer now has more entries than
    # the model's configured vocabulary (e.g. after adding [PAD] above).
    if getattr(model, "config", None) and getattr(model.config, "vocab_size", None) and len(tok) > model.config.vocab_size:
        model.resize_token_embeddings(len(tok))
        append_log(f"ℹ Resized embeddings to {len(tok)}.")
    # NOTE(review): the model is already instantiated at this point; confirm
    # that `device_map` has any effect when a model object (not a path) is
    # passed to pipeline() — TODO verify against the transformers docs.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tok,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    _GEN_CACHE["path"] = model_path
    _GEN_CACHE["pipe"] = pipe
    append_log("✅ Pipeline loaded.")
    return pipe
 def generate(model_path, prompt):
     """Generate text from *prompt* with the model stored at *model_path*.

     Args:
         model_path: Path of the model folder as a string, or a list whose
             first element is that path (an empty list counts as no
             selection).
         prompt: Prompt text; surrounding whitespace is stripped before use.

     Returns:
         The generated text on success. Otherwise a message starting with
         "❌" that describes the invalid input or the generation error.
         Exceptions raised while loading the pipeline or generating are
         caught, logged with their full traceback, and reported this way.
     """
     # The dropdown value may arrive as a list; use its first element.
     if isinstance(model_path, list):
         model_path = model_path[0] if model_path else None

     if not model_path:
         return "❌ Select a model from the dropdown first."
     if not isinstance(model_path, str):
         return f"❌ Invalid model path type: {type(model_path).__name__}"
     if not Path(model_path).exists():
         return f"❌ Model folder not found: {model_path}"
     if not prompt or not prompt.strip():
         return "❌ Enter a prompt."

     try:
         pipe = get_generation_pipeline(model_path)
         append_log(f"📝 Generating for prompt ({len(prompt)} chars)…")
         out = pipe(
             prompt.strip(),
             max_new_tokens=120,
             top_p=0.9,
             repetition_penalty=1.15,
             no_repeat_ngram_size=4,
             truncation=True
         )[0]["generated_text"]
         append_log("✅ Generation OK.")
         return out
     except Exception as e:
         # Full traceback goes to the log; the UI only gets the final line.
         tb = traceback.format_exc()
         append_log("❌ Generation error:\n" + tb)
         return "❌ Error during generation:\n" + "".join(traceback.format_exception_only(type(e), e))
+# -------- UI --------
 with gr.Blocks(title="Python AI — Train & Test") as app:
+    gr.Markdown("## 🧠 Python AI — Train & Test\n• Unique runs • Safe download • Cached generation\n")
+    # Test first (so Train can update its dropdown)
     with gr.Tab("Test"):
+        gr.Markdown("### Choose a model folder or upload a .zip, then prompt it")
         refresh_btn = gr.Button("↻ Refresh Model List")
         model_list = gr.Dropdown(
             choices=list_models(),
             label="Available AIs",
             interactive=True,
+            allow_custom_value=True,  # keeps UI quiet when empty
+            multiselect=False         # force single selection
         )
         zip_in = gr.File(label="Or upload a model .zip", file_types=[".zip"])
         import_status = gr.Textbox(label="Import Status", interactive=False)
         go = gr.Button("Generate")
         out = gr.Textbox(label="AI Response", lines=20)
     with gr.Tab("Train"):
         with gr.Row():
             ds = gr.File(label="📥 Upload JSONL", file_types=[".jsonl"])
         refresh_download,
         outputs=[download_file, ws, model_list]
     )
     refresh_btn.click(lambda: dropdown_update_safe(list_models()), outputs=model_list)
     zip_in.change(import_zip, inputs=zip_in, outputs=[import_status, model_list])
     go.click(generate, inputs=[model_list, prompt], outputs=out)