Spaces:

vedaco
/

veda-programming

Sleeping

App Files Community

vedaco committed on Jan 15

Commit

5e1076e

verified ·

1 Parent(s): e926670

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -35

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
 """
 Veda Programming Assistant (Gradio 6.x)
 - Hidden teacher fallback (OpenRouter) when student fails
 - Auto-training in background using teacher responses
 - Math solver for simple arithmetic
-- Compatible with Gradio "messages format" + multimodal inputs
 """
 import os
@@ -39,13 +40,18 @@ current_conv_id = -1
 # Teacher usage stats (not shown in chat)
 teacher_used_count = 0
 teacher_failed_count = 0
 # Auto-training control
 AUTO_TRAIN_ENABLED = True
 AUTO_TRAIN_MIN_TEACHER_SAMPLES = 10     # retrain after this many new teacher samples
 AUTO_TRAIN_CHECK_EVERY_SEC = 120        # check every 2 minutes
-AUTO_TRAIN_EPOCHS = 5                   # keep small for Spaces CPU
-AUTO_TRAIN_COOLDOWN_SEC = 60 * 20       # at least 20 minutes between trainings
 _is_training = False
 _last_train_time = 0
@@ -147,7 +153,6 @@ def try_math_answer(user_text: str):
     s = s.replace("=", "").replace("?", "").strip()
     s = s.replace("^", "**")  # allow ^
-    # only allow numeric math chars
     if not re.fullmatch(r"[0-9\.\s\+\-\*\/\(\)%]+", s):
         return None
@@ -168,20 +173,33 @@ def is_code_request(user_text: str) -> bool:
     triggers = [
         "write", "implement", "code", "function", "algorithm",
         "bubble sort", "binary search", "merge sort", "quick sort", "quicksort",
-        "linked list", "stack", "queue", "class ", "def "
     ]
     return any(k in t for k in triggers)
 def looks_like_python_code(text: str) -> bool:
     if not text:
         return False
     t = text.strip()
     if "```" in t:
         return True
-    if "def " in t or "class " in t:
         return True
-    if "\n    " in t:
         return True
     return False
 def is_gibberish(text: str) -> bool:
@@ -189,12 +207,11 @@ def is_gibberish(text: str) -> bool:
         return True
     t = text.strip()
-    # repeated greeting
-    if t.lower().count("hello how are you") >= 2:
         return True
-    # too short
-    if len(t) < 25:
         return True
     # lots of symbols vs letters
@@ -210,31 +227,41 @@ def is_gibberish(text: str) -> bool:
         if uniq_ratio < 0.35:
             return True
-    # known “junk” patterns
     junk_patterns = [
-        r"\[\s*\"?\s*\]",        # empty brackets patterns
         r"return\s+if\s+is",
         r"=\s*=\s*=",
         r"def\s+def",
         r"class\s+class",
         r"return\s+return",
     ]
     for p in junk_patterns:
         if re.search(p, t):
             return True
     return False
 def should_use_teacher(user_text: str, student_text: str) -> bool:
-    # teacher must be available
     if not teacher.is_available():
         return False
-    # always use teacher for code requests unless student produced real code
     if is_code_request(user_text) and not looks_like_python_code(student_text):
         return True
-    # use teacher if student output is gibberish
     if is_gibberish(student_text):
         return True
@@ -295,7 +322,6 @@ def clean_response(text: str) -> str:
     for token in ["<PAD>", "<UNK>", "<START>", "<END>", "<USER>", "<ASSISTANT>"]:
         text = text.replace(token, "")
-    # reduce empty lines
     lines = text.split("\n")
     cleaned = []
     empty = 0
@@ -314,10 +340,10 @@ def clean_response(text: str) -> str:
 # STUDENT + TEACHER RESPONSE
 # -----------------------------
 def get_student_response(user_text: str, temperature: float, max_tokens: int) -> str:
     if model is None or tokenizer is None:
         return ""
-    # build context from internal conversation_history
     context = ""
     for m in conversation_history[-3:]:
         context += f"<USER> {m['user']}\n<ASSISTANT> {m['assistant']}\n"
@@ -344,11 +370,11 @@ def get_student_response(user_text: str, temperature: float, max_tokens: int) ->
     if "<USER>" in out:
         out = out.split("<USER>")[0].strip()
     return clean_response(out)
 def get_teacher_response(user_text: str) -> str:
-    # Build teacher history from our internal conversation_history
     teacher_hist = []
     for m in conversation_history[-4:]:
         teacher_hist.append({"role": "user", "content": m["user"]})
@@ -367,7 +393,7 @@ def generate_response(user_input, temperature=0.7, max_tokens=200) -> str:
     if not user_text:
         return "Please type a message."
-    # math solver first
     math_ans = try_math_answer(user_text)
     if math_ans is not None:
         conversation_history.append({"user": user_text, "assistant": math_ans})
@@ -375,15 +401,15 @@ def generate_response(user_input, temperature=0.7, max_tokens=200) -> str:
         return math_ans
     # student attempt
-    student = get_student_response(user_text, temperature, max_tokens)
-    # teacher fallback
     if should_use_teacher(user_text, student):
         teacher_resp = get_teacher_response(user_text)
         if teacher_resp.strip():
             teacher_used_count += 1
-            # save distillation sample
             try:
                 db.save_distillation_data(
                     user_input=user_text,
@@ -428,7 +454,6 @@ def auto_train_loop():
         if _train_lock.locked():
             continue
-        # check distillation samples
         try:
             unused = db.get_unused_distillation_data(limit=1000)
         except Exception as e:
@@ -438,10 +463,9 @@ def auto_train_loop():
         if len(unused) < AUTO_TRAIN_MIN_TEACHER_SAMPLES:
             continue
-        # train in this background thread
         with _train_lock:
             _is_training = True
-            print(f"[AutoTrain] Starting training on {len(unused)} teacher samples...")
             try:
                 distill_text = ""
@@ -450,7 +474,7 @@ def auto_train_loop():
                     ids.append(row["id"])
                     distill_text += f"<USER> {row['user_input']}\n<ASSISTANT> {row['teacher_response']}\n\n"
-                # include good user-rated conversations too
                 extra = ""
                 try:
                     good = db.get_good_conversations(limit=200)
@@ -469,7 +493,6 @@ def auto_train_loop():
                 model = trainer.model
                 tokenizer = trainer.tokenizer
-                # mark distillation used
                 try:
                     db.mark_distillation_used(ids)
                 except Exception as e:
@@ -535,13 +558,13 @@ def clear_chat():
 def get_stats_md():
     stats = db.get_stats()
     teacher_ok = teacher.is_available()
     return f"""
 ## Statistics
 **Teacher available:** `{teacher_ok}`
 **Teacher used (this runtime):** `{teacher_used_count}`
 **Teacher failed (this runtime):** `{teacher_failed_count}`
 **Auto-training enabled:** `{AUTO_TRAIN_ENABLED}`
 **Currently training:** `{_is_training}`
@@ -579,8 +602,6 @@ with gr.Blocks(title="Veda Programming Assistant") as demo:
 # Veda Programming Assistant
 Ask programming questions, request code, or do math like `2+2=?` or `(10+5)/3`.
-(Teacher is hidden. Auto-learning is automatic.)
 """
     )
@@ -591,7 +612,7 @@ Ask programming questions, request code, or do math like `2+2=?` or `(10+5)/3`.
             with gr.Row():
                 msg = gr.Textbox(
                     label="Message",
-                    placeholder="Example: Write bubble sort",
                     lines=2,
                     scale=4,
                 )
@@ -618,8 +639,8 @@ Ask programming questions, request code, or do math like `2+2=?` or `(10+5)/3`.
             gr.Examples(
                 examples=[
                     ["Write bubble sort in python"],
-                    ["Write binary search"],
-                    ["Explain recursion"],
                     ["2+2=?"],
                     ["(10+5)/3"],
                     ["2^5"],
@@ -631,7 +652,6 @@ Ask programming questions, request code, or do math like `2+2=?` or `(10+5)/3`.
             stats_md = gr.Markdown()
             refresh = gr.Button("Refresh")
             refresh.click(get_stats_md, outputs=stats_md)
-            # Show stats immediately
             demo.load(get_stats_md, outputs=stats_md)
 if __name__ == "__main__":

 """
 Veda Programming Assistant (Gradio 6.x)
 - Hidden teacher fallback (OpenRouter) when student fails
+- IMPORTANT FIX: Always use teacher for CODE requests (bubble sort etc.)
 - Auto-training in background using teacher responses
 - Math solver for simple arithmetic
+- Compatible with Gradio messages format + multimodal inputs
 """
 import os
 # Teacher usage stats (not shown in chat)
 teacher_used_count = 0
 teacher_failed_count = 0
+student_used_count = 0
+# ---- IMPORTANT BEHAVIOR SWITCH ----
+# Forces teacher for code requests so user sees correct code now.
+FORCE_TEACHER_FOR_CODE_REQUESTS = True
 # Auto-training control
 AUTO_TRAIN_ENABLED = True
 AUTO_TRAIN_MIN_TEACHER_SAMPLES = 10     # retrain after this many new teacher samples
 AUTO_TRAIN_CHECK_EVERY_SEC = 120        # check every 2 minutes
+AUTO_TRAIN_EPOCHS = 3                   # keep small for Spaces CPU
+AUTO_TRAIN_COOLDOWN_SEC = 60 * 20       # 20 minutes between trainings
 _is_training = False
 _last_train_time = 0
     s = s.replace("=", "").replace("?", "").strip()
     s = s.replace("^", "**")  # allow ^
     if not re.fullmatch(r"[0-9\.\s\+\-\*\/\(\)%]+", s):
         return None
     triggers = [
         "write", "implement", "code", "function", "algorithm",
         "bubble sort", "binary search", "merge sort", "quick sort", "quicksort",
+        "linked list", "stack", "queue", "class ", "def ",
+        "sort "
     ]
     return any(k in t for k in triggers)
 def looks_like_python_code(text: str) -> bool:
+    """
+    Stronger code detector.
+    Only returns True if we see real python structure.
+    """
     if not text:
         return False
     t = text.strip()
     if "```" in t:
         return True
+    # must contain python keywords + structure
+    if "def " in t and ":" in t:
+        return True
+    if "class " in t and ":" in t:
         return True
+    # allow indented blocks only if also includes python keywords
+    if "\n    " in t and ("for " in t or "while " in t or "if " in t or "return " in t):
         return True
     return False
 def is_gibberish(text: str) -> bool:
         return True
     t = text.strip()
+    if len(t) < 25:
         return True
+    # repeated greeting
+    if t.lower().count("hello how are you") >= 1:
         return True
     # lots of symbols vs letters
         if uniq_ratio < 0.35:
             return True
+    # junk patterns
     junk_patterns = [
         r"return\s+if\s+is",
         r"=\s*=\s*=",
         r"def\s+def",
         r"class\s+class",
         r"return\s+return",
+        r"\[\s*\"?\s*\]",
     ]
     for p in junk_patterns:
         if re.search(p, t):
             return True
+    # “p y t h o n” style (too many single-letter tokens)
+    single_letter_words = re.findall(r"\b[a-zA-Z]\b", t)
+    word_count = len(re.findall(r"\b[a-zA-Z_]+\b", t))
+    if word_count > 0 and (len(single_letter_words) / word_count) > 0.4:
+        return True
     return False
 def should_use_teacher(user_text: str, student_text: str) -> bool:
     if not teacher.is_available():
         return False
+    # IMPORTANT: Force teacher for code requests (until student becomes good)
+    if FORCE_TEACHER_FOR_CODE_REQUESTS and is_code_request(user_text):
+        # If student actually produced code, you could skip teacher,
+        # but early stage student is bad, so use teacher always.
+        return True
+    # fallback to teacher if gibberish or not code when code asked
     if is_code_request(user_text) and not looks_like_python_code(student_text):
         return True
     if is_gibberish(student_text):
         return True
     for token in ["<PAD>", "<UNK>", "<START>", "<END>", "<USER>", "<ASSISTANT>"]:
         text = text.replace(token, "")
     lines = text.split("\n")
     cleaned = []
     empty = 0
 # STUDENT + TEACHER RESPONSE
 # -----------------------------
 def get_student_response(user_text: str, temperature: float, max_tokens: int) -> str:
+    global student_used_count
     if model is None or tokenizer is None:
         return ""
     context = ""
     for m in conversation_history[-3:]:
         context += f"<USER> {m['user']}\n<ASSISTANT> {m['assistant']}\n"
     if "<USER>" in out:
         out = out.split("<USER>")[0].strip()
+    student_used_count += 1
     return clean_response(out)
 def get_teacher_response(user_text: str) -> str:
     teacher_hist = []
     for m in conversation_history[-4:]:
         teacher_hist.append({"role": "user", "content": m["user"]})
     if not user_text:
         return "Please type a message."
+    # math first
     math_ans = try_math_answer(user_text)
     if math_ans is not None:
         conversation_history.append({"user": user_text, "assistant": math_ans})
         return math_ans
     # student attempt
+    student = get_student_response(user_text, float(temperature), int(max_tokens))
     if should_use_teacher(user_text, student):
         teacher_resp = get_teacher_response(user_text)
         if teacher_resp.strip():
             teacher_used_count += 1
+            # Save distillation sample
             try:
                 db.save_distillation_data(
                     user_input=user_text,
         if _train_lock.locked():
             continue
         try:
             unused = db.get_unused_distillation_data(limit=1000)
         except Exception as e:
         if len(unused) < AUTO_TRAIN_MIN_TEACHER_SAMPLES:
             continue
         with _train_lock:
             _is_training = True
+            print(f"[AutoTrain] Training on {len(unused)} teacher samples...")
             try:
                 distill_text = ""
                     ids.append(row["id"])
                     distill_text += f"<USER> {row['user_input']}\n<ASSISTANT> {row['teacher_response']}\n\n"
+                # include user-positive feedback too
                 extra = ""
                 try:
                     good = db.get_good_conversations(limit=200)
                 model = trainer.model
                 tokenizer = trainer.tokenizer
                 try:
                     db.mark_distillation_used(ids)
                 except Exception as e:
 def get_stats_md():
     stats = db.get_stats()
     teacher_ok = teacher.is_available()
     return f"""
 ## Statistics
 **Teacher available:** `{teacher_ok}`
 **Teacher used (this runtime):** `{teacher_used_count}`
 **Teacher failed (this runtime):** `{teacher_failed_count}`
+**Student calls (this runtime):** `{student_used_count}`
 **Auto-training enabled:** `{AUTO_TRAIN_ENABLED}`
 **Currently training:** `{_is_training}`
 # Veda Programming Assistant
 Ask programming questions, request code, or do math like `2+2=?` or `(10+5)/3`.
 """
     )
             with gr.Row():
                 msg = gr.Textbox(
                     label="Message",
+                    placeholder="Example: Write bubble sort in python",
                     lines=2,
                     scale=4,
                 )
             gr.Examples(
                 examples=[
                     ["Write bubble sort in python"],
+                    ["Write binary search in python"],
+                    ["Explain recursion with example"],
                     ["2+2=?"],
                     ["(10+5)/3"],
                     ["2^5"],
             stats_md = gr.Markdown()
             refresh = gr.Button("Refresh")
             refresh.click(get_stats_md, outputs=stats_md)
             demo.load(get_stats_md, outputs=stats_md)
 if __name__ == "__main__":