Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,14 +7,13 @@ import google.generativeai as genai
|
|
| 7 |
from sentence_transformers import SentenceTransformer, util
|
| 8 |
|
| 9 |
# ============================================================
|
| 10 |
-
# CONFIG
|
| 11 |
# ============================================================
|
| 12 |
-
# Best practice: use os.environ.get("GEMINI_API_KEY")
|
| 13 |
GEMINI_API_KEY = "AIzaSyBrbLGXkSdXReb0lUucYqcNCNBkvS-RBFw"
|
| 14 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 15 |
|
| 16 |
-
# Use
|
| 17 |
-
MODEL = genai.GenerativeModel("gemini-
|
| 18 |
|
| 19 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 20 |
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
@@ -25,86 +24,71 @@ embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
|
|
| 25 |
print("✅ Ready")
|
| 26 |
|
| 27 |
# ============================================================
|
| 28 |
-
#
|
| 29 |
# ============================================================
|
| 30 |
-
def
|
| 31 |
-
"""
|
| 32 |
prompt = f"""
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
2. Create a grading rubric of 3-6 atomic criteria based ONLY on the Knowledge Base.
|
| 37 |
-
|
| 38 |
-
Knowledge Base: {kb}
|
| 39 |
Question: {question}
|
| 40 |
|
| 41 |
-
|
| 42 |
{{
|
| 43 |
-
"intent": "
|
| 44 |
-
"criteria": ["criterion 1", "criterion 2"
|
| 45 |
}}
|
| 46 |
"""
|
| 47 |
try:
|
| 48 |
response = MODEL.generate_content(prompt)
|
| 49 |
-
# Handle
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
text = response.text.strip()
|
| 54 |
-
# Clean JSON if model adds markdown backticks
|
| 55 |
-
text = re.sub(r'^```json\s*|\s*```$', '', text, flags=re.MULTILINE)
|
| 56 |
-
return json.loads(text)
|
| 57 |
except Exception as e:
|
| 58 |
print(f"API Error: {e}")
|
| 59 |
-
return {"intent": "
|
| 60 |
|
| 61 |
def evaluate(answer, question, kb):
|
| 62 |
-
#
|
| 63 |
-
data =
|
| 64 |
-
intent = data.get("intent", "EXPLANATORY")
|
| 65 |
rubric = data.get("criteria", [])
|
| 66 |
|
| 67 |
if not rubric:
|
| 68 |
-
return {"
|
| 69 |
|
| 70 |
-
#
|
| 71 |
sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', answer) if len(s.strip()) > 5]
|
| 72 |
if not sents:
|
| 73 |
-
return {"
|
| 74 |
|
| 75 |
ans_emb = embedder.encode(sents, convert_to_tensor=True)
|
| 76 |
-
|
| 77 |
-
|
| 78 |
for crit in rubric:
|
| 79 |
crit_emb = embedder.encode(crit, convert_to_tensor=True)
|
| 80 |
sims = util.cos_sim(crit_emb, ans_emb)[0]
|
| 81 |
-
|
| 82 |
-
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
if
|
| 87 |
-
elif hit >= max(1, len(scored) // 2): verdict_text = "⚠️ PARTIALLY CORRECT"
|
| 88 |
-
else: verdict_text = "❌ INCORRECT"
|
| 89 |
|
| 90 |
return {
|
| 91 |
-
"intent": intent,
|
| 92 |
-
"
|
| 93 |
-
"final_verdict":
|
| 94 |
}
|
| 95 |
|
| 96 |
-
# ============================================================
|
| 97 |
# UI
|
| 98 |
-
# ============================================================
|
| 99 |
with gr.Blocks() as demo:
|
| 100 |
-
gr.Markdown("## 🧠 Gemini
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
gr.Button("Evaluate").click(evaluate, [a, q, kb], out)
|
| 109 |
-
|
| 110 |
-
demo.launch()
|
|
|
|
| 7 |
import os

from sentence_transformers import SentenceTransformer, util

# ============================================================
# CONFIG
# ============================================================
# SECURITY FIX: the previous revision committed a literal Gemini API key.
# A key published in a repository must be treated as compromised (rotate
# it), and the file's own earlier comment already recommended reading it
# from the environment instead of hard-coding it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
genai.configure(api_key=GEMINI_API_KEY)

# UPDATED: Use a supported 2026 model
MODEL = genai.GenerativeModel("gemini-2.5-flash")

# Prefer GPU for local sentence embeddings when one is available.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
|
| 24 |
print("✅ Ready")
|
| 25 |
|
| 26 |
# ============================================================
# LOGIC
# ============================================================
def get_evaluation_data(kb, question):
    """Gets both intent and rubric in one single API request.

    Args:
        kb: Knowledge-base text the rubric must be grounded in.
        question: The question being graded.

    Returns:
        dict with keys "intent" (a label string) and "criteria"
        (list of criterion strings). On any API or JSON-parsing
        failure, returns {"intent": "ERROR", "criteria": []} so the
        caller can degrade gracefully instead of crashing.
    """
    prompt = f"""
Acting as an examiner, analyze the Knowledge Base (KB) and Question.
1. Determine the intent (FACTUAL, EXPLANATORY, PROCESS, or COMPARISON).
2. Create a rubric of 3-5 atomic grading criteria based ONLY on the KB.

KB: {kb}
Question: {question}

OUTPUT JSON ONLY:
{{
"intent": "LABEL",
"criteria": ["criterion 1", "criterion 2"]
}}
"""
    try:
        response = MODEL.generate_content(prompt)
        text = response.text.strip()
        # Handle cases where the model wraps JSON in markdown backticks.
        # FIX: strip the fence only at the string edges — the previous
        # global sub(r'```json|```') removed backticks ANYWHERE in the
        # payload and could corrupt criterion text containing backticks.
        text = re.sub(r'^```(?:json)?\s*|\s*```$', '', text)
        return json.loads(text)
    except Exception as e:  # best-effort: surface failure as an ERROR intent
        print(f"API Error: {e}")
        return {"intent": "ERROR", "criteria": []}
| 53 |
|
| 54 |
def evaluate(answer, question, kb):
    """Grade *answer* against a Gemini-generated rubric derived from *kb*.

    Args:
        answer: Student answer text (free-form prose).
        question: The question the answer responds to.
        kb: Knowledge-base text used to build the grading rubric.

    Returns:
        dict with "intent", per-criterion "rubric_results", and a
        "final_verdict" string — or an {"error": ...} dict when the
        rubric cannot be built or the answer is too short.
    """
    # API Call — one Gemini round-trip returns both intent and rubric.
    data = get_evaluation_data(kb, question)
    rubric = data.get("criteria", [])

    if not rubric:
        # Also covers the API-error fallback, which returns "criteria": [].
        return {"error": "Could not generate rubric. Check API status."}

    # Semantic Matching (Local)
    # Split the answer into sentences at .!? boundaries; discard fragments
    # of 5 characters or fewer.
    sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', answer) if len(s.strip()) > 5]
    if not sents:
        return {"error": "Answer is too short to evaluate."}

    ans_emb = embedder.encode(sents, convert_to_tensor=True)
    results = []
    for crit in rubric:
        crit_emb = embedder.encode(crit, convert_to_tensor=True)
        # Best cosine similarity between this criterion and any one sentence.
        sims = util.cos_sim(crit_emb, ans_emb)[0]
        score = float(torch.max(sims)) if sims.numel() else 0.0
        # NOTE(review): SIM_THRESHOLD is not defined in any visible hunk of
        # this diff — presumably a module-level constant; confirm upstream.
        results.append({"criterion": crit, "satisfied": score >= SIM_THRESHOLD})

    # Verdict: all criteria hit -> CORRECT; some -> PARTIAL; none -> INCORRECT.
    hits = sum(r["satisfied"] for r in results)
    verdict = "✅ CORRECT" if hits == len(results) else "⚠️ PARTIAL" if hits > 0 else "❌ INCORRECT"

    return {
        "intent": data.get("intent"),
        "rubric_results": results,
        "final_verdict": verdict
    }
|
| 84 |
|
|
|
|
| 85 |
# UI — Gradio front-end: three text inputs feed evaluate(), JSON output.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Gemini 2.5 Answer Grader")
    kb_input = gr.Textbox(label="Knowledge Base", lines=5)
    q_input = gr.Textbox(label="Question")
    a_input = gr.Textbox(label="Student Answer", lines=4)
    out = gr.JSON(label="Evaluation Result")
    # Input order matches the evaluate(answer, question, kb) signature.
    gr.Button("Evaluate").click(evaluate, [a_input, q_input, kb_input], out)

demo.launch()
|
|
|
|
|
|
|
|
|