Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,11 +5,27 @@ from sentence_transformers import CrossEncoder
|
|
| 5 |
import re
|
| 6 |
import hashlib
|
| 7 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# ============================================================
|
| 10 |
# MODEL LOADING (ONCE)
|
| 11 |
# ============================================================
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
DEVICE = "cpu"
|
| 14 |
|
| 15 |
SIM_MODEL_NAME = "cross-encoder/stsb-distilroberta-base"
|
|
@@ -64,27 +80,62 @@ def classify_question(question):
|
|
| 64 |
# SCHEMA GENERATION (AUTO, NO LLM)
|
| 65 |
# ============================================================
|
| 66 |
|
| 67 |
-
def
|
| 68 |
"""
|
| 69 |
-
|
| 70 |
-
Deterministic
|
| 71 |
"""
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
return schema
|
| 87 |
|
|
|
|
| 88 |
# ============================================================
|
| 89 |
# ANSWER DECOMPOSITION
|
| 90 |
# ============================================================
|
|
@@ -105,7 +156,7 @@ def evaluate_answer(answer, question, kb):
|
|
| 105 |
# --------------------
|
| 106 |
key = hash_key(kb, question)
|
| 107 |
if key not in SCHEMA_CACHE:
|
| 108 |
-
SCHEMA_CACHE[key] =
|
| 109 |
|
| 110 |
schema = SCHEMA_CACHE[key]
|
| 111 |
logs["schema"] = schema
|
|
|
|
| 5 |
import re
|
| 6 |
import hashlib
|
| 7 |
import json
|
| 8 |
+
import os
|
| 9 |
+
from openai import OpenAI
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
|
| 15 |
# ============================================================
|
| 16 |
# MODEL LOADING (ONCE)
|
| 17 |
# ============================================================
|
| 18 |
|
| 19 |
+
# --- OpenAI client configuration (runs once at import time) ---
# The API key must be provided via the environment (on HF Spaces: a secret).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Fail fast: without a key every schema-generation call would fail later.
    # NOTE(review): this raise happens at module import, so a missing secret
    # makes the whole Space crash with an opaque "Runtime error" banner —
    # consider deferring client creation until first use; confirm with owner.
    raise RuntimeError("OPENAI_API_KEY not found in environment")

# Shared client used by generate_schema_with_llm(); created once per process.
llm_client = OpenAI(api_key=OPENAI_API_KEY)

# Similarity model runs on CPU (Spaces free tier has no GPU).
DEVICE = "cpu"

# Cross-encoder used for answer/concept similarity scoring.
SIM_MODEL_NAME = "cross-encoder/stsb-distilroberta-base"
|
|
|
|
| 80 |
# SCHEMA GENERATION (VIA OPENAI LLM, CACHED)
|
| 81 |
# ============================================================
|
| 82 |
|
| 83 |
+
def generate_schema_with_llm(kb, question):
    """Generate an explicit grading schema for *question* using ChatGPT.

    Calls gpt-4o-mini with temperature=0 (deterministic for caching by the
    caller, which stores results in SCHEMA_CACHE keyed on (kb, question)).

    Args:
        kb: Knowledge-base text the expected answer must come from.
        question: The exam question to build an answer key for.

    Returns:
        dict parsed from the model's JSON output, with keys
        "question_type", "required_concepts", "forbidden_concepts",
        "allow_extra_info".

    Raises:
        ValueError: if the model's reply is not valid JSON.
    """

    prompt = f"""
You are an exam answer key generator.

Knowledge Base:
\"\"\"
{kb}
\"\"\"

Question:
\"\"\"
{question}
\"\"\"

TASK:
Extract the expected answer as atomic facts.
Return STRICT JSON with this schema:

{{
"question_type": "FACT | DEFINITION | EXPLANATION",
"required_concepts": ["fact1", "fact2"],
"forbidden_concepts": [],
"allow_extra_info": true
}}

Rules:
- required_concepts must be explicit factual statements
- Do NOT paraphrase excessively
- Do NOT invent facts
- JSON only. No explanations.
"""

    response = llm_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You generate grading rubrics for exams."},
            {"role": "user", "content": prompt}
        ],
        temperature=0,
        # JSON mode: the API guarantees the reply is a valid JSON object,
        # eliminating most parse failures below.
        response_format={"type": "json_object"}
    )

    content = response.choices[0].message.content.strip()

    # Defensive: models sometimes wrap JSON in markdown fences (```json ... ```)
    # despite "JSON only" instructions — strip them before parsing.
    content = re.sub(r"^```(?:json)?\s*|\s*```$", "", content).strip()

    try:
        schema = json.loads(content)
    except json.JSONDecodeError as err:
        # Chain the original decode error so the failing offset is preserved.
        raise ValueError(f"LLM returned invalid JSON:\n{content}") from err

    return schema
|
| 137 |
|
| 138 |
+
|
| 139 |
# ============================================================
|
| 140 |
# ANSWER DECOMPOSITION
|
| 141 |
# ============================================================
|
|
|
|
| 156 |
# --------------------
|
| 157 |
key = hash_key(kb, question)
|
| 158 |
if key not in SCHEMA_CACHE:
|
| 159 |
+
SCHEMA_CACHE[key] = generate_schema_with_llm(kb, question)
|
| 160 |
|
| 161 |
schema = SCHEMA_CACHE[key]
|
| 162 |
logs["schema"] = schema
|