Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,14 +7,13 @@ import google.generativeai as genai
|
|
| 7 |
from sentence_transformers import SentenceTransformer, util
|
| 8 |
|
| 9 |
# ============================================================
|
| 10 |
-
# CONFIG
|
| 11 |
# ============================================================
|
| 12 |
-
# Best practice: use os.environ.get("GEMINI_API_KEY")
|
| 13 |
GEMINI_API_KEY = "AIzaSyBrbLGXkSdXReb0lUucYqcNCNBkvS-RBFw"
|
| 14 |
genai.configure(api_key=GEMINI_API_KEY)
|
| 15 |
|
| 16 |
-
# Use
|
| 17 |
-
MODEL = genai.GenerativeModel("gemini-
|
| 18 |
|
| 19 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 20 |
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
@@ -25,86 +24,71 @@ embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
|
|
| 25 |
print("✅ Ready")
|
| 26 |
|
| 27 |
# ============================================================
|
| 28 |
-
#
|
| 29 |
# ============================================================
|
| 30 |
-
def
|
| 31 |
-
"""
|
| 32 |
prompt = f"""
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
2. Create a grading rubric of 3-6 atomic criteria based ONLY on the Knowledge Base.
|
| 37 |
-
|
| 38 |
-
Knowledge Base: {kb}
|
| 39 |
Question: {question}
|
| 40 |
|
| 41 |
-
|
| 42 |
{{
|
| 43 |
-
"intent": "
|
| 44 |
-
"criteria": ["criterion 1", "criterion 2"
|
| 45 |
}}
|
| 46 |
"""
|
| 47 |
try:
|
| 48 |
response = MODEL.generate_content(prompt)
|
| 49 |
-
# Handle
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
text = response.text.strip()
|
| 54 |
-
# Clean JSON if model adds markdown backticks
|
| 55 |
-
text = re.sub(r'^```json\s*|\s*```$', '', text, flags=re.MULTILINE)
|
| 56 |
-
return json.loads(text)
|
| 57 |
except Exception as e:
|
| 58 |
print(f"API Error: {e}")
|
| 59 |
-
return {"intent": "
|
| 60 |
|
| 61 |
def evaluate(answer, question, kb):
|
| 62 |
-
#
|
| 63 |
-
data =
|
| 64 |
-
intent = data.get("intent", "EXPLANATORY")
|
| 65 |
rubric = data.get("criteria", [])
|
| 66 |
|
| 67 |
if not rubric:
|
| 68 |
-
return {"
|
| 69 |
|
| 70 |
-
#
|
| 71 |
sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', answer) if len(s.strip()) > 5]
|
| 72 |
if not sents:
|
| 73 |
-
return {"
|
| 74 |
|
| 75 |
ans_emb = embedder.encode(sents, convert_to_tensor=True)
|
| 76 |
-
|
| 77 |
-
|
| 78 |
for crit in rubric:
|
| 79 |
crit_emb = embedder.encode(crit, convert_to_tensor=True)
|
| 80 |
sims = util.cos_sim(crit_emb, ans_emb)[0]
|
| 81 |
-
|
| 82 |
-
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
if
|
| 87 |
-
elif hit >= max(1, len(scored) // 2): verdict_text = "⚠️ PARTIALLY CORRECT"
|
| 88 |
-
else: verdict_text = "❌ INCORRECT"
|
| 89 |
|
| 90 |
return {
|
| 91 |
-
"intent": intent,
|
| 92 |
-
"
|
| 93 |
-
"final_verdict":
|
| 94 |
}
|
| 95 |
|
| 96 |
-
# ============================================================
|
| 97 |
# UI
|
| 98 |
-
# ============================================================
|
| 99 |
with gr.Blocks() as demo:
|
| 100 |
-
gr.Markdown("## 🧠 Gemini
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
gr.Button("Evaluate").click(evaluate, [a, q, kb], out)
|
| 109 |
-
|
| 110 |
-
demo.launch()
|
|
|
|
| 7 |
import os

from sentence_transformers import SentenceTransformer, util

# ============================================================
# CONFIG
# ============================================================
# SECURITY FIX: the previous revision committed a literal Gemini API key.
# A key published in a repository must be treated as compromised (rotate
# it), and the file's own earlier comment already recommended reading it
# from the environment instead of hard-coding it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
genai.configure(api_key=GEMINI_API_KEY)

# UPDATED: Use a supported 2026 model
MODEL = genai.GenerativeModel("gemini-2.5-flash")

# Prefer GPU for local sentence embeddings when one is available.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
|
| 24 |
print("✅ Ready")
|
| 25 |
|
| 26 |
# ============================================================
# LOGIC
# ============================================================
def get_evaluation_data(kb, question):
    """Gets both intent and rubric in one single API request.

    Args:
        kb: Knowledge-base text the rubric must be grounded in.
        question: The question being graded.

    Returns:
        dict with keys "intent" (a label string) and "criteria"
        (list of criterion strings). On any API or JSON-parsing
        failure, returns {"intent": "ERROR", "criteria": []} so the
        caller can degrade gracefully instead of crashing.
    """
    prompt = f"""
Acting as an examiner, analyze the Knowledge Base (KB) and Question.
1. Determine the intent (FACTUAL, EXPLANATORY, PROCESS, or COMPARISON).
2. Create a rubric of 3-5 atomic grading criteria based ONLY on the KB.

KB: {kb}
Question: {question}

OUTPUT JSON ONLY:
{{
"intent": "LABEL",
"criteria": ["criterion 1", "criterion 2"]
}}
"""
    try:
        response = MODEL.generate_content(prompt)
        text = response.text.strip()
        # Handle cases where the model wraps JSON in markdown backticks.
        # FIX: strip the fence only at the string edges — the previous
        # global sub(r'```json|```') removed backticks ANYWHERE in the
        # payload and could corrupt criterion text containing backticks.
        text = re.sub(r'^```(?:json)?\s*|\s*```$', '', text)
        return json.loads(text)
    except Exception as e:  # best-effort: surface failure as an ERROR intent
        print(f"API Error: {e}")
        return {"intent": "ERROR", "criteria": []}
| 53 |
|
| 54 |
def evaluate(answer, question, kb):
    """Grade *answer* against a Gemini-generated rubric derived from *kb*.

    Args:
        answer: Student answer text (free-form prose).
        question: The question the answer responds to.
        kb: Knowledge-base text used to build the grading rubric.

    Returns:
        dict with "intent", per-criterion "rubric_results", and a
        "final_verdict" string — or an {"error": ...} dict when the
        rubric cannot be built or the answer is too short.
    """
    # API Call — one Gemini round-trip returns both intent and rubric.
    data = get_evaluation_data(kb, question)
    rubric = data.get("criteria", [])

    if not rubric:
        # Also covers the API-error fallback, which returns "criteria": [].
        return {"error": "Could not generate rubric. Check API status."}

    # Semantic Matching (Local)
    # Split the answer into sentences at .!? boundaries; discard fragments
    # of 5 characters or fewer.
    sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', answer) if len(s.strip()) > 5]
    if not sents:
        return {"error": "Answer is too short to evaluate."}

    ans_emb = embedder.encode(sents, convert_to_tensor=True)
    results = []
    for crit in rubric:
        crit_emb = embedder.encode(crit, convert_to_tensor=True)
        # Best cosine similarity between this criterion and any one sentence.
        sims = util.cos_sim(crit_emb, ans_emb)[0]
        score = float(torch.max(sims)) if sims.numel() else 0.0
        # NOTE(review): SIM_THRESHOLD is not defined in any visible hunk of
        # this diff — presumably a module-level constant; confirm upstream.
        results.append({"criterion": crit, "satisfied": score >= SIM_THRESHOLD})

    # Verdict: all criteria hit -> CORRECT; some -> PARTIAL; none -> INCORRECT.
    hits = sum(r["satisfied"] for r in results)
    verdict = "✅ CORRECT" if hits == len(results) else "⚠️ PARTIAL" if hits > 0 else "❌ INCORRECT"

    return {
        "intent": data.get("intent"),
        "rubric_results": results,
        "final_verdict": verdict
    }
|
| 84 |
|
|
|
|
| 85 |
# UI — Gradio front-end: three text inputs feed evaluate(), JSON output.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Gemini 2.5 Answer Grader")
    kb_input = gr.Textbox(label="Knowledge Base", lines=5)
    q_input = gr.Textbox(label="Question")
    a_input = gr.Textbox(label="Student Answer", lines=4)
    out = gr.JSON(label="Evaluation Result")
    # Input order matches the evaluate(answer, question, kb) signature.
    gr.Button("Evaluate").click(evaluate, [a_input, q_input, kb_input], out)

demo.launch()
|
|
|
|
|
|
|
|
|