Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import re | |
| import gradio as gr | |
| from mistralai.client import Mistral | |
| from difflib import SequenceMatcher | |
# Model identifiers used by the different evaluation modes.
MODEL_LARGE = "mistral-large-latest"
MODEL_SMALL = "mistral-small-latest"

# The API key is mandatory: a missing MISTRAL_API_KEY raises KeyError at import time.
api_key = os.environ["MISTRAL_API_KEY"]
# Single shared client reused by every request.
client = Mistral(api_key=api_key)
def clean_text(text, mode="normal"):
    """Normalize Arabic text before it is compared or sent to the model.

    The normalization folds the hamza-carrying alef forms (أ إ آ) to a bare
    alef, optionally folds a word-final ta marbuta / alef maqsura to ha / ya,
    blanks out hesitation fillers and collapses all whitespace (including
    newlines) to single spaces.

    Args:
        text: Raw input text; ``None`` or an empty string is accepted.
        mode: Evaluation mode. In the literal mode ("حرفي") the word-final
            letter folding is skipped so that spelling differences remain
            visible.

    Returns:
        The normalized, single-spaced text, or "" for empty input.
    """
    if not text:
        return ""

    text = re.sub(r'[أإآ]', 'ا', text)
    if mode != "حرفي":
        text = re.sub(r'ة\b', 'ه', text)
        text = re.sub(r'ى\b', 'ي', text)

    # The alef forms were already folded above, so fillers must be written in
    # their normalized spelling ('آآآ'/'أاا' -> 'ااا', 'إممم' -> 'اممم').
    # Longest first: otherwise 'ممم' would match inside 'اممم' and leave a
    # stray alef behind in the text.
    fillers = ('اممم', 'ممم', 'ااا', 'ءءء')
    for filler in fillers:
        text = text.replace(filler, ' ')

    # str.split() with no argument splits on any whitespace, newlines included.
    return " ".join(text.split())
def get_model(mode):
    """Return the Mistral model name for an evaluation mode.

    The comprehension mode ("فهمي") uses the large model; the hybrid ("هجين")
    and literal ("حرفي") modes use the small one. Any other value yields None.
    """
    if mode == "فهمي":
        return MODEL_LARGE
    if mode in ("هجين", "حرفي"):
        return MODEL_SMALL
    return None
| # def literal_score(text1, text2): | |
| # return SequenceMatcher(None, text1, text2).ratio() * 100 | |
def get_grade(score):
    """Map a numeric score to its Arabic grade label.

    Bands are checked from the highest threshold down; a score below every
    threshold gets the "please retry" label.
    """
    bands = (
        (90, "ممتاز"),
        (80, "جدا جيد"),
        (70, "جيد"),
        (60, "مقبول"),
    )
    for floor, label in bands:
        if score >= floor:
            return label
    return "يرجى الاعادة"
def build_prompt(text1, text2, mode):
    """Build the grading prompt sent to the model.

    Args:
        text1: The reference (original) text.
        text2: The student's attempt.
        mode: Evaluation mode; must be one of the keys of the rule table
            below ("حرفي", "فهمي", "هجين"), otherwise KeyError is raised.

    Returns:
        The full prompt string, instructing the model to answer with a JSON
        object containing "score", "grade" and "reason".
    """
    # Mode-specific grading instructions.
    rules_by_mode = {
        "حرفي": """
- Focus: Absolute Word-for-word accuracy (Memorization).
- Policy: Every word must match. Ignore minor diacritics (Tashkeel) or punctuation.
- Deductions: -10 points for any missing, added, or swapped word.
- Strictness: If the student changes the vocabulary even if meaning is similar, they MUST lose points.
""",
        "فهمي": """
- Focus: Semantic meaning and core concepts.
- Policy: Accept paraphrasing and synonyms.
- Strictness: If the topic is different (e.g. Technology vs Reading), the score MUST be 0.
- Deductions: -25 points for each major missing idea.
""",
        "هجين": """
- Focus: Balance between literal words and overall meaning.
- Policy: Small word changes are okay, but key terms must remain.
- Strictness: If the topic is unrelated, the score MUST be 0.
- Deductions: -15 points for missing key terms, -10 for weak paraphrasing.
""",
    }
    rules = rules_by_mode[mode]

    # Doubled braces in the template produce the literal JSON braces.
    prompt = f"""
You are a **ruthless and strict** Arabic language examiner. Your goal is to grade a student's recitation (Tasmée) against an original reference.
### Context:
- **Original Text (The Reference):** {text1}
- **Student's Text (The Attempt):** {text2}
- **Evaluation Mode:** {mode}
### Grading Rubric:
1. **Topic Check:** If the student talks about a COMPLETELY different topic, assign a score of 0 and grade "يرجى الاعادة".
2. **Rules for "{mode}":**
{rules}
3. **Hallucination:** If the student adds info not in the reference, deduct 20 points.
4. **Direct Feedback:** The "reason" MUST be in Arabic, addressed DIRECTLY to the student (e.g., "لقد نسيت ذكر...", "أحسنت ولكن...").
### Output:
Return ONLY a valid JSON. No markdown, no comments.
{{
"score": <integer 0-100>,
"grade": "ممتاز | جدا جيد | جيد | مقبول | يرجى الاعادة",
"reason": "<Direct Arabic feedback to the student>"
}}
"""
    return prompt
def extract_json(text):
    """Clean a model reply so that it has a better chance of parsing as JSON.

    Two repairs are applied:

    1. Markdown code fences (``` or ```json, any letter case) are removed and
       the result is stripped.
    2. Raw newline, carriage-return and tab characters that appear *inside*
       a double-quoted string are replaced by their JSON escape sequences,
       since a strict JSON parser rejects raw control characters in strings.

    Whether a quote opens/closes a string is decided by tracking backslash
    escapes properly, so a string ending in an escaped backslash (``"x\\\\"``)
    is recognized as closed.

    Args:
        text: The raw text returned by the model.

    Returns:
        The cleaned text. It is not guaranteed to be valid JSON.
    """
    # Remove markdown fences.
    text = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).strip()

    control_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}
    pieces = []
    in_string = False
    escaped = False  # True when the previous in-string char was an unpaired backslash

    for char in text:
        if not in_string:
            if char == '"':
                in_string = True
            pieces.append(char)
            continue

        if char in control_escapes:
            # Raw control character inside a string: emit its escape sequence.
            pieces.append(control_escapes[char])
            escaped = False
            continue

        pieces.append(char)
        if escaped:
            # This character is consumed by the preceding backslash.
            escaped = False
        elif char == "\\":
            escaped = True
        elif char == '"':
            in_string = False

    return "".join(pieces)
def call_mistral(prompt, model):
    """Send a single-turn prompt to the given model and return its reply text.

    The request uses temperature 0. Any exception raised while calling the
    API or reading the reply is converted into a string of the form
    "ERROR: <message>" instead of propagating to the caller.
    """
    request = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,
    }
    try:
        reply = client.chat.complete(**request)
        answer = reply.choices[0].message.content
        return answer.strip()
    except Exception as exc:
        return f"ERROR: {exc}"
def evaluate_similarity(text1, text2, mode):
    """Grade the student's text against the reference and return a JSON report.

    Both texts are normalized with ``clean_text``, a prompt is built for the
    selected mode, the model is queried, and its reply is parsed as JSON.

    Args:
        text1: The reference (original) text.
        text2: The student's text.
        mode: Evaluation mode ("حرفي", "فهمي" or "هجين").

    Returns:
        A JSON string. On success it holds "evaluation_mode", "model_used"
        and "details" (the object parsed from the model reply). On failure
        it holds an "error" message, plus "raw_output" when the model reply
        could not be parsed.
    """
    clean_ocr = clean_text(text1, mode)
    clean_stu = clean_text(text2, mode)
    if not clean_ocr or not clean_stu:
        return json.dumps({"error": "يرجى إدخال النصوص"}, ensure_ascii=False)

    # NOTE: a purely algorithmic (non-AI) path for the literal mode used to
    # live here; it is disabled and every mode now goes through the model.
    model = get_model(mode)
    if model is None:
        # Unknown mode: report it like the other failures instead of letting
        # build_prompt fail on the missing rule set.
        return json.dumps({"error": "نوع التقييم غير مدعوم"}, ensure_ascii=False)

    prompt = build_prompt(clean_ocr, clean_stu, mode)
    content = call_mistral(prompt, model)
    clean_content = extract_json(content)

    try:
        data = json.loads(clean_content)
    except json.JSONDecodeError:
        # Rescue attempt: parse the outermost {...} span of the reply.
        match = re.search(r'\{.*\}', clean_content, re.DOTALL)
        if match is None:
            return json.dumps({
                "error": "المودل لم يرجع JSON قابل للاستخراج",
                "raw_output": content
            }, ensure_ascii=False, indent=2)
        try:
            data = json.loads(match.group())
        except json.JSONDecodeError:
            return json.dumps({
                "error": "فشل تحليل JSON",
                "raw_output": content
            }, ensure_ascii=False, indent=2)

    return json.dumps({
        "evaluation_mode": mode,
        "model_used": model,
        "details": data
    }, ensure_ascii=False, indent=2)
# --- Gradio interface ---
# Three inputs (reference text, student text, evaluation mode) are passed
# positionally to evaluate_similarity; its JSON string is shown in a code view.
demo = gr.Interface(
    title="🎓 مقيم التسميع الذكي باستخدام Mistral",
    description="قيم نص الطالب مقابل النص الأصلي بطريقة حرفية أو فهمية أو هجين.",
    fn=evaluate_similarity,
    inputs=[
        gr.Textbox(label="النص الأصلي", lines=8),
        gr.Textbox(label="نص الطالب", lines=8),
        gr.Radio(choices=["حرفي", "فهمي", "هجين"], label="نوع التقييم", value="فهمي"),
    ],
    outputs=gr.Code(language="json", label="النتيجة"),
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()