masfundeai-api

Running

App Files Files Community

rairo committed on Apr 23

Commit

adb5280

verified ·

1 Parent(s): 2f314df

Update main.py

Browse files

Files changed (1) hide show

main.py +148 -50

main.py CHANGED Viewed

@@ -3,10 +3,10 @@ import json
 import logging
 import re
 import base64
-import tempfile
 import time
 from datetime import datetime
 from io import BytesIO
 # Third-party imports
 from flask import Flask, request, jsonify, Response, stream_with_context
@@ -42,7 +42,7 @@ MODEL_ID = 'gemini-2.5-flash'
 # GENERIC GEMINI HELPERS
 # ---------------------------------------------------------------------------
-def call_gemini(contents, system_instruction: str | None = None, retries: int = 2):
     """
     Send a non-streaming request to Gemini and return the response text.
     `contents` can be a string, a list of Parts, or a list of Content objects.
@@ -63,6 +63,9 @@ def call_gemini(contents, system_instruction: str | None = None, retries: int =
             return response.text
         except Exception as e:
             if "429" in str(e) or "ResourceExhausted" in str(e):
                 time.sleep(2 * (attempt + 1))
                 continue
             logging.error(f"Gemini error: {e}")
@@ -71,7 +74,7 @@ def call_gemini(contents, system_instruction: str | None = None, retries: int =
     return ""
-def call_gemini_stream(contents, system_instruction: str | None = None):
     """
     Yield text chunks from a streaming Gemini request.
     Used for the chat endpoint.
@@ -95,12 +98,30 @@ def parse_json_response(text: str) -> dict:
     """Strip markdown fences and parse JSON from a Gemini response."""
     cleaned = re.sub(r'```json\s*', '', text)
     cleaned = re.sub(r'```\s*', '', cleaned)
-    match = re.search(r'(\{.*\}|\[.*\])', cleaned, re.DOTALL)
-    json_str = match.group(1) if match else cleaned
     return json.loads(json_str)
-def gemini_json(contents, system_instruction: str | None = None, retries: int = 2) -> dict:
     """Call Gemini and parse the response as JSON. Returns {} on failure."""
     text = call_gemini(contents, system_instruction=system_instruction, retries=retries)
     try:
@@ -119,7 +140,7 @@ def gemini_json(contents, system_instruction: str | None = None, retries: int =
 # SHARED VALIDATION HELPER
 # ---------------------------------------------------------------------------
-def require_fields(data: dict, fields: list) -> str | None:
     """Return an error message string if any required field is missing, else None."""
     missing = [f for f in fields if not data.get(f)]
     if missing:
@@ -130,9 +151,11 @@ def require_fields(data: dict, fields: list) -> str | None:
 # 1. CHAT COMPLETION  –  POST /functions/v1/ai-chat
 #
 # The client sends:
-#   messages   – array of {role, content}   (full history incl. system turns if any)
-#   systemContext – string (optional)  the system prompt / role context from the client
-#   locale     – string (optional, default "en")
 # ---------------------------------------------------------------------------
 @app.route('/functions/v1/ai-chat', methods=['POST'])
@@ -142,13 +165,30 @@ def ai_chat():
     if not messages or not isinstance(messages, list):
         return jsonify({'error': 'messages array is required'}), 400
-    system_context = data.get('systemContext', '')   # client-supplied context
-    locale = data.get('locale', 'en')
     system_instruction = (
-        f"{system_context}\n\nAlways respond in the language/locale: {locale}."
-        if system_context
-        else f"You are a helpful educational assistant. Respond in locale: {locale}."
     )
     # Build a list of Content objects from the messages array
@@ -162,9 +202,13 @@ def ai_chat():
             types.Content(role=gemini_role, parts=[types.Part(text=content)])
         )
     def sse_generator():
         try:
-            for chunk in call_gemini_stream(contents, system_instruction=system_instruction):
                 payload = json.dumps({
                     "choices": [{"delta": {"content": chunk}, "finish_reason": None}]
                 })
@@ -207,35 +251,53 @@ def ai_student_insights():
         system_instruction = (
             "You are an educational AI analyst. Analyse student performance data and return "
-            "a structured JSON report. Be empathetic, data-driven and actionable."
         )
         prompt = f"""Generate a student performance insights report as JSON.
 Student ID: {student_id}
 School ID: {school_id}
-Subjects filter: {subjects if subjects else 'all subjects'}
 Term: {term_id}
 Include recommendations: {include_recs}
 Return ONLY valid JSON matching this exact structure:
 {{
   "studentId": "{student_id}",
-  "summary": "<2-3 sentence overview>",
   "subjectBreakdown": [
     {{
-      "subject": "<subject>",
-      "trend": "<improving|stable|declining>",
-      "averageScore": <number 0-100>,
-      "insight": "<specific insight>",
-      "riskLevel": "<low|medium|high>"
     }}
   ],
   "recommendations": ["<action 1>", "<action 2>"],
-  "overallRiskLevel": "<low|medium|high>"
 }}
 """
         result = gemini_json(prompt, system_instruction=system_instruction)
         return jsonify(result)
     except Exception as e:
@@ -370,7 +432,7 @@ def read_uploaded_pdf(file_field_name: str = 'file'):
 @app.route('/functions/v1/ai-scheme-to-schedule', methods=['POST'])
 def ai_scheme_to_schedule():
-    temp_path = None
     try:
         system_instruction = (
             "You are an expert school timetable planner. Read the scheme of work carefully, "
@@ -494,7 +556,7 @@ Return ONLY valid JSON matching this exact structure:
         else:
             # Scanned / image-only PDF – send the raw PDF bytes to Gemini Vision
             logging.info("PDF: no text layer, using Gemini native PDF vision.")
-            pdf_part   = pdf_bytes_to_inline_part(pdf_bytes)
             prompt_part = types.Part(text=schedule_prompt)
             client = get_client()
@@ -522,9 +584,6 @@ Return ONLY valid JSON matching this exact structure:
     except Exception as e:
         logging.error(f"/ai-scheme-to-schedule error: {e}")
         return jsonify({'error': str(e)}), 500
-    finally:
-        if temp_path and os.path.exists(temp_path):
-            os.remove(temp_path)
 # ---------------------------------------------------------------------------
 # 5. IMAGE DATA IMPORT (OCR)  –  POST /functions/v1/ai-image-import
@@ -537,7 +596,6 @@ def ai_image_import():
     - JSON body with `imageBase64` (base64-encoded JPEG/PNG string)
     - multipart/form-data with a `file` field
     """
-    temp_path = None
     try:
         import_type  = None
         school_id    = None
@@ -574,11 +632,21 @@ def ai_image_import():
             "Extract structured data from the image with high accuracy. Return only JSON."
         )
         prompt = f"""Extract data from this school document image.
 Import Type: {import_type}
 School ID: {school_id}
-Additional context: {json.dumps({k: v for k, v in (request.get_json(silent=True) or {}).items() if k not in ['imageBase64', 'importType', 'schoolId']})}
 For importType="{import_type}", return a JSON object matching the documented output schema.
 Include confidence scores, warnings for unclear fields, and a summary object.
@@ -610,9 +678,6 @@ Return ONLY valid JSON.
     except Exception as e:
         logging.error(f"/ai-image-import error: {e}")
         return jsonify({'error': str(e)}), 500
-    finally:
-        if temp_path and os.path.exists(temp_path):
-            os.remove(temp_path)
 # ---------------------------------------------------------------------------
 # 6. REPORT CARD COMMENT GENERATOR  –  POST /functions/v1/ai-report-comment
@@ -685,7 +750,9 @@ def ai_parent_message():
         system_instruction = (
             "You are a school communication specialist. Translate academic jargon into "
             "simple, parent-friendly language and optionally translate to the requested language. "
-            "Return only JSON."
         )
         prompt = f"""Simplify and translate a school message for a parent.
@@ -695,16 +762,28 @@ Child's name: {child_name}
 Context: {context}  (report_card | attendance | behaviour | general)
 Target language: {target_language}
 Return ONLY valid JSON:
 {{
   "originalMessage": "{message}",
-  "simplifiedMessage": "<parent-friendly version in English>",
   "translatedMessage": "<translated version if target_language != 'en', else same as simplifiedMessage>",
   "language": "{target_language}",
-  "suggestedActions": ["<actionable tip for parent>"]
 }}
 """
         result = gemini_json(prompt, system_instruction=system_instruction)
         return jsonify(result)
     except Exception as e:
@@ -733,7 +812,14 @@ def ai_attendance_analysis():
         system_instruction = (
             "You are an educational data analyst specialising in attendance patterns. "
-            "Identify at-risk students and actionable patterns. Return only JSON."
         )
         prompt = f"""Analyse student attendance data and detect patterns.
@@ -743,33 +829,45 @@ Scope: {scope}  (school | class | student)
 Scope ID: {scope_id or 'N/A'}
 Date Range: {json.dumps(date_range) if date_range else 'current term'}
 Return ONLY valid JSON:
 {{
   "scope": "{scope}",
   "scopeId": "{scope_id or ''}",
   "period": {json.dumps(date_range) if date_range else '{{"from": "term start", "to": "term end"}}'},
-  "overallRate": <percentage>,
   "patterns": [
     {{
       "type": "<day_of_week|chronic_absence|weather_related|etc>",
-      "detail": "<description>",
       "severity": "<low|medium|high>",
-      "studentIds": ["<id>"]
     }}
   ],
   "atRiskStudents": [
     {{
-      "studentId": "<id>",
-      "studentName": "<name>",
-      "absenceRate": <percentage>,
       "trend": "<improving|stable|worsening>",
       "recommendation": "<action>"
     }}
   ],
-  "summary": "<overall summary sentence>"
 }}
 """
         result = gemini_json(prompt, system_instruction=system_instruction)
         return jsonify(result)
     except Exception as e:
@@ -848,10 +946,10 @@ def ai_homework_help():
         if err:
             return jsonify({'error': err}), 400
-        question    = data['question']
-        subject     = data['subject']
-        grade       = data['grade']
-        student_id  = data['studentId']
         show_working = data.get('showWorking', True)
         system_instruction = (

 import logging
 import re
 import base64
 import time
 from datetime import datetime
 from io import BytesIO
+from typing import Optional, Union
 # Third-party imports
 from flask import Flask, request, jsonify, Response, stream_with_context
 # GENERIC GEMINI HELPERS
 # ---------------------------------------------------------------------------
+def call_gemini(contents, system_instruction: Optional[str] = None, retries: int = 2):
     """
     Send a non-streaming request to Gemini and return the response text.
     `contents` can be a string, a list of Parts, or a list of Content objects.
             return response.text
         except Exception as e:
             if "429" in str(e) or "ResourceExhausted" in str(e):
+                if attempt == retries:
+                    logging.error(f"Gemini rate limit exceeded after {retries + 1} attempts.")
+                    raise
                 time.sleep(2 * (attempt + 1))
                 continue
             logging.error(f"Gemini error: {e}")
     return ""
+def call_gemini_stream(contents, system_instruction: Optional[str] = None):
     """
     Yield text chunks from a streaming Gemini request.
     Used for the chat endpoint.
     """Strip markdown fences and parse JSON from a Gemini response."""
     cleaned = re.sub(r'```json\s*', '', text)
     cleaned = re.sub(r'```\s*', '', cleaned)
+    # Presence check only: a non-greedy match cannot capture nested JSON, so the balanced scan below does the actual extraction
+    match = re.search(r'(\{.*?\}|\[.*?\])', cleaned, re.DOTALL)
+    # Default to the whole cleaned string; it is kept as-is unless the scan below finds a balanced {...} or [...] span
+    json_str = cleaned.strip()
+    if match:
+        # Try to find the outermost balanced structure
+        for i, ch in enumerate(cleaned):
+            if ch in ('{', '['):
+                opener = ch
+                closer = '}' if ch == '{' else ']'
+                depth = 0
+                for j, c in enumerate(cleaned[i:], i):
+                    if c == opener:
+                        depth += 1
+                    elif c == closer:
+                        depth -= 1
+                    if depth == 0:
+                        json_str = cleaned[i:j + 1]
+                        break
+                break
     return json.loads(json_str)
+def gemini_json(contents, system_instruction: Optional[str] = None, retries: int = 2) -> dict:
     """Call Gemini and parse the response as JSON. Returns {} on failure."""
     text = call_gemini(contents, system_instruction=system_instruction, retries=retries)
     try:
 # SHARED VALIDATION HELPER
 # ---------------------------------------------------------------------------
+def require_fields(data: dict, fields: list) -> Optional[str]:
     """Return an error message string if any required field is missing, else None."""
     missing = [f for f in fields if not data.get(f)]
     if missing:
 # 1. CHAT COMPLETION  –  POST /functions/v1/ai-chat
 #
 # The client sends:
+#   messages      – array of {role, content}  (full conversation history)
+#   userRole      – string  Required per spec. One of: admin, teacher, student, parent
+#   schoolId      – string  Required per spec. School context identifier
+#   locale        – string  Optional. Language preference (default: "en")
+#   systemContext – string  Deprecated legacy field. Ignored when userRole is present.
 # ---------------------------------------------------------------------------
 @app.route('/functions/v1/ai-chat', methods=['POST'])
     if not messages or not isinstance(messages, list):
         return jsonify({'error': 'messages array is required'}), 400
+    # Per spec, userRole and schoolId are required fields
+    err = require_fields(data, ['userRole', 'schoolId'])
+    if err:
+        return jsonify({'error': err}), 400
+    user_role  = data['userRole']
+    school_id  = data['schoolId']
+    locale     = data.get('locale', 'en')
+    # Role-aware system prompts injected server-side per spec.
+    # Never invent student/school data not explicitly provided in the conversation.
+    role_context = {
+        'admin':   "You are a helpful school administration assistant. You help with school management, reports, and oversight tasks.",
+        'teacher': "You are a helpful teaching assistant. You help with lesson planning, student support, and classroom management.",
+        'student': "You are a friendly and encouraging student tutor. Guide students to understand concepts without giving direct answers.",
+        'parent':  "You are a helpful parent liaison. Explain school information clearly and in plain language.",
+    }.get(user_role, "You are a helpful educational assistant.")
     system_instruction = (
+        f"{role_context} "
+        f"You are operating in the context of school ID: {school_id}. "
+        f"IMPORTANT: Never invent, hallucinate, or fabricate student names, IDs, scores, or any school data not explicitly provided in the conversation. "
+        f"If you do not have the data needed to answer, say so clearly. "
+        f"Always respond in the language/locale: {locale}."
     )
     # Build a list of Content objects from the messages array
             types.Content(role=gemini_role, parts=[types.Part(text=content)])
         )
+    # Capture for closure
+    _contents = contents
+    _system_instruction = system_instruction
     def sse_generator():
         try:
+            for chunk in call_gemini_stream(_contents, system_instruction=_system_instruction):
                 payload = json.dumps({
                     "choices": [{"delta": {"content": chunk}, "finish_reason": None}]
                 })
         system_instruction = (
             "You are an educational AI analyst. Analyse student performance data and return "
+            "a structured JSON report. Be empathetic, data-driven and actionable. "
+            "CRITICAL: You do not have access to a database. Never invent, fabricate, or "
+            "hallucinate student names, database IDs, scores, or subject data. "
+            "Use only data explicitly provided in this prompt. "
+            "In the summary field, refer to the student by their studentId only — never invent a name. "
+            "In subjectBreakdown, only include subjects explicitly listed in the subjects filter; "
+            "if no filter is given, state that subject-level data was not provided."
         )
         prompt = f"""Generate a student performance insights report as JSON.
 Student ID: {student_id}
 School ID: {school_id}
+Subjects filter: {subjects if subjects else 'not specified — do not invent subject data'}
 Term: {term_id}
 Include recommendations: {include_recs}
+IMPORTANT RULES:
+- Use the studentId "{student_id}" exactly as provided. Do not invent a student name.
+- Only include subjectBreakdown entries for subjects explicitly listed above.
+- If no real performance data is available, set averageScore to null and explain in the insight field.
+- Do not fabricate trends, scores, or risk levels.
 Return ONLY valid JSON matching this exact structure:
 {{
   "studentId": "{student_id}",
+  "summary": "<2-3 sentence overview referring to student by ID, not an invented name>",
   "subjectBreakdown": [
     {{
+      "subject": "<subject from the provided filter only>",
+      "trend": "<improving|stable|declining|unknown>",
+      "averageScore": <number 0-100 or null if not available>,
+      "insight": "<specific insight based only on provided data>",
+      "riskLevel": "<low|medium|high|unknown>"
     }}
   ],
   "recommendations": ["<action 1>", "<action 2>"],
+  "overallRiskLevel": "<low|medium|high|unknown>"
 }}
 """
         result = gemini_json(prompt, system_instruction=system_instruction)
+        # Sanitise: ensure studentId in output matches what was requested,
+        # not a hallucinated value
+        if result:
+            result['studentId'] = student_id
         return jsonify(result)
     except Exception as e:
 @app.route('/functions/v1/ai-scheme-to-schedule', methods=['POST'])
 def ai_scheme_to_schedule():
+    # NOTE: temp_path and the finally block that cleaned it up were removed in this change; nothing is initialised here
     try:
         system_instruction = (
             "You are an expert school timetable planner. Read the scheme of work carefully, "
         else:
             # Scanned / image-only PDF – send the raw PDF bytes to Gemini Vision
             logging.info("PDF: no text layer, using Gemini native PDF vision.")
+            pdf_part    = pdf_bytes_to_inline_part(pdf_bytes)
             prompt_part = types.Part(text=schedule_prompt)
             client = get_client()
     except Exception as e:
         logging.error(f"/ai-scheme-to-schedule error: {e}")
         return jsonify({'error': str(e)}), 500
 # ---------------------------------------------------------------------------
 # 5. IMAGE DATA IMPORT (OCR)  –  POST /functions/v1/ai-image-import
     - JSON body with `imageBase64` (base64-encoded JPEG/PNG string)
     - multipart/form-data with a `file` field
     """
     try:
         import_type  = None
         school_id    = None
             "Extract structured data from the image with high accuracy. Return only JSON."
         )
+        # FIX: the original built this context from request.get_json(silent=True) only,
+        # which yields None (hence an empty dict) for multipart requests, so any extra
+        # form fields were silently dropped. Read request.form in the multipart case instead.
+        if request.content_type and 'multipart/form-data' in request.content_type:
+            extra_context = {k: v for k, v in request.form.items()
+                             if k not in ['importType', 'schoolId']}
+        else:
+            extra_context = {k: v for k, v in (request.get_json(silent=True) or {}).items()
+                             if k not in ['imageBase64', 'importType', 'schoolId']}
         prompt = f"""Extract data from this school document image.
 Import Type: {import_type}
 School ID: {school_id}
+Additional context: {json.dumps(extra_context)}
 For importType="{import_type}", return a JSON object matching the documented output schema.
 Include confidence scores, warnings for unclear fields, and a summary object.
     except Exception as e:
         logging.error(f"/ai-image-import error: {e}")
         return jsonify({'error': str(e)}), 500
 # ---------------------------------------------------------------------------
 # 6. REPORT CARD COMMENT GENERATOR  –  POST /functions/v1/ai-report-comment
         system_instruction = (
             "You are a school communication specialist. Translate academic jargon into "
             "simple, parent-friendly language and optionally translate to the requested language. "
+            "CRITICAL: Only use information explicitly present in the provided message. "
+            "Never invent subject names, scores, database IDs, or any data not in the message. "
+            "Refer to the child only by the name provided. Return only JSON."
         )
         prompt = f"""Simplify and translate a school message for a parent.
 Context: {context}  (report_card | attendance | behaviour | general)
 Target language: {target_language}
+IMPORTANT RULES:
+- Only reference subjects, scores, and details that appear verbatim in the original message above.
+- Do not invent or infer subject names or data not present in the message.
+- Refer to the child only as "{child_name}" — never substitute a different name.
+- Never include database IDs in any field.
 Return ONLY valid JSON:
 {{
   "originalMessage": "{message}",
+  "simplifiedMessage": "<parent-friendly version in English using only data from the original message>",
   "translatedMessage": "<translated version if target_language != 'en', else same as simplifiedMessage>",
   "language": "{target_language}",
+  "suggestedActions": ["<actionable tip for parent based only on the message content>"]
 }}
 """
         result = gemini_json(prompt, system_instruction=system_instruction)
+        # Sanitise: pin fields that must match the request inputs exactly
+        if result:
+            result['originalMessage'] = message
+            result['language'] = target_language
         return jsonify(result)
     except Exception as e:
         system_instruction = (
             "You are an educational data analyst specialising in attendance patterns. "
+            "Identify at-risk students and actionable patterns. "
+            "CRITICAL: You do not have access to a database. Never invent, fabricate, or "
+            "hallucinate student names, student IDs, percentages, or attendance records. "
+            "Only use data explicitly provided in this prompt. "
+            "If real attendance data is not provided, set overallRate to null, return empty "
+            "arrays for patterns and atRiskStudents, and explain in the summary that no data was supplied. "
+            "Never expose raw database document IDs in summary, detail, or recommendation text fields. "
+            "Return only JSON."
         )
         prompt = f"""Analyse student attendance data and detect patterns.
 Scope ID: {scope_id or 'N/A'}
 Date Range: {json.dumps(date_range) if date_range else 'current term'}
+IMPORTANT RULES:
+- Do not invent student names or IDs. Only use names/IDs explicitly provided above.
+- Do not fabricate attendance percentages or absence counts.
+- If attendance records are not included in this prompt, return empty patterns and atRiskStudents arrays.
+- Never expose raw database document IDs in any text field (summary, detail, recommendation).
 Return ONLY valid JSON:
 {{
   "scope": "{scope}",
   "scopeId": "{scope_id or ''}",
   "period": {json.dumps(date_range) if date_range else '{{"from": "term start", "to": "term end"}}'},
+  "overallRate": <percentage or null if no data provided>,
   "patterns": [
     {{
       "type": "<day_of_week|chronic_absence|weather_related|etc>",
+      "detail": "<description based only on provided data>",
       "severity": "<low|medium|high>",
+      "studentIds": ["<id — only if explicitly provided>"]
     }}
   ],
   "atRiskStudents": [
     {{
+      "studentId": "<id — only if explicitly provided>",
+      "studentName": "<name — only if explicitly provided, never invented>",
+      "absenceRate": <percentage — only if provided>,
       "trend": "<improving|stable|worsening>",
       "recommendation": "<action>"
     }}
   ],
+  "summary": "<overall summary based only on provided data, no invented names or IDs>"
 }}
 """
         result = gemini_json(prompt, system_instruction=system_instruction)
+        # Sanitise: pin scope fields to request values so they cannot be hallucinated
+        if result:
+            result['scope'] = scope
+            result['scopeId'] = scope_id or ''
         return jsonify(result)
     except Exception as e:
         if err:
             return jsonify({'error': err}), 400
+        question     = data['question']
+        subject      = data['subject']
+        grade        = data['grade']
+        student_id   = data['studentId']
         show_working = data.get('showWorking', True)
         system_instruction = (