# app.py — MS Academic Support Assistant (Gradio)
# Source: jeremierostan, "Update app.py", commit 02837a4 (verified)
import gradio as gr
import pandas as pd
import json
import os
import tempfile
from pathlib import Path
import google.generativeai as genai
import fitz
from PIL import Image
import io
import pptx
from pptx.enum.shapes import MSO_SHAPE_TYPE
# --- Custom CSS for Styling (No changes needed) ---
custom_css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
#chatbot .user {
background-color: #0072C6;
color: white;
}
#chatbot .bot {
background-color: #F0F0F0;
color: #444;
}
.prose {
white-space: pre-wrap;
}
"""
# --- Helper Functions (MODIFIED) ---
def load_student_data():
    """Load the student roster from ``Student List.csv``.

    Returns:
        dict: Mapping of student name -> student ID. Empty when the CSV is
        missing or lacks the expected columns, so the app still starts
        (the dropdowns are simply empty) instead of crashing at import time.
    """
    try:
        df = pd.read_csv("Student List.csv")
        # A CSV without 'Student Name'/'Student ID' columns previously
        # raised an uncaught KeyError here and killed the whole app.
        return dict(zip(df['Student Name'], df['Student ID']))
    except (FileNotFoundError, KeyError):
        return {}
# --- MODIFIED: now extracts both text and images ---
def extract_content_from_file(file_path):
    """Extract text and embedded images from an uploaded lesson file.

    Args:
        file_path: Path to a .pdf or .pptx file, or None when nothing
            was uploaded.

    Returns:
        tuple: (text, images) where text is the extracted string and
        images is a list of PIL.Image objects. On a read error the text
        slot carries an error message and the image list is empty.
        Unsupported suffixes yield ("", []).
    """
    if file_path is None:
        return "", []
    text = ""
    images = []
    path = Path(file_path)
    # Compare case-insensitively: uploads may arrive as .PDF / .PPTX,
    # which the previous exact match silently treated as "no content".
    suffix = path.suffix.lower()
    try:
        if suffix == ".pdf":
            doc = fitz.open(path)
            try:
                for page in doc:
                    text += page.get_text() + "\n"
                    # full=True also returns images referenced from form XObjects
                    for img in page.get_images(full=True):
                        xref = img[0]
                        base_image = doc.extract_image(xref)
                        images.append(Image.open(io.BytesIO(base_image["image"])))
            finally:
                # Previously the document leaked if extraction raised mid-way.
                doc.close()
        elif suffix == ".pptx":
            prs = pptx.Presentation(path)
            for slide in prs.slides:
                for shape in slide.shapes:
                    if hasattr(shape, "text"):
                        text += shape.text + "\n"
                    # Picture shapes expose their raw bytes via .image.blob
                    if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                        images.append(Image.open(io.BytesIO(shape.image.blob)))
    except Exception as e:
        return f"Error reading file: {e}", []
    return text, images
# --- Gemini API Integration ---
# The key is read once at startup; call_gemini_api checks `api_key`
# truthiness per request so a missing key degrades to an inline error
# message instead of a crash.
api_key = os.environ.get("GEMINI_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
else:
    print("Warning: GEMINI_API_KEY secret not found.")
# --- MODIFIED: This function now sends multimodal content (text + images) ---
def call_gemini_api(prompt_parts):
    """Send a multimodal prompt to Gemini and return the text response.

    Args:
        prompt_parts: List mixing strings and PIL.Image objects, in order.

    Returns:
        str: The model's text, or a human-readable error string — callers
        render the return value directly, so errors surface inline.
    """
    if not api_key:
        return "Error: Gemini API key is not configured."
    try:
        # Must be a model that supports multimodal (text + image) input.
        model = genai.GenerativeModel('gemini-2.5-pro')
        response = model.generate_content(prompt_parts)
        return response.text
    except Exception as e:
        return f"An error occurred while calling the Gemini API: {e}"
# --- Main Application Logic (MODIFIED) ---
# Built once at startup; both dropdowns are populated from the roster CSV.
student_map = load_student_data()
student_names = list(student_map.keys())
def generate_direct_response(student_name, lesson_file, question):
    """Produce a one-shot answer about adapting a lesson for a student.

    Args:
        student_name: Name chosen in the dropdown (key into student_map).
        lesson_file: Optional gradio File upload (.pdf/.pptx) or None.
        question: The teacher's free-text question.

    Returns:
        tuple: (markdown response or error string, path to a downloadable
        .md copy of the response, or None on error).
    """
    if not student_name or not question:
        return "Please select a student and enter a question.", None
    student_id = student_map.get(student_name)
    if not student_id:
        return "Error: Student not found.", None
    try:
        # IEP files live next to the app, keyed by anonymous ID so student
        # names are never sent to the model.
        with open(f"{student_id}.json", 'r', encoding='utf-8') as f:
            iep_data = json.load(f)
    except FileNotFoundError:
        return f"Error: IEP file for ID {student_id} not found.", None
    except json.JSONDecodeError:
        # Previously a corrupt IEP file crashed the handler.
        return f"Error: IEP file for ID {student_id} is not valid JSON.", None
    # Get both text and images from the uploaded lesson (if any).
    lesson_text, lesson_images = extract_content_from_file(lesson_file.name if lesson_file else None)
    prompt_text = f"""
You are an expert AI assistant for teachers. Your task is to help implement a student's Individualized Education Plan (IEP) or Accommodations Plan.
Analyze the provided text AND any images from the learning material to answer the teacher's question.
IEP or Accommodations Data: {json.dumps(iep_data, indent=2)}
Learning Context (Text from lesson, activity, assessment, etc.):
{lesson_text}
Based on all the information above (including the images I've sent), please answer the teacher's question.
Your response MUST be formatted in Markdown.
However, do not use code blocks, as these do not render correctly, which would disadvantage the student!
Important: when you are asked to create materials and include explanations, make sure to use simple, accessible language for students.
Avoid jargon. For example, instead of 'Identify mu and sigma,' you should write something like, 'Find the average (also called the mean) of the numbers, and then figure out how spread out the numbers are (which is the standard deviation).'
When you have to use scientific notation, make sure to use proper LaTeX.
Question: "{question}"
"""
    # The prompt is a list of parts: the text first, then every image.
    prompt_parts = [prompt_text]
    prompt_parts.extend(lesson_images)
    llm_response = call_gemini_api(prompt_parts)
    # delete=False so gradio can serve the file after the handler returns.
    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".md", encoding='utf-8') as tmpfile:
        tmpfile.write(llm_response)
        download_path = tmpfile.name
    return llm_response, download_path
def load_student_for_chat(student_name, lesson_file):
    """Load a student's IEP plus optional lesson content into chat state.

    Args:
        student_name: Name chosen in the dropdown.
        lesson_file: Optional gradio File upload (.pdf/.pptx) or None.

    Returns:
        tuple: (state dict with 'context' text and 'images' list reused on
        every later chat turn, initial chat history in messages format).
    """
    if not student_name:
        return {"context": "", "images": []}, [{"role": "assistant", "content": "Please select a student first."}]
    student_id = student_map.get(student_name)
    if not student_id:
        return {"context": "", "images": []}, [{"role": "assistant", "content": "Error: Student not found."}]
    try:
        with open(f"{student_id}.json", 'r', encoding='utf-8') as f:
            iep_data = json.load(f)
    except FileNotFoundError:
        return {"context": "", "images": []}, [{"role": "assistant", "content": f"Error: IEP file for ID {student_id} not found."}]
    except json.JSONDecodeError:
        # Previously a corrupt IEP file crashed the handler.
        return {"context": "", "images": []}, [{"role": "assistant", "content": f"Error: IEP file for ID {student_id} is not valid JSON."}]
    # Get both text and images and keep them in the session state.
    lesson_text, lesson_images = extract_content_from_file(lesson_file.name if lesson_file else None)
    context = f"""
You are an expert AI assistant for teachers. A teacher has loaded the data for a specific student and needs help.
The student's full IEP or Accommodations data is: {json.dumps(iep_data)}
The provided learning context (lesson, activity, assessment, etc.) is: {lesson_text}
The context might also include images, which will be provided with the user's prompts. Analyze both the text and images when responding.
"""
    initial_message = f"The data for Student ID {student_id} has been loaded, including any text and images from the lesson plan. How can I help you with this student?"
    # Images stay in state so each chat turn can resend them to Gemini.
    state_data = {"context": context, "images": lesson_images}
    chat_history = [{"role": "assistant", "content": initial_message}]
    return state_data, chat_history
def chat_with_student_context(message, history, state):
    """Handle one chat turn: append the teacher's message, query Gemini
    with the stored student context and lesson images, and append the
    assistant's reply. Returns the updated history (messages format).
    """
    base_context = state.get("context", "")
    lesson_images = state.get("images", [])
    if not base_context:
        # Nothing loaded yet — echo the message and ask the teacher to
        # load a student before chatting.
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": "Please load a student's information before starting the chat."})
        return history
    history.append({"role": "user", "content": message})
    # Render the whole conversation as a plain-text transcript.
    transcript_lines = [
        f"{'Teacher' if turn['role'] == 'user' else 'Assistant'}: {turn['content']}"
        for turn in history
    ]
    transcript = (
        "\n\nHere is the conversation so far:\n"
        + "\n".join(transcript_lines)
        + "\nAssistant:"
    )
    # Multimodal prompt: base context, lesson images, then the transcript.
    prompt_parts = [base_context, *lesson_images, transcript]
    reply = call_gemini_api(prompt_parts)
    history.append({"role": "assistant", "content": reply})
    return history
# --- Gradio UI ---
# NOTE(review): the hard-coded fallback ships with the app — make sure the
# GRADIO_PASSWORD secret is set in production so "default_password" never
# grants access.
APP_PASSWORD = os.environ.get("GRADIO_PASSWORD", "default_password")
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue")) as demo:
    # Two top-level screens; login() below toggles their visibility.
    with gr.Row(visible=True) as login_screen:
        with gr.Column():
            gr.Markdown("<h1 style='text-align: center;'>MS Academic Support Assistant</h1><h3 style='text-align: center;'>Please enter the password to continue.</h3>")
            password_input = gr.Textbox(label="Password", type="password", placeholder="Enter password...")
            login_button = gr.Button("Login", variant="primary")
            error_message = gr.Markdown()
    with gr.Row(visible=False) as main_app:
        with gr.Column():
            gr.Markdown("<h1 style='text-align: center; color: #0072C6;'>MS Academic Support Assistant</h1>")
            gr.Markdown("<h5 style='text-align: center; color: #0072C6;'>In line with our AI Core Values, do not mention PII and review all outputs. Student names are not shared with the AI.</h5>")
            with gr.Tabs():
                with gr.TabItem("Chat Mode"):
                    # Per-session state: {"context": str, "images": [PIL.Image]}
                    chat_state = gr.State({})
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown("### 1. Load Student Context")
                            chat_student_dropdown = gr.Dropdown(student_names, label="Select Student")
                            chat_lesson_upload = gr.File(label="Upload Lesson Plan (Optional)", file_types=['.pdf', '.pptx'])
                            load_btn = gr.Button("Load Student for Chat", variant="secondary")
                        with gr.Column(scale=2):
                            gr.Markdown("### 2. Chat with the Assistant")
                            chatbot = gr.Chatbot(label="Conversation", elem_id="chatbot", height=500, type="messages")
                            chat_input = gr.Textbox(label="Your Message", placeholder="Ask a follow-up question...", show_label=False)
                    def handle_chat_submission(message, history, state):
                        # Wrapper so submitting also clears the input textbox
                        # (second output is the empty string for chat_input).
                        response_history = chat_with_student_context(message, history, state)
                        return response_history, ""
                    chat_input.submit(
                        handle_chat_submission,
                        inputs=[chat_input, chatbot, chat_state],
                        outputs=[chatbot, chat_input]
                    )
                    load_btn.click(
                        fn=load_student_for_chat,
                        inputs=[chat_student_dropdown, chat_lesson_upload],
                        outputs=[chat_state, chatbot]
                    )
                with gr.TabItem("Direct Response"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown("### 1. Provide Context")
                            direct_student_dropdown = gr.Dropdown(student_names, label="Select Student")
                            direct_lesson_upload = gr.File(label="Upload Lesson Plan (Optional)", file_types=['.pdf', '.pptx'])
                            direct_question = gr.Textbox(label="2. Ask Your Question", lines=5, placeholder="e.g., How can I adapt this lesson's group activity for this student?")
                            direct_submit_btn = gr.Button("Generate Response", variant="primary")
                        with gr.Column(scale=2):
                            gr.Markdown("### Assistant's Response")
                            direct_output = gr.Markdown(elem_classes="prose")
                            direct_download = gr.File(label="Download Response (.md)", interactive=False)
                    direct_submit_btn.click(
                        fn=generate_direct_response,
                        inputs=[direct_student_dropdown, direct_lesson_upload, direct_question],
                        outputs=[direct_output, direct_download]
                    )
    def login(password):
        # Swap screen visibility; the third output is the error banner.
        if password == APP_PASSWORD:
            return gr.update(visible=False), gr.update(visible=True), ""
        else:
            return gr.update(visible=True), gr.update(visible=False), "<p style='color: red; text-align: center;'>Incorrect password. Please try again.</p>"
    login_button.click(login, inputs=[password_input], outputs=[login_screen, main_app, error_message])
demo.launch()