import os
import base64
import logging
import tempfile
import html
from datetime import datetime

import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
# Folder scanned for course material (see get_files / load_index).
DATA_DIR = "data"
# Image file embedded as a base64 data URI in the UI and in exported reports.
LOGO_PATH = "utas_logo.png"
# Course identity strings.
COURSE_NAME = "EGETS5241 - Energy Data Analytics"
PROGRAMME = "MSc Energy Transition and Sustainability"
UNIVERSITY = "University of Technology and Applied Sciences, Muscat"
# Chat model settings handed to the OpenAI LLM wrapper in answer_question.
MODEL_NAME = "gpt-4o-mini"
TEMPERATURE = 0.2
# Passed as similarity_top_k when the query engine is built.
TOP_K = 2
# Module-level cache for the vector index; filled on the first successful load_index() call.
GLOBAL_INDEX = None
# Instruction preamble that build_prompt places at the start of every query.
BASE_RULES = """
You are an interactive Energy Data Analytics tutor for Master students at UTAS Muscat.
Course: EGETS5241 - Energy Data Analytics
Programme: MSc Energy Transition and Sustainability
Help students prepare for exams using the uploaded PPT/PDF/handouts.
You can help with:
- Detailed explanation
- Short answer questions
- Long answer questions
- Numerical questions
- MCQs and quizzes
- Case studies
- Flashcards
- Important final exam questions with answers
Use simple, clear English.
Ground answers mainly in the uploaded course material.
If something is not found in the uploaded course material, say:
"This point is not directly found in the uploaded course material, so I will explain it using general Energy Data Analytics knowledge."
"""
def get_logo_html():
    """Return the HTML snippet shown in the logo panel.

    The image at ``LOGO_PATH`` is inlined as a base64 ``data:`` URI so the
    page does not depend on static file serving. When the file is missing
    or unreadable, a plain-text "UTAS" placeholder is returned instead.

    Returns:
        str: A ``<div class='logo-box'>`` wrapper containing either the
        inline image or the text placeholder.
    """
    try:
        # EAFP: opening directly avoids the exists()/open() race and also
        # covers permission errors, which previously escaped as exceptions.
        with open(LOGO_PATH, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
    except OSError:
        return "<div class='logo-box'><h2>UTAS</h2></div>"

    return (
        "<div class='logo-box'>"
        f"<img class='logo-img' src='data:image/png;base64,{encoded}' alt='UTAS logo'>"
        "</div>"
    )
def get_files():
    """List the supported course files currently present in ``DATA_DIR``.

    The directory is created when it does not exist yet. Entries whose name
    starts with a dot are ignored, and the extension check is
    case-insensitive.

    Returns:
        list[str]: Bare entry names (not full paths) in ``os.listdir`` order.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    supported = (".pdf", ".pptx", ".docx", ".txt", ".md")
    found = []
    for entry in os.listdir(DATA_DIR):
        if entry.startswith("."):
            continue
        if entry.lower().endswith(supported):
            found.append(entry)
    return found
def load_index():
    """Return the cached vector index, building it on first use.

    Returns:
        tuple: ``(index, "")`` on success, or ``(None, message)`` where
        ``message`` is a user-facing explanation (missing API key, no course
        files, or a load/embedding failure).
    """
    global GLOBAL_INDEX

    # Fast path: the index was already built earlier in this process.
    if GLOBAL_INDEX is not None:
        return GLOBAL_INDEX, ""

    if not os.getenv("OPENAI_API_KEY"):
        return None, "OPENAI_API_KEY is missing. Add it in Hugging Face Space settings."

    if not get_files():
        return None, "No course file found. Upload your PPT/PDF inside the data folder."

    try:
        course_docs = SimpleDirectoryReader(DATA_DIR).load_data()
        GLOBAL_INDEX = VectorStoreIndex.from_documents(
            course_docs,
            embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
            show_progress=False,
        )
    except Exception as exc:
        # Reported as text so the chat UI can show it instead of crashing.
        return None, f"Error loading course files: {exc}"
    return GLOBAL_INDEX, ""
def build_prompt(question, mode):
    """Compose the full query text for the selected answer mode.

    Args:
        question: The student's topic or question.
        mode: One of the mode keys listed in the table below.

    Returns:
        str: ``BASE_RULES`` followed by the mode instruction and the question.

    Raises:
        KeyError: If ``mode`` is not one of the known mode keys.
    """
    instructions_by_mode = {
        "Explain in detail": "Explain the topic in detail with headings, simple examples, and energy-sector relevance.",
        "Short answer": "Give a short exam-style answer in 5-7 clear lines.",
        "Long answer": "Give a long exam-style answer with introduction, main points, example, and conclusion.",
        "Numerical question": "Create or solve a numerical question. Show formula, values, substitution, calculation, final answer, and interpretation.",
        "MCQ quiz": "Create 5 MCQs with 4 options each. Give correct answer and short explanation.",
        "Case study": "Create or answer a case-study question using energy analytics context.",
        "Flashcards": "Generate 10 flashcards in Q/A format.",
        "Important exam questions": "Generate a complete exam preparation question bank with short-answer, long-answer, MCQ, numerical, and case-study questions with model answers.",
        "Teacher may ask": "Give high-probability exam-focused questions from this topic and provide model answers.",
    }
    instruction = instructions_by_mode[mode]

    # The empty first and last elements give the leading and trailing
    # newline of the prompt.
    segments = (
        "",
        f"{BASE_RULES}",
        "Selected mode:",
        f"{instruction}",
        "Student question/topic:",
        f"{question}",
        "",
    )
    return "\n".join(segments)
def answer_question(question, mode):
    """Answer a student question using the indexed course material.

    Args:
        question: The student's topic or question.
        mode: Mode key forwarded to ``build_prompt``.

    Returns:
        str: The query engine's response text, or the error message from
        ``load_index`` when no index is available.
    """
    index, error = load_index()
    if index is None:
        return error

    engine = index.as_query_engine(
        llm=OpenAI(model=MODEL_NAME, temperature=TEMPERATURE),
        similarity_top_k=TOP_K,
        response_mode="compact",
    )
    prompt = build_prompt(question, mode)
    return str(engine.query(prompt))
def update_mode(mode):
    """Return the selected mode key and the HTML chip that displays it.

    Args:
        mode: Internal mode key (for example ``"Short answer"``).

    Returns:
        tuple[str, str]: ``(mode, chip_html)``, where ``chip_html`` is a
        ``<div class='mode-chip'>`` element holding the display label. A key
        without a dedicated label is shown as-is rather than raising.
    """
    labels = {
        "Explain in detail": "📘 Explain in detail",
        "Short answer": "📝 Short answer",
        "Long answer": "📄 Long answer",
        "Numerical question": "🧮 Numerical question",
        "MCQ quiz": "✅ MCQ quiz",
        "Case study": "🏭 Case study",
        "Flashcards": "🧠 Flashcards",
        "Important exam questions": "📚 Question Bank",
        "Teacher may ask": "🎯 Likely Exam Questions",
    }
    label = labels.get(mode, mode)
    # Escaping is a no-op for the built-in labels; it only matters for an
    # unknown key that falls through to being displayed verbatim.
    return mode, f"<div class='mode-chip'>{html.escape(str(label))}</div>"
def respond(message, history, mode):
    """Handle one chat turn for the Gradio UI.

    Args:
        message: Text from the question box.
        history: Current chat history (a list of role/content dicts) or None.
        mode: Selected mode key.

    Returns:
        tuple: ``(history, "")`` — the history (extended with the user and
        assistant messages when ``message`` is not blank) and an empty string
        that clears the input box.
    """
    chat = history or []
    if message and message.strip():
        chat.append(dict(role="user", content=message))
        try:
            answer = answer_question(message, mode)
        except Exception as exc:
            # Show the failure in the conversation instead of breaking the UI.
            answer = f"Error: {exc}"
        chat.append(dict(role="assistant", content=answer))
    return chat, ""
def clean_text(content):
    """Flatten a chatbot message's content into a stripped plain string.

    Handles ``None``, a single dict (reads ``"text"``, falling back to
    ``"content"``), a list of dicts and/or other values (joined with single
    spaces, empty pieces dropped), and any other value via ``str()``.
    """

    def _from_dict(part):
        # Prefer the "text" key; fall back to "content", then to "".
        return str(part.get("text", part.get("content", "")))

    if content is None:
        return ""
    if isinstance(content, dict):
        return _from_dict(content).strip()
    if not isinstance(content, list):
        return str(content).strip()

    pieces = (
        _from_dict(part) if isinstance(part, dict) else str(part)
        for part in content
    )
    return " ".join(piece for piece in pieces if piece).strip()
def _history_pairs(history):
history = history or []
pairs = []
current_user = None
for item in history:
if isinstance(item, dict):
role = item.get("role", "")
content = clean_text(item.get("content", ""))
if role == "user":
current_user = content
elif role == "assistant":
pairs.append((current_user or "", content))
current_user = None
elif isinstance(item, (list, tuple)) and len(item) == 2:
pairs.append((clean_text(item[0]), clean_text(item[1])))
return pairs
def export_premium_report(history):
    """Write the conversation to a standalone HTML study report.

    Args:
        history: Chat history as accepted by ``_history_pairs``.

    Returns:
        str | None: Path of the HTML file created in the system temp
        directory, or ``None`` when the history has no question/answer pairs.
    """
    pairs = _history_pairs(history)
    if not pairs:
        return None

    # One clock reading so the file name and the printed time always agree.
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    report_time = now.strftime("%d %B %Y, %H:%M")
    filepath = os.path.join(
        tempfile.gettempdir(),
        f"EGETS5241_Premium_Study_Report_{timestamp}.html",
    )

    # Inline the logo so the report is a single self-contained file.
    logo_tag = ""
    try:
        with open(LOGO_PATH, "rb") as f:
            logo_encoded = base64.b64encode(f.read()).decode("utf-8")
        logo_tag = (
            f'<img class="logo" src="data:image/png;base64,{logo_encoded}" '
            'alt="UTAS logo">'
        )
    except OSError:
        # The logo is optional; a missing or unreadable file omits it.
        logo_tag = ""

    toc_parts = []
    section_parts = []
    for i, (q, a) in enumerate(pairs, start=1):
        # Text from _history_pairs is already cleaned; it is user/model
        # supplied, so it must be escaped before being placed into HTML.
        q_safe = html.escape(q).replace("\n", "<br>")
        a_safe = html.escape(a).replace("\n", "<br>")
        short_title = html.escape(q[:80] + ("..." if len(q) > 80 else ""))
        toc_parts.append(f'<li><a href="#q{i}">Question {i}: {short_title}</a></li>')
        section_parts.append(f"""
<section class="qa" id="q{i}">
<div class="question">{q_safe}</div>
<h3>Tutor Answer</h3>
<div class="answer">{a_safe}</div>
</section>
""")
    toc_items = "\n".join(toc_parts)
    sections = "".join(section_parts)

    # Kept outside the f-string so CSS braces need no escaping.
    report_style = """
body { font-family: Arial, sans-serif; color: #1f2933; background: #f6f1e7; margin: 0; padding: 24px; }
.report { max-width: 960px; margin: auto; background: white; padding: 28px; border-radius: 16px; }
.logo { max-width: 260px; display: block; margin: 0 auto 16px auto; }
h1 { color: #850047; text-align: center; }
h2, h3 { color: #850047; }
.meta, .tip { background: #fff8dc; border: 2px solid #d6b94b; border-radius: 12px; padding: 14px; margin: 14px 0; }
.qa { border: 1px solid #d6b94b; border-radius: 12px; padding: 16px; margin: 18px 0; page-break-inside: avoid; }
.question { background: #fff8dc; padding: 12px; border-radius: 10px; font-weight: bold; }
.answer { line-height: 1.65; }
"""

    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>EGETS5241 Study Report</title>
<style>{report_style}</style>
</head>
<body>
<div class="report">
{logo_tag}
<h1>EGETS5241 Study Report</h1>
<div class="meta">
<p><b>Generated on:</b> {report_time}</p>
<p><b>Total student interactions:</b> {len(pairs)}</p>
<p><b>Purpose:</b> Exam preparation and self-study record.</p>
</div>
<div class="tip"><b>Tip:</b> To save this report as a polished PDF, open this file in your browser and press Ctrl + P, then choose Save as PDF.</div>
<h2>Table of Contents</h2>
<ul>
{toc_items}
</ul>
{sections}
</div>
</body>
</html>
"""
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(html_content)
    return filepath
def export_excel(history):
    """Write the conversation to a styled Excel workbook.

    If the workbook cannot be produced (for example ``openpyxl`` is not
    installed, or a value is rejected while writing), the same rows are
    written to a CSV file instead and the reason is logged.

    Args:
        history: Chat history as accepted by ``_history_pairs``.

    Returns:
        str | None: Path of the ``.xlsx`` file (or the ``.csv`` fallback) in
        the system temp directory, or ``None`` when there are no pairs.
    """
    pairs = _history_pairs(history)
    if not pairs:
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = os.path.join(tempfile.gettempdir(), f"EGETS5241_Chat_Report_{timestamp}.xlsx")
    column_titles = ["No.", "Student Question", "Tutor Answer"]

    try:
        from openpyxl import Workbook
        from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
        from openpyxl.utils import get_column_letter

        wb = Workbook()
        ws = wb.active
        ws.title = "Chat Report"

        ws.merge_cells("A1:C1")
        ws["A1"] = "EGETS5241 - Energy Data Analytics Chat Report"
        ws["A1"].font = Font(bold=True, size=18, color="850047")
        ws["A1"].alignment = Alignment(horizontal="center")

        ws.merge_cells("A2:C2")
        ws["A2"] = "MSc Energy Transition and Sustainability | UTAS Muscat"
        ws["A2"].font = Font(italic=True, size=12, color="1F2933")
        ws["A2"].alignment = Alignment(horizontal="center")

        # Row 3 stays empty as a spacer; the table header sits on row 4.
        header_row = 4
        first_data_row = header_row + 1

        edge = Side(style="thin", color="D6B94B")
        border = Border(left=edge, right=edge, top=edge, bottom=edge)
        header_fill = PatternFill("solid", fgColor="850047")
        header_font = Font(color="FFFFFF", bold=True)
        header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
        body_alignment = Alignment(wrap_text=True, vertical="top")
        fill_alt = PatternFill("solid", fgColor="FFF8DC")

        for col_idx, title in enumerate(column_titles, start=1):
            cell = ws.cell(row=header_row, column=col_idx, value=title)
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = header_alignment
            cell.border = border

        for number, (q, a) in enumerate(pairs, start=1):
            row_idx = header_row + number
            striped = row_idx % 2 == 1  # odd sheet rows get the alternate fill
            for col_idx, value in enumerate((number, q, a), start=1):
                cell = ws.cell(row=row_idx, column=col_idx, value=value)
                if isinstance(value, str):
                    # openpyxl stores strings that start with "=" as formulas;
                    # chat text must always be written as literal text.
                    cell.data_type = "s"
                cell.alignment = body_alignment
                cell.border = border
                if striped:
                    cell.fill = fill_alt

        for col_idx, width in enumerate([8, 45, 100], start=1):
            ws.column_dimensions[get_column_letter(col_idx)].width = width

        ws.freeze_panes = f"A{first_data_row}"
        ws.auto_filter.ref = f"A{header_row}:C{ws.max_row}"
        wb.save(filepath)
        return filepath
    except Exception:
        # Best-effort export: keep the user's data even when the styled
        # workbook cannot be produced, but record why the fallback ran.
        logging.getLogger(__name__).warning(
            "Excel export failed; writing CSV fallback instead.", exc_info=True
        )
        import csv

        fallback = os.path.splitext(filepath)[0] + ".csv"
        with open(fallback, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(column_titles)
            for i, (q, a) in enumerate(pairs, start=1):
                writer.writerow([i, q, a])
        return fallback
def export_pdf(history):
    """Write the conversation to a PDF report using ReportLab.

    If the PDF cannot be produced (for example ``reportlab`` is not
    installed, or the document fails to build), a plain-text report is
    written instead and the reason is logged.

    Args:
        history: Chat history as accepted by ``_history_pairs``.

    Returns:
        str | None: Path of the ``.pdf`` file (or the ``.txt`` fallback) in
        the system temp directory, or ``None`` when there are no pairs.
    """
    pairs = _history_pairs(history)
    if not pairs:
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filepath = os.path.join(tempfile.gettempdir(), f"EGETS5241_Chat_Report_{timestamp}.pdf")

    def _markup(text):
        # Paragraph parses its text as markup: escape it, then turn line
        # breaks into explicit <br/> tags so they are not collapsed.
        return html.escape(str(text)).replace("\n", "<br/>")

    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib import colors
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
        from reportlab.lib.units import cm

        doc = SimpleDocTemplate(
            filepath,
            pagesize=A4,
            rightMargin=1.4 * cm,
            leftMargin=1.4 * cm,
            topMargin=1.3 * cm,
            bottomMargin=1.3 * cm,
        )
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            "TitleCustom",
            parent=styles["Title"],
            textColor=colors.HexColor("#850047"),
            fontSize=20,
            leading=24,
            alignment=1,
        )
        sub_style = ParagraphStyle(
            "SubTitleCustom",
            parent=styles["BodyText"],
            textColor=colors.HexColor("#1F2933"),
            fontSize=11,
            leading=15,
            alignment=1,
        )
        q_style = ParagraphStyle(
            "Question",
            parent=styles["BodyText"],
            textColor=colors.HexColor("#850047"),
            fontSize=11,
            leading=15,
            fontName="Helvetica-Bold",
        )
        a_style = ParagraphStyle(
            "Answer",
            parent=styles["BodyText"],
            textColor=colors.HexColor("#1F2933"),
            fontSize=10,
            leading=14,
        )
        # Identical for every question/answer box, so it is built once.
        box_style = TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#FFF4C7")),
            ("BACKGROUND", (0, 1), (-1, 1), colors.white),
            ("BOX", (0, 0), (-1, -1), 0.75, colors.HexColor("#D6B94B")),
            ("INNERGRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#E8D89A")),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("RIGHTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 8),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 8),
        ])

        story = [
            Paragraph("EGETS5241 - Energy Data Analytics Chat Report", title_style),
            Paragraph("MSc Energy Transition and Sustainability | UTAS Muscat", sub_style),
            Paragraph(f"Generated on: {datetime.now().strftime('%d %B %Y, %H:%M')}", sub_style),
            Spacer(1, 14),
        ]
        for i, (q, a) in enumerate(pairs, start=1):
            question = Paragraph(f"<b>Student Question {i}:</b><br/>{_markup(q)}", q_style)
            answer = Paragraph(f"<b>Tutor Answer:</b><br/>{_markup(a)}", a_style)
            # NOTE(review): each answer occupies a single table cell; an
            # answer taller than one page may fail layout and end up in the
            # text fallback below — confirm with a long answer.
            table = Table([[question], [answer]], colWidths=[17.5 * cm])
            table.setStyle(box_style)
            story.append(table)
            story.append(Spacer(1, 10))

        doc.build(story)
        return filepath
    except Exception:
        # Best-effort export: keep the user's data even when the PDF cannot
        # be produced, but record why the fallback ran.
        logging.getLogger(__name__).warning(
            "PDF export failed; writing plain-text fallback instead.", exc_info=True
        )
        fallback = os.path.splitext(filepath)[0] + ".txt"
        with open(fallback, "w", encoding="utf-8") as f:
            f.write("EGETS5241 - Energy Data Analytics Chat Report\n\n")
            for i, (q, a) in enumerate(pairs, start=1):
                f.write(f"Student Question {i}:\n{q}\n\nTutor Answer:\n{a}\n\n{'-'*70}\n\n")
        return fallback
# Stylesheet passed to gr.Blocks(css=...) in main(). It styles the page
# background, the header/logo/info/example panels, the mode chip, buttons,
# inputs, and the #chatbox element, and hides the page footer.
CSS = """
body, .gradio-container {
background: #f6f1e7 !important;
color: #1f2933 !important;
font-family: Arial, sans-serif !important;
font-size: 18px !important;
}
.gradio-container {
max-width: 1320px !important;
margin: auto !important;
}
.header {
background: linear-gradient(135deg, #5a0030, #850047, #b8860b);
color: white;
padding: 30px;
border-radius: 22px;
border-top: 7px solid #f0c84b;
box-shadow: 0 10px 25px rgba(90,0,48,0.25);
min-height: 165px;
display: flex;
flex-direction: column;
justify-content: center;
}
.header h1 {
font-size: 36px !important;
margin-bottom: 10px !important;
}
.header h3 {
font-size: 21px !important;
margin: 4px 0 !important;
}
.header p {
font-size: 16px !important;
margin: 4px 0 !important;
}
.header h1, .header h3, .header p {
color: white !important;
}
.logo-box {
background: white;
border: 2px solid #d6b94b;
border-radius: 20px;
padding: 18px;
text-align: center;
min-height: 165px;
display: flex;
align-items: center;
justify-content: center;
}
.logo-img {
width: 340px;
max-width: 100%;
display: block;
margin: auto;
}
.info-box {
background: #fff8dc;
border: 2px solid #d6b94b;
border-left: 7px solid #b8860b;
padding: 16px;
border-radius: 14px;
color: #1f2933 !important;
font-size: 18px;
line-height: 1.65;
}
.info-box, .info-box * {
color: #1f2933 !important;
opacity: 1 !important;
}
.example-box {
background: #fff8dc;
border: 2px solid #d6b94b;
padding: 16px;
border-radius: 14px;
margin-bottom: 10px;
font-size: 18px;
}
.example-box, .example-box * {
color: #1f2933 !important;
opacity: 1 !important;
}
.mode-chip {
background: #850047;
color: white !important;
padding: 14px;
border-radius: 12px;
font-weight: bold;
text-align: center;
margin: 10px 0;
font-size: 18px;
}
button {
border-radius: 12px !important;
font-weight: bold !important;
font-size: 17px !important;
}
textarea, input {
background: white !important;
color: #1f2933 !important;
font-size: 18px !important;
}
#chatbox {
background: white !important;
border: 2px solid #d6b94b !important;
border-radius: 16px !important;
font-size: 18px !important;
line-height: 1.8 !important;
}
#chatbox * {
font-size: 18px !important;
line-height: 1.8 !important;
}
footer {
display: none !important;
}
"""
def main():
    """Build the Gradio tutor UI, wire its events, warm the index, and launch.

    Layout: a logo/header row, a usage hint, then two columns — question-type
    buttons with the current-mode chip on the left; example questions, the
    chat window, the input box, and the report download on the right.
    """
    with gr.Blocks(css=CSS, title="EGETS5241 Energy Data Analytics Tutor") as demo:
        with gr.Row(equal_height=True):
            with gr.Column(scale=1, min_width=320):
                gr.HTML(get_logo_html())
            with gr.Column(scale=4):
                # NOTE(review): header markup uses the .header h1/h3/p rules
                # from CSS — confirm the wording against the intended design.
                gr.HTML(f"""
<div class="header">
<h1>{COURSE_NAME}</h1>
<h3>{PROGRAMME}</h3>
<p>{UNIVERSITY}</p>
</div>
""")

        gr.HTML("""
<div class="info-box">
<b>How to use:</b> Type your topic or question directly in the question box below, for example
<b>Smart Grid, SCADA, ARIMA, or Missing Values</b>.
Then choose the question type and click <b>Send</b>.
</div>
""")

        with gr.Row():
            with gr.Column(scale=1, min_width=330):
                gr.Markdown("### Choose Question Type")
                with gr.Row():
                    btn_explain = gr.Button("📘 Explain", variant="primary")
                    btn_short = gr.Button("📝 Short")
                with gr.Row():
                    btn_long = gr.Button("📄 Long")
                    btn_num = gr.Button("🧮 Numerical")
                with gr.Row():
                    btn_mcq = gr.Button("✅ MCQ")
                    btn_case = gr.Button("🏭 Case Study")
                with gr.Row():
                    btn_flash = gr.Button("🧠 Flashcards")
                    btn_exam = gr.Button("📚 Question Bank", variant="primary")
                mode = gr.State("Explain in detail")
                # Initial chip matches the default value of the `mode` state.
                mode_display = gr.HTML("<div class='mode-chip'>📘 Explain in detail</div>")

            with gr.Column(scale=3):
                gr.HTML("""
<div class="example-box">
<b>Example questions:</b><br>
- Give me important final exam questions for Smart Grid.<br>
- Explain SCADA in simple words.<br>
- Create MCQs from data preprocessing.<br>
- Give one numerical question on load forecasting with answer.
</div>
""")
                # NOTE(review): respond() appends role/content dicts; confirm
                # the installed Gradio version uses that history format by
                # default for gr.Chatbot.
                chatbot = gr.Chatbot(
                    label="Energy Data Analytics Chat",
                    height=660,
                    elem_id="chatbox",
                )
                msg = gr.Textbox(
                    label="Your topic or question",
                    placeholder="Example: Explain SARIMA in simple words / Give important exam questions for Smart Grid",
                    lines=3,
                )
                with gr.Row():
                    send = gr.Button("Send", variant="primary")
                    clear = gr.Button("Clear")
                gr.Markdown("### Download Interaction Report")
                report_download = gr.DownloadButton(
                    label="📄 Generate Premium Study Report",
                    value=None,
                    variant="primary",
                )

        send.click(respond, inputs=[msg, chatbot, mode], outputs=[chatbot, msg])
        msg.submit(respond, inputs=[msg, chatbot, mode], outputs=[chatbot, msg])
        clear.click(lambda: [], inputs=[], outputs=[chatbot])
        report_download.click(export_premium_report, inputs=[chatbot], outputs=[report_download])

        def _mode_setter(mode_key):
            # Factory so each handler captures its own key (a bare lambda in
            # the loop below would late-bind to the last key).
            return lambda: update_mode(mode_key)

        mode_buttons = (
            (btn_explain, "Explain in detail"),
            (btn_short, "Short answer"),
            (btn_long, "Long answer"),
            (btn_num, "Numerical question"),
            (btn_mcq, "MCQ quiz"),
            (btn_case, "Case study"),
            (btn_flash, "Flashcards"),
            (btn_exam, "Important exam questions"),
        )
        for button, mode_key in mode_buttons:
            button.click(_mode_setter(mode_key), inputs=[], outputs=[mode, mode_display])

    # Warm the index so the first question does not pay the build cost.
    # Failure here is not fatal: load_index() runs again on the first query
    # and reports the problem in the chat, so it is only logged.
    logger = logging.getLogger(__name__)
    try:
        _, warmup_error = load_index()
    except Exception:
        logger.exception("Index warm-up raised; continuing to launch the UI.")
    else:
        if warmup_error:
            logger.warning("Index warm-up skipped: %s", warmup_error)

    demo.launch()


if __name__ == "__main__":
    main()