# aceit / app.py — Hugging Face Space app (uploaded by indiapuig, commit a92ab9f, verified)
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer
# Inference client for the chat model used by `respond` below.
client = InferenceClient("microsoft/phi-4")

# Load the AQA Biology specification text that grounds the tutor's answers.
# errors="replace" keeps startup robust if the file contains stray bad bytes.
with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()
# --- Text pre-processing -------------------------------------------------
def preprocess_text(text):
    """Split raw specification text into clean, non-empty line chunks.

    Args:
        text: Raw text of the specification file.

    Returns:
        A list of stripped, non-empty lines ("chunks") ready for embedding.
    """
    # splitlines() also handles \r\n / \r endings; stripping each line and
    # dropping empties matches the original loop's behavior exactly.
    return [line.strip() for line in text.splitlines() if line.strip()]
# Split the specification file into per-line chunks for retrieval.
bio_chunks = preprocess_text(bio_spec_text)
# Load the sentence-transformer model and embed every chunk once at startup;
# convert_to_tensor=True keeps the embeddings as a torch tensor so that
# `get_top_chunks` can use torch matmul/topk on them directly.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
# --- Retrieval (step 5 of the original Colab) -----------------------------
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the `top_k` chunks most similar to `query` by cosine similarity.

    Args:
        query: User message to embed and compare against the chunks.
        chunk_embeddings: Tensor of shape (num_chunks, dim), one embedding
            per entry of `text_chunks`.
        text_chunks: Chunk strings corresponding to `chunk_embeddings`.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of up to `top_k` chunk strings, best match first.
    """
    # Uses the module-level `embedding_model` so the query is embedded in
    # the same space as the pre-computed chunk embeddings.
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    # L2-normalise both sides so the dot product below is cosine similarity.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Fix: torch.topk raises if k exceeds the number of rows, which happens
    # when the spec file has fewer chunks than top_k — clamp k first.
    k = min(top_k, similarities.shape[0])
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]
def set_topic(topic):
    """Record the student's chosen topic and confirm it in the UI.

    Args:
        topic: Topic label selected in the dropdown.

    Returns:
        A Markdown confirmation string shown under the dropdown.
    """
    global chosen_topic
    chosen_topic = topic
    # Fix: the checkmark was mojibake ("βœ…" = UTF-8 ✅ decoded as Latin-1).
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."
def respond(message, history):
    """Chat handler: retrieve relevant spec excerpts and answer via the LLM.

    Args:
        message: Latest user message.
        history: Prior turns as a list of {"role", "content"} dicts
            (Gradio `type="messages"` format); may be None/empty on the
            first turn.

    Returns:
        The assistant's reply text.
    """
    global chosen_topic
    # Fix: if the user never pressed "Confirm Topic", `chosen_topic` is
    # None and the prompt would read "teaching them about None" — fall
    # back to the whole subject instead.
    topic = chosen_topic or "AQA GCSE Biology"
    # Getting the relevant parts from the spec file for this message.
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)
    system_prompt = (
        f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        f"under the UK education system, preparing them for GCSEs within the next two years. "
        f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        f"Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        f"First, provide the user with information on the topic in small, digestible sections, "
        f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        f"Once all the information on that specific topic has been covered, "
        f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        f"When the user has no more questions, give them a set of exam-style questions, one by one, "
        f"covering different areas of the topic. "
        f"The user may also request to focus on a specific area of the topic at first. "
        f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )
    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(
        messages,
        max_tokens=300,
    )
    # Attribute access is the documented API for ChatCompletionOutput
    # (dict-style access is a legacy compatibility shim).
    return response.choices[0].message.content.strip()
# AQA GCSE Biology topic list shown in the dropdown (the seven spec units).
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]
# Currently selected topic; written by `set_topic`, read by `respond`.
chosen_topic = None
# --- Gradio interface ------------------------------------------------------
with gr.Blocks() as demo:
    # Fix: emoji/dash were mojibake ("πŸ“š β€”" = UTF-8 "📚 —" decoded as Latin-1).
    gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")
    with gr.Row():
        topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        topic_button = gr.Button("Confirm Topic")
    topic_output = gr.Markdown()
    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")
    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

demo.launch()