"""ACE it! — an AQA GCSE Biology tutor chatbot (Gradio app).

Retrieval-augmented tutoring: the specification file ``bio_spec.txt`` is
chunked and embedded once at startup; for every user message the most
similar chunks are retrieved and injected into the system prompt sent to
the LLM, so answers stay grounded in the exam specification.
"""

import gradio as gr
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# hello -> this is irede
client = InferenceClient("microsoft/phi-4")

# Load the biology specification text file.
# errors="replace" keeps startup alive even if the file contains bad bytes.
with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()


def preprocess_text(text):
    """Split raw specification text into clean, non-empty line chunks.

    Args:
        text: Full contents of the specification file.

    Returns:
        list[str]: One stripped string per non-blank line, in file order.
    """
    lines = text.strip().split("\n")
    return [line.strip() for line in lines if line.strip()]


# Split the file into retrievable chunks.
bio_chunks = preprocess_text(bio_spec_text)

# Load the sentence-transformer model and embed every chunk up front
# (originally prototyped on Colab).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)


def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the ``top_k`` chunks most cosine-similar to ``query``.

    Args:
        query: The user's message to embed and match.
        chunk_embeddings: 2-D tensor of precomputed chunk embeddings,
            one row per entry of ``text_chunks``.
        text_chunks: Chunk strings aligned with ``chunk_embeddings`` rows.
        top_k: Number of chunks to return (clamped to the number available).

    Returns:
        list[str]: Best-matching chunks, most similar first.
    """
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    # L2-normalise both sides so the dot product equals cosine similarity.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Clamp k so torch.topk cannot fail when the spec has few chunks.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]


def set_topic(topic):
    """Record the chosen topic globally and confirm it to the user."""
    global chosen_topic
    chosen_topic = topic
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."


def respond(message, history):
    """Chat handler: retrieve relevant spec excerpts and query the LLM.

    Args:
        message: The latest user message.
        history: Prior chat turns in Gradio "messages" format
            (list of {"role", "content"} dicts), or None/empty.

    Returns:
        str: The assistant's reply text.
    """
    # A topic must be confirmed first; otherwise the prompt would say "None".
    if chosen_topic is None:
        return "Please choose a topic and click **Confirm Topic** before we start."

    # Retrieve the most relevant parts of the specification file.
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)

    system_prompt = (
        f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        f"under the UK education system, preparing them for GCSEs within the next two years. "
        f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        f"Do not include content beyond this scope. "
        f"You will be teaching them about {chosen_topic}. "
        f"First, provide the user with information on the topic in small, digestible sections, "
        f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        f"Once all the information on that specific topic has been covered, "
        f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        f"When the user has no more questions, give them a set of exam-style questions, one by one, "
        f"covering different areas of the topic. "
        f"The user may also request to focus on a specific area of the topic at first. "
        f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )

    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages,
        max_tokens=300,
    )
    # chat_completion returns a ChatCompletionOutput; attribute access is the
    # documented way to read the generated message.
    return response.choices[0].message.content.strip()


# AQA GCSE Biology topic list.
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology",
]

# Set by set_topic(); respond() refuses to tutor until this is chosen.
chosen_topic = None

# Create the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")

    with gr.Row():
        topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        topic_button = gr.Button("Confirm Topic")

    topic_output = gr.Markdown()
    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")

    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

if __name__ == "__main__":
    demo.launch()