"""ACE it! - a Gradio chat tutor for AQA GCSE Biology.

The script loads a plain-text copy of the specification (``bio_spec.txt``),
embeds every non-empty line with a sentence-transformer model, and for each
user message retrieves the most similar lines to ground the chat model's
answer.
"""

import gradio as gr
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

client = InferenceClient("microsoft/phi-4")

# Number of specification lines retrieved for each user message.
RETRIEVED_CHUNKS = 4
# Upper bound on the number of tokens generated per reply.
MAX_RESPONSE_TOKENS = 300
# Wording used in the system prompt until the user confirms a topic.
DEFAULT_TOPIC = "a GCSE Biology topic of the student's choice"

# Topic most recently confirmed through the UI (None until one is chosen).
# NOTE: this is module-level state, so it is shared by every browser session
# served by this process.
chosen_topic = None

# Load the biology specification text file.
with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()


def preprocess_text(text: str) -> list[str]:
    """Split *text* on newlines and return the stripped, non-empty lines."""
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line]


# Split the specification into one chunk per non-empty line.
bio_chunks = preprocess_text(bio_spec_text)

# Load the sentence-transformer model and embed every chunk once at start-up.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)


def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the chunks whose embeddings are most similar to *query*.

    Args:
        query: Text to embed and compare against the chunks.
        chunk_embeddings: 2-D tensor with one embedding row per chunk
            (it is normalised along ``dim=1``).
        text_chunks: Chunk texts, in the same order as the embedding rows.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk texts ordered from most to least similar.
        Fewer are returned when fewer chunks exist; an empty list is
        returned when there are no chunks or ``top_k`` is not positive.
    """
    if not text_chunks or top_k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    # Cosine similarity: L2-normalise both sides, then take dot products.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)

    # torch.topk raises when k exceeds the number of candidates, so clamp it.
    k = min(top_k, similarities.shape[0], len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices

    # Convert to plain ints before indexing the Python list.
    return [text_chunks[i] for i in top_indices.tolist()]


def set_topic(topic):
    """Store *topic* as the active study topic and return a status message.

    An empty selection leaves the current topic unchanged and returns a
    prompt asking the user to pick one.
    """
    global chosen_topic
    if not topic:
        return "⚠️ Please choose a topic from the dropdown before confirming."
    chosen_topic = topic
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."


def _build_system_prompt(topic, spec_content):
    """Assemble the tutor system prompt for *topic* with the retrieved excerpts."""
    return (
        "You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        "under the UK education system, preparing them for GCSEs within the next two years. "
        "You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        "Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        "First, provide the user with information on the topic in small, digestible sections, "
        "preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        "Once all the information on that specific topic has been covered, "
        "ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        "When the user has no more questions, give them a set of exam-style questions, one by one, "
        "covering different areas of the topic. "
        "The user may also request to focus on a specific area of the topic at first. "
        "After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        "This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )


def respond(message, history):
    """Generate the tutor's reply to *message*.

    Args:
        message: The latest user message.
        history: Earlier turns as a list of ``{"role", "content"}`` dicts
            (the ChatInterface is created with ``type="messages"``); may be
            empty or None.

    Returns:
        The model's reply text with surrounding whitespace removed.
    """
    # Retrieve the most relevant lines of the specification for this message.
    relevant_chunks = get_top_chunks(
        message, chunk_embeddings, bio_chunks, top_k=RETRIEVED_CHUNKS
    )
    spec_content = "\n".join(relevant_chunks)

    # Avoid telling the model to teach "None" before a topic is confirmed.
    topic = chosen_topic or DEFAULT_TOPIC

    messages = [{"role": "system", "content": _build_system_prompt(topic, spec_content)}]
    # Forward only role/content: history entries may carry extra UI-only keys
    # that the chat-completion endpoint does not expect.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in (history or [])
    )
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=MAX_RESPONSE_TOKENS)
    content = response["choices"][0]["message"]["content"]
    # Guard against a missing/None content field.
    return (content or "").strip()


# Top-level topics offered in the dropdown.
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology",
]

# Sub-topic lists, one per top-level topic. They are not referenced elsewhere
# in this file.
CELL_BIO_TOPICS = [
    "Eukaryotes and prokaryotes",
    "Animal and plant cells",
    "Cell specialisation",
    "Cell differentiation",
    "Microscopy",
    "Culturing microorganisms (biology only)",
    "Chromosomes",
    "Mitosis and the cell cycle",
    "Stem cells",
    "Diffusion",
    "Osmosis",
    "Active transport",
]

ORGANISATION_TOPICS = [
    "Principles of organisation",
    "The human digestive system",
    "The heart and blood vessels",
    "Blood",
    "Coronary heart disease: a non-communicable disease",
    "Health issues",
    "The effect of lifestyle on some non-communicable diseases",
    "Cancer",
    "Plant tissues",
    "Plant organ system",
]

INFECTION_AND_RESPONSE_TOPICS = [
    "Communicable (infectious) diseases",
    "Viral diseases",
    "Bacterial diseases",
    "Fungal diseases",
    "Protist diseases",
    "Human defence systems",
    "Vaccination",
    "Antibiotics and painkillers",
    "Discovery and development of drugs",
    "Producing monoclonal antibodies",
    "Uses of monoclonal antibodies",
    "Detection and identification of plant diseases",
    "Plant defence responses",
]

BIOENERGETICS_TOPICS = [
    "Photosynthetic reaction",
    "Rate of photosynthesis",
    "Uses of glucose from photosynthesis",
    "Aerobic and anaerobic respiration",
    "Response to exercise",
    "Metabolism",
]

HOMEOSTASIS_AND_RESPONSE_TOPICS = [
    "Homeostasis",
    "Structure and function",
    "The brain (biology only)",
    "The eye (biology only)",
    "Control of body temperature (biology only)",
    "Human endocrine system",
    "Control of blood glucose concentration",
    "Maintaining water and nitrogen balance in the body (biology only)",
    "Hormones in human reproduction",
    "Contraception",
    "The use of hormones to treat infertility (HT only)",
    "Feedback systems (HT only)",
    "Control and coordination",
    "Use of plant hormones (HT only)",
]

INHERITANCE_VARIATION_AND_EVOLUTION_TOPICS = [
    "Sexual and asexual reproduction",
    "Meiosis",
    "Advantages and disadvantages of sexual and asexual reproduction (biology only)",
    "DNA and the genome",
    "DNA structure (biology only)",
    "Genetic inheritance",
    "Inherited disorders",
    "Sex determination",
    "Variation",
    "Evolution",
    "Selective breeding",
    "Genetic engineering",
    "Cloning (biology only)",  # Individual
    "Theory of evolution (biology only)",  # Individual
    "Speciation (biology only)",  # Individual
    "The understanding of genetics (biology only)",  # Individual
    "Evidence for evolution",  # Individual
    "Fossils",  # Individual
    "Extinction",  # Individual
    "Resistant bacteria",  # Individual
    "Classification of living organisms",  # Individual
]

ECOLOGY_TOPICS = [
    "Adaptations, interdependence and competition",  # Group for 4 topics below
    "Communities",  # Individual
    "Abiotic factors",  # Individual
    "Biotic factors",  # Individual
    "Adaptations",  # Individual
    "Organisation of an ecosystem",  # Group for 4 topics below
    "Levels of organisation",  # Individual
    "How materials are cycled",  # Individual
    "Decomposition (biology only)",  # Individual
    "Impact of environmental change (biology only) (HT only)",  # Individual
    "Biodiversity and the effect of human interaction on ecosystems",  # Group for 6 topics below
    "Biodiversity",  # Individual
    "Waste management",  # Individual
    "Land use",  # Individual
    "Deforestation",  # Individual
    "Global warming",  # Individual
    "Maintaining biodiversity",  # Individual
    "Trophic levels in an ecosystem (biology only)",  # Group for 3 topics below
    "Trophic levels",  # Individual
    "Pyramids of biomass",  # Individual
    "Transfer of biomass",  # Individual
    # The comma after the next entry was missing, which silently fused it
    # with "Factors affecting food security" into a single string.
    "Food production (biology only)",  # Group for topics below
    "Factors affecting food security",  # Individual
    "Farming techniques",  # Individual
    "Sustainable fisheries",  # Individual
    "Role of biotechnology",  # Individual
]

# NOTE: a leftover `mode = gr.Interface(fn=select, ...)` demo was removed here.
# `select` is not defined or imported anywhere in this file, so the statement
# raised NameError at import time and prevented the app from starting.

# Create the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")

    # Topic picker above the chat window.
    with gr.Column():
        with gr.Column(scale=1):
            topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
            topic_button = gr.Button("Confirm Topic")
            topic_output = gr.Markdown()

        with gr.Row(scale=2):
            chatbot = gr.ChatInterface(respond, type="messages", title="Ace it!")

    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

demo.launch()