# --- Hugging Face Space page metadata (scrape residue, not source code) ---
# Space status: Sleeping; file size: 4,251 bytes.
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer
# Inference client for the chat model that powers the tutor.
client = InferenceClient("microsoft/phi-4")

# Load the AQA biology specification text that grounds the tutor's answers.
# errors="replace" keeps startup alive even if the file has bad byte sequences.
with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as spec_file:
    bio_spec_text = spec_file.read()
def preprocess_text(text):
    """Split raw specification text into clean, non-empty line chunks.

    Args:
        text: Full contents of the specification file.

    Returns:
        List of whitespace-trimmed, non-blank lines ("chunks") in order.
    """
    lines = text.strip().split("\n")
    return [line.strip() for line in lines if line.strip()]
# One-time startup work: chunk the specification, then embed every chunk
# so retrieval at chat time is just a similarity lookup.
bio_chunks = preprocess_text(bio_spec_text)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the chunks most similar to `query` by cosine similarity.

    Args:
        query: User question to embed and compare against the chunks.
        chunk_embeddings: 2-D tensor of precomputed chunk embeddings,
            row-aligned with `text_chunks`.
        text_chunks: Chunk strings corresponding to the embedding rows.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to `top_k` chunk strings, most similar first.
    """
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    # L2-normalise both sides so the dot product equals cosine similarity.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Clamp k: torch.topk raises if k exceeds the number of chunks.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]
def set_topic(topic):
    """Store the user's chosen topic and return a confirmation message.

    Args:
        topic: Topic name selected from the dropdown.

    Returns:
        Markdown confirmation string shown above the chat.
    """
    global chosen_topic
    chosen_topic = topic
    # The original return statement was split mid-string (a syntax error) and
    # its leading emoji was mojibake; reconstructed as a single f-string with ✅.
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."
def respond(message, history):
    """Chat handler: retrieve relevant spec excerpts and query the LLM.

    Args:
        message: Latest user message.
        history: Prior turns as a list of {"role", "content"} dicts
            (Gradio ``type="messages"`` format); may be None/empty on
            the first turn.

    Returns:
        The assistant's reply text, stripped of surrounding whitespace.
    """
    # Guard against chatting before a topic is confirmed: chosen_topic is
    # None until set_topic() runs, which would inject "None" into the prompt.
    topic = chosen_topic if chosen_topic else "AQA GCSE Biology"
    # Pull the specification excerpts most relevant to this question.
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)
    system_prompt = (
        f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        f"under the UK education system, preparing them for GCSEs within the next two years. "
        f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        f"Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        f"First, provide the user with information on the topic in small, digestible sections, "
        f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        f"Once all the information on that specific topic has been covered, "
        f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        f"When the user has no more questions, give them a set of exam-style questions, one by one, "
        f"covering different areas of the topic. "
        f"The user may also request to focus on a specific area of the topic at first. "
        f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )
    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(
        messages,
        max_tokens=300
    )
    return response['choices'][0]['message']['content'].strip()
# AQA GCSE Biology specification units offered in the topic dropdown.
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]
# Currently selected topic; written by set_topic() and read by respond().
chosen_topic = None
# Assemble the Gradio UI: topic picker row on top, chat interface below.
# NOTE(review): indentation was lost in this source; the dropdown and button
# are assumed to sit inside the Row with the confirmation Markdown below it —
# confirm against the deployed layout. The title emoji bytes are garbled in
# the source and are reproduced as-is.
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! ๐ โ GCSE Biology Tutor")
    with gr.Row():
        subject_picker = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        confirm_button = gr.Button("Confirm Topic")
    confirmation_box = gr.Markdown()
    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")
    confirm_button.click(set_topic, inputs=subject_picker, outputs=confirmation_box)

demo.launch()