import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer


client = InferenceClient("microsoft/phi-4")
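# Note: serverless Inference API availability varies by model; if this call is
# rejected, pass an access token, e.g. InferenceClient("microsoft/phi-4", token=...).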

# Load the biology specification text file

with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()

# Split the raw text into cleaned, non-empty line chunks
def preprocess_text(text):
    cleaned_text = text.strip()
    chunks = cleaned_text.split("\n")
    cleaned_chunks = []

    for chunk in chunks:
        chunk = chunk.strip()
        if chunk != "":
            cleaned_chunks.append(chunk)
    return cleaned_chunks
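
# Example (hypothetical input): preprocess_text("Cells\n\n  Osmosis  \n")
# returns ["Cells", "Osmosis"] - blank lines are dropped, whitespace trimmed.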

# Split the specification into chunks
bio_chunks = preprocess_text(bio_spec_text)

# Load the sentence-transformer model and embed every chunk
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
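# chunk_embeddings is a 2-D tensor of shape (len(bio_chunks), 384);
# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings.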

# Retrieval: embed the query and rank chunks by cosine similarity

def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)

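    # With both sides L2-normalised, this matrix-vector product is the cosine
    # similarity of every chunk against the query; shape: (num_chunks,).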
    similarities = torch.matmul(chunks_norm, query_norm)

    top_indices = torch.topk(similarities, k=top_k).indices

    return [text_chunks[i] for i in top_indices]
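
# Illustrative retrieval call (hypothetical query):
# get_top_chunks("What is osmosis?", chunk_embeddings, bio_chunks)
# -> the 3 spec lines most similar to the query, best match first
#    (torch.topk returns indices in descending similarity order).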

def set_topic(topic):
    global chosen_topic
    chosen_topic = topic
    return f"โœ… Great! You've chosen **{topic}**. Let's start your study session."

    

def respond(message, history):
    global chosen_topic
    # Fall back gracefully if the user starts chatting before confirming a topic
    topic = chosen_topic or "the biology topic the student asks about"

    # Retrieve the most relevant spec excerpts for this message
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)

    system_prompt = (
        f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        f"under the UK education system, preparing them for GCSEs within the next two years. "
        f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        f"Do not include content beyond this scope. "
        f"You will be teaching them about {chosen_topic}. "
        f"First, provide the user with information on the topic in small, digestible sections, "
        f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        f"Once all the information on that specific topic has been covered, "
        f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        f"When the user has no more questions, give them a set of exam-style questions, one by one, "
        f"covering different areas of the topic. "
        f"The user may also request to focus on a specific area of the topic at first. "
        f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )


    messages = [{"role": "system", "content": system_prompt}]

    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    
    response = client.chat_completion(
        messages,
        max_tokens=300
    )
    # huggingface_hub returns a ChatCompletionOutput dataclass; use attribute access
    return response.choices[0].message.content.strip()
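
# Optional streaming variant (a sketch, not wired into the UI below): the
# huggingface_hub client supports chat_completion(..., stream=True), which
# yields chunks whose choices[0].delta.content carries the next token(s).
# History handling is omitted here for brevity.
def respond_stream(message, history):
    messages = [{"role": "user", "content": message}]
    partial = ""
    for chunk in client.chat_completion(messages, max_tokens=300, stream=True):
        partial += chunk.choices[0].delta.content or ""
        yield partial  # Gradio renders each partial string as it arrives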



# Topic list
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]
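# The seven topics above mirror the content sections of the AQA GCSE Biology specification.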

chosen_topic = None


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! ๐Ÿ“š โ€” GCSE Biology Tutor")

    with gr.Row():
        topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        topic_button = gr.Button("Confirm Topic")

    topic_output = gr.Markdown()

    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")

    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

    

demo.launch()