File size: 8,553 Bytes
e8aba82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db8fbb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8aba82
 
 
 
 
0e53879
69d8b11
0e53879
 
 
 
 
69d8b11
0e53879
e8aba82
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer

# Chat-completion client for the microsoft/phi-4 model; respond() sends the
# assembled conversation through it.
client = InferenceClient("microsoft/phi-4")

# Load the biology specification text file.
# errors="replace" substitutes any bytes that are not valid UTF-8 instead of
# raising, so the file still loads if it contains stray characters.

with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()
# Split raw text into stripped, non-empty lines
def preprocess_text(text):
    """Break *text* into its non-empty lines.

    The whole text is stripped, split on newline characters, and every piece
    is stripped again; pieces that are empty after stripping are discarded.

    Returns:
        A list of the remaining stripped lines, in their original order.
    """
    stripped_lines = (piece.strip() for piece in text.strip().split("\n"))
    return [piece for piece in stripped_lines if piece != ""]

# Split the specification text into stripped, non-empty lines ("chunks")
bio_chunks = preprocess_text(bio_spec_text)

# Load the sentence-transformer model and embed every chunk once at start-up,
# so that only the query has to be embedded per request.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)

# Embed the query and retrieve the most similar chunks

def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the text chunks most similar to *query*, best match first.

    The query is embedded with the module-level ``embedding_model`` and
    scored against every row of ``chunk_embeddings`` by cosine similarity
    (dot product of L2-normalised vectors).

    Args:
        query: The text to search for.
        chunk_embeddings: Tensor of chunk embeddings; row ``i`` is assumed to
            be the embedding of ``text_chunks[i]`` (TODO confirm at call site).
        text_chunks: The chunk strings the embeddings were computed from.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` entries of ``text_chunks``, ordered from most to least
        similar. Fewer are returned when fewer chunks exist; an empty list is
        returned when there are no chunks or ``top_k`` is not positive.
    """
    # torch.topk raises when k exceeds the number of candidates, so clamp it
    # and skip the model call entirely when there is nothing to return.
    k = min(top_k, len(text_chunks))
    if k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    # Normalising both sides turns the dot product into cosine similarity
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)

    similarities = torch.matmul(chunks_norm, query_norm)

    # Convert to plain ints so the list is indexed with ordinary integers
    top_indices = torch.topk(similarities, k=k).indices.tolist()

    return [text_chunks[i] for i in top_indices]

def set_topic(topic):
    """Record the chosen study topic and return a confirmation message.

    Stores *topic* in the module-level ``chosen_topic`` so that ``respond``
    can include it in the system prompt.

    Args:
        topic: The value of the topic dropdown; ``None`` or an empty string
            when nothing has been selected.

    Returns:
        Markdown text confirming the choice, or asking the user to pick a
        topic when none was selected (``chosen_topic`` is then left as it was).
    """
    # NOTE: chosen_topic is a module-level variable, so it is shared by every
    # caller of this module rather than stored per chat session.
    global chosen_topic

    # Confirming with an empty dropdown would otherwise store None and report
    # "You've chosen **None**".
    if not topic:
        return "Please choose a topic from the list before confirming."

    chosen_topic = topic
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."

    

def respond(message, history):
    """Generate the tutor's reply to *message* for the chat interface.

    Retrieves the specification lines most similar to the message, builds a
    system prompt around them and the currently chosen topic, and sends the
    whole conversation to the chat-completion client.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation; assumed to be a list of
            dicts with ``"role"`` and ``"content"`` keys, as supplied by a
            chat interface in "messages" mode. May be empty or ``None``.

    Returns:
        The model's reply text with surrounding whitespace removed (an empty
        string if the model returned no content).
    """
    # Getting the relevant parts from the txt file
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)

    # chosen_topic stays None until a topic is confirmed; avoid telling the
    # model that it is "teaching them about None".
    topic = chosen_topic or "the AQA GCSE Biology topic the student asks about"

    system_prompt = (
        "You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        "under the UK education system, preparing them for GCSEs within the next two years. "
        "You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        "Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        "First, provide the user with information on the topic in small, digestible sections, "
        "preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        "Once all the information on that specific topic has been covered, "
        "ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        "When the user has no more questions, give them a set of exam-style questions, one by one, "
        "covering different areas of the topic. "
        "The user may also request to focus on a specific area of the topic at first. "
        "After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        "This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )

    messages = [{"role": "system", "content": system_prompt}]

    # Forward only role and content: history entries may carry extra keys
    # that the chat-completion endpoint does not expect.
    for turn in history or []:
        messages.append({"role": turn["role"], "content": turn["content"]})
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages,
        max_tokens=300
    )
    content = response['choices'][0]['message']['content']
    # content can be None; guard so .strip() does not raise
    return (content or "").strip()



# Top-level topics offered in the topic dropdown
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology",
]

# Topic currently being studied; None until set_topic() stores a choice
chosen_topic = None

# Sub-topics of "Cell Biology"
CELL_BIO_TOPICS = [
    "Eukaryotes and prokaryotes",
    "Animal and plant cells",
    "Cell specialisation",
    "Cell differentiation",
    "Microscopy",
    "Culturing microorganisms (biology only)",
    "Chromosomes",
    "Mitosis and the cell cycle",
    "Stem cells",
    "Diffusion",
    "Osmosis",
    "Active transport",
]

# Sub-topics of "Organisation"
ORGANISATION_TOPICS = [
    "Principles of organisation",
    "The human digestive system",
    "The heart and blood vessels",
    "Blood",
    "Coronary heart disease: a non-communicable disease",
    "Health issues",
    "The effect of lifestyle on some non-communicable diseases",
    "Cancer",
    "Plant tissues",
    "Plant organ system",
]

# Sub-topics of "Infection and Response"
INFECTION_AND_RESPONSE_TOPICS = [
    "Communicable (infectious) diseases",
    "Viral diseases",
    "Bacterial diseases",
    "Fungal diseases",
    "Protist diseases",
    "Human defence systems",
    "Vaccination",
    "Antibiotics and painkillers",
    "Discovery and development of drugs",
    "Producing monoclonal antibodies",
    "Uses of monoclonal antibodies",
    "Detection and identification of plant diseases",
    "Plant defence responses",
]

# Sub-topics of "Bioenergetics"
BIOENERGETICS_TOPICS = [
    "Photosynthetic reaction",
    "Rate of photosynthesis",
    "Uses of glucose from photosynthesis",
    "Aerobic and anaerobic respiration",
    "Response to exercise",
    "Metabolism",
]

# Sub-topics of "Homeostasis and Response"
HOMEOSTASIS_AND_RESPONSE_TOPICS = [
    "Homeostasis",
    "Structure and function",
    "The brain (biology only)",
    "The eye (biology only)",
    "Control of body temperature (biology only)",
    "Human endocrine system",
    "Control of blood glucose concentration",
    "Maintaining water and nitrogen balance in the body (biology only)",
    "Hormones in human reproduction",
    "Contraception",
    "The use of hormones to treat infertility (HT only)",
    "Feedback systems (HT only)",
    "Control and coordination",
    "Use of plant hormones (HT only)",
]

# Sub-topics of "Inheritance, Variation and Evolution"
INHERITANCE_VARIATION_AND_EVOLUTION_TOPICS = [
    "Sexual and asexual reproduction",
    "Meiosis",
    "Advantages and disadvantages of sexual and asexual reproduction (biology only)",
    "DNA and the genome",
    "DNA structure (biology only)",
    "Genetic inheritance",
    "Inherited disorders",
    "Sex determination",
    "Variation",
    "Evolution",
    "Selective breeding",
    "Genetic engineering",
    # The entries below are marked as individual topics
    "Cloning (biology only)",
    "Theory of evolution (biology only)",
    "Speciation (biology only)",
    "The understanding of genetics (biology only)",
    "Evidence for evolution",
    "Fossils",
    "Extinction",
    "Resistant bacteria",
    "Classification of living organisms",
]

# Sub-topics of "Ecology". Each group heading is followed by the individual
# topics that belong to it.
ECOLOGY_TOPICS = [
    "Adaptations, interdependence and competition",  # Group for the 4 topics below
    "Communities",  # Individual
    "Abiotic factors",  # Individual
    "Biotic factors",  # Individual
    "Adaptations",  # Individual

    "Organisation of an ecosystem",  # Group for the 4 topics below
    "Levels of organisation",  # Individual
    "How materials are cycled",  # Individual
    "Decomposition (biology only)",  # Individual
    "Impact of environmental change (biology only) (HT only)",  # Individual

    "Biodiversity and the effect of human interaction on ecosystems",  # Group for the 6 topics below
    "Biodiversity",  # Individual
    "Waste management",  # Individual
    "Land use",  # Individual
    "Deforestation",  # Individual
    "Global warming",  # Individual
    "Maintaining biodiversity",  # Individual

    "Trophic levels in an ecosystem (biology only)",  # Group for the 3 topics below
    "Trophic levels",  # Individual
    "Pyramids of biomass",  # Individual
    "Transfer of biomass",  # Individual

    # The comma after the group heading matters: without it Python joins the
    # heading and the next string into a single list entry.
    "Food production (biology only)",  # Group for the 4 topics below
    "Factors affecting food security",  # Individual
    "Farming techniques",  # Individual
    "Sustainable fisheries",  # Individual
    "Role of biotechnology",  # Individual
]

# NOTE: the unused `mode = gr.Interface(fn=select, ...)` was removed here.
# `select` is not defined in this module, so evaluating it raised NameError
# at import time and the app never reached the UI below.

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")

    # Topic picker: dropdown, confirm button, and a Markdown area for the
    # message returned by set_topic
    with gr.Column():
        with gr.Column(scale=1):
            topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
            topic_button = gr.Button("Confirm Topic")
            topic_output = gr.Markdown()

    # Chat area; respond() produces each reply
    with gr.Row(scale=2):
        chatbot = gr.ChatInterface(respond, type="messages", title="Ace it!")

    # Store the selected topic and show the confirmation text
    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

demo.launch()