Spaces:
Sleeping
Sleeping
File size: 8,553 Bytes
e8aba82 db8fbb9 e8aba82 0e53879 69d8b11 0e53879 69d8b11 0e53879 e8aba82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 |
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer
# Hosted chat-completion client; respond() sends every conversation to it.
client = InferenceClient("microsoft/phi-4")

# Read the whole specification file once at start-up. errors="replace"
# substitutes undecodable bytes instead of raising UnicodeDecodeError.
with open("bio_spec.txt", mode="r", encoding="utf-8", errors="replace") as spec_file:
    bio_spec_text = spec_file.read()
# Split raw text into cleaned, non-empty line chunks.
def preprocess_text(text: str) -> list[str]:
    """Split *text* into whitespace-trimmed, non-empty lines.

    Args:
        text: Raw document text.

    Returns:
        One entry per non-blank line of ``text``, in the original order,
        each stripped of leading and trailing whitespace (this also drops
        a trailing ``\\r`` left over from Windows line endings).
    """
    stripped_lines = (line.strip() for line in text.strip().split("\n"))
    return [line for line in stripped_lines if line]
# Split the specification text into one chunk per non-empty line.
bio_chunks = preprocess_text(bio_spec_text)
# Load the sentence-transformer model and embed every chunk once at start-up;
# get_top_chunks() then only has to embed the incoming query.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
# Retrieve the chunks whose embeddings are most similar to the query.
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return up to ``top_k`` entries of ``text_chunks`` most similar to ``query``.

    The query is embedded with the module-level ``embedding_model``; both the
    query vector and the chunk matrix are L2-normalised, so the dot product
    below is the cosine similarity.

    Args:
        query: Text to search for.
        chunk_embeddings: Tensor of chunk embeddings, one row per chunk
            (normalised along ``dim=1``).
        text_chunks: Chunk texts, index-aligned with ``chunk_embeddings``.
        top_k: Maximum number of chunks to return.

    Returns:
        The matching chunk texts, most similar first. Fewer than ``top_k``
        items are returned when fewer chunks exist; an empty list is
        returned when there are no chunks or ``top_k`` is not positive.
    """
    # torch.topk raises when k exceeds the number of candidates, so clamp it
    # to what is actually available before doing any work.
    k = min(top_k, len(text_chunks), len(chunk_embeddings))
    if k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)
    top_indices = torch.topk(similarities, k=k).indices
    # Convert to plain ints so list indexing does not depend on tensor __index__.
    return [text_chunks[i] for i in top_indices.tolist()]
def set_topic(topic):
    """Record the topic picked in the UI and return a confirmation message.

    Args:
        topic: Topic name selected in the dropdown; may be ``None`` or empty
            when the button is pressed without a selection.

    Returns:
        A Markdown string confirming the choice, or a prompt to pick a topic
        when nothing was selected (the stored topic is left unchanged then).
    """
    # chosen_topic is module-level state that respond() reads when building
    # the system prompt.
    global chosen_topic
    if not topic:
        return "Please choose a topic from the list first."
    chosen_topic = topic
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."
def respond(message, history):
    """Generate the tutor's reply to ``message``.

    Looks up the specification lines most relevant to the message, builds the
    tutoring system prompt around them and the currently chosen topic, replays
    the prior conversation, and asks the chat model for the next reply.

    Args:
        message: The user's latest message.
        history: Prior turns as a list of dicts with ``"role"`` and
            ``"content"`` keys (the ChatInterface is created with
            ``type="messages"``); may be empty or ``None``.

    Returns:
        The model's reply with surrounding whitespace removed; an empty
        string if the model returned no content.
    """
    # Getting the relevant parts from the txt file
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)

    # chosen_topic is module-level (shared by every session of the app) and
    # stays None until a topic is confirmed; never tell the model to teach "None".
    topic = chosen_topic if chosen_topic else "AQA GCSE Biology (no specific topic chosen yet)"

    system_prompt = (
        "You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        "under the UK education system, preparing them for GCSEs within the next two years. "
        "You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        "Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        "First, provide the user with information on the topic in small, digestible sections, "
        "preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        "Once all the information on that specific topic has been covered, "
        "ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        "When the user has no more questions, give them a set of exam-style questions, one by one, "
        "covering different areas of the topic. "
        "The user may also request to focus on a specific area of the topic at first. "
        "After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        "This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )

    messages = [{"role": "system", "content": system_prompt}]
    # Forward only role/content: UI history entries can carry extra keys
    # (e.g. metadata) that the chat-completion endpoint may reject.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history or []
    )
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages,
        max_tokens=300,
    )
    content = response['choices'][0]['message']['content']
    # The content field can be None; return an empty reply instead of crashing.
    return (content or "").strip()
# Top-level biology topics offered in the topic dropdown of the UI.
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]
# Currently selected topic: written by set_topic(), read by respond().
# Module-level, so the value is shared by everything running in this process.
chosen_topic = None
# Sub-topics for the "Cell Biology" topic.
# NOTE(review): not referenced by any code visible in this file.
CELL_BIO_TOPICS = [
    "Eukaryotes and prokaryotes",
    "Animal and plant cells",
    "Cell specialisation",
    "Cell differentiation",
    "Microscopy",
    "Culturing microorganisms (biology only)",
    "Chromosomes",
    "Mitosis and the cell cycle",
    "Stem cells",
    "Diffusion",
    "Osmosis",
    "Active transport"
]
# Sub-topics for the "Organisation" topic.
# NOTE(review): not referenced by any code visible in this file.
ORGANISATION_TOPICS = [
    "Principles of organisation",
    "The human digestive system",
    "The heart and blood vessels",
    "Blood",
    "Coronary heart disease: a non-communicable disease",
    "Health issues",
    "The effect of lifestyle on some non-communicable diseases",
    "Cancer",
    "Plant tissues",
    "Plant organ system"
]
# Sub-topics for the "Infection and Response" topic.
# NOTE(review): not referenced by any code visible in this file.
INFECTION_AND_RESPONSE_TOPICS = [
    "Communicable (infectious) diseases",
    "Viral diseases",
    "Bacterial diseases",
    "Fungal diseases",
    "Protist diseases",
    "Human defence systems",
    "Vaccination",
    "Antibiotics and painkillers",
    "Discovery and development of drugs",
    "Producing monoclonal antibodies",
    "Uses of monoclonal antibodies",
    "Detection and identification of plant diseases",
    "Plant defence responses"
]
# Sub-topics for the "Bioenergetics" topic.
# NOTE(review): not referenced by any code visible in this file.
BIOENERGETICS_TOPICS = [
    "Photosynthetic reaction",
    "Rate of photosynthesis",
    "Uses of glucose from photosynthesis",
    "Aerobic and anaerobic respiration",
    "Response to exercise",
    "Metabolism"
]
# Sub-topics for the "Homeostasis and Response" topic.
# NOTE(review): not referenced by any code visible in this file.
HOMEOSTASIS_AND_RESPONSE_TOPICS = [
    "Homeostasis",
    "Structure and function",
    "The brain (biology only)",
    "The eye (biology only)",
    "Control of body temperature (biology only)",
    "Human endocrine system",
    "Control of blood glucose concentration",
    "Maintaining water and nitrogen balance in the body (biology only)",
    "Hormones in human reproduction",
    "Contraception",
    "The use of hormones to treat infertility (HT only)",
    "Feedback systems (HT only)",
    "Control and coordination",
    "Use of plant hormones (HT only)",
]
# Sub-topics for the "Inheritance, Variation and Evolution" topic.
# NOTE(review): not referenced by any code visible in this file. Only the later
# entries carry the author's "Individual" marker; the earlier ones are unmarked.
INHERITANCE_VARIATION_AND_EVOLUTION_TOPICS = [
    "Sexual and asexual reproduction",
    "Meiosis",
    "Advantages and disadvantages of sexual and asexual reproduction (biology only)",
    "DNA and the genome",
    "DNA structure (biology only)",
    "Genetic inheritance",
    "Inherited disorders",
    "Sex determination",
    "Variation",
    "Evolution",
    "Selective breeding",
    "Genetic engineering",
    "Cloning (biology only)",  # Individual
    "Theory of evolution (biology only)",  # Individual
    "Speciation (biology only)",  # Individual
    "The understanding of genetics (biology only)",  # Individual
    "Evidence for evolution",  # Individual
    "Fossils",  # Individual
    "Extinction",  # Individual
    "Resistant bacteria",  # Individual
    "Classification of living organisms",  # Individual
]
# Sub-topics for the "Ecology" topic. Entries marked "Group" are section
# headings; the "Individual" entries that follow belong to that section.
ECOLOGY_TOPICS = [
    "Adaptations, interdependence and competition",  # Group for 4 topics below
    "Communities",  # Individual
    "Abiotic factors",  # Individual
    "Biotic factors",  # Individual
    "Adaptations",  # Individual
    "Organisation of an ecosystem",  # Group for 4 topics below
    "Levels of organisation",  # Individual
    "How materials are cycled",  # Individual
    "Decomposition (biology only)",  # Individual
    "Impact of environmental change (biology only) (HT only)",  # Individual
    "Biodiversity and the effect of human interaction on ecosystems",  # Group for 6 topics below
    "Biodiversity",  # Individual
    "Waste management",  # Individual
    "Land use",  # Individual
    "Deforestation",  # Individual
    "Global warming",  # Individual
    "Maintaining biodiversity",  # Individual
    "Trophic levels in an ecosystem (biology only)",  # Group for 3 topics below
    "Trophic levels",  # Individual
    "Pyramids of biomass",  # Individual
    "Transfer of biomass",  # Individual
    # The comma after the next entry matters: without it Python concatenates
    # the heading with "Factors affecting food security" into a single string.
    "Food production (biology only)",  # Group for 4 topics below
    "Factors affecting food security",  # Individual
    "Farming techniques",  # Individual
    "Sustainable fisheries",  # Individual
    "Role of biotechnology",  # Individual
]
# A leftover `gr.Interface(fn=select, ...)` (slider input, "greeting" textbox
# output) used to be built here. `select` is not defined anywhere in this
# file, so evaluating it raised NameError at import time and the app could
# not start. The interface was never mounted in the UI, so the name is kept
# only as a placeholder.
mode = None
# Create the Gradio interface: a topic picker above the chat window.
with gr.Blocks() as demo:
    # The title originally contained an emoji that was lost to an encoding
    # error; only the dash could be restored.
    gr.Markdown("# ACE it! — GCSE Biology Tutor")
    # Choose topic
    with gr.Column():
        with gr.Column(scale=1):
            topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
            topic_button = gr.Button("Confirm Topic")
            topic_output = gr.Markdown()
        with gr.Row(scale=2):
            # type="messages" makes the history passed to respond() a list of
            # {"role": ..., "content": ...} dicts.
            chatbot = gr.ChatInterface(respond, type="messages", title="Ace it!")
    # Event listeners must be registered inside the Blocks context.
    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

demo.launch()
|