indiapuig committed on
Commit
e8aba82
·
verified ·
1 Parent(s): ecba6c2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+ import torch
4
+ from sentence_transformers import SentenceTransformer
5
+
6
# Remote chat model used for tutoring replies (served via the HF Inference API).
client = InferenceClient("microsoft/phi-4")

# Loading the bio spec txt file.
# errors="replace" keeps startup alive even if the file contains bad bytes;
# garbled characters become U+FFFD instead of raising UnicodeDecodeError.

with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()  # full AQA GCSE Biology specification text
12
+
13
+ #process file function
14
def preprocess_text(text):
    """Split raw spec text into a list of cleaned, non-empty chunks.

    Each line of the input becomes one chunk; surrounding whitespace is
    stripped and blank lines are dropped.

    Args:
        text: Raw contents of the spec file (str).

    Returns:
        List of non-empty, stripped line strings (possibly empty).
    """
    # splitlines() also handles \r\n and \r line endings, which the previous
    # split("\n") left as stray carriage returns for Windows-saved files
    # (strip() then hid them per-chunk, but splitlines is the robust form).
    return [chunk for line in text.splitlines() if (chunk := line.strip())]
24
+
25
+ #Splitting the file
26
# Splitting the file into cleaned, non-empty chunks used for retrieval.
bio_chunks = preprocess_text(bio_spec_text)

# Loading the sentence-transformer model and embedding every chunk once at
# startup (the model runs locally; only the chat model is remote).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# One embedding row per chunk, kept as a torch tensor for the similarity
# math in get_top_chunks().
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
32
+
33
+ #Query embedding (on colab step 5)
34
+
35
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return up to `top_k` chunks from `text_chunks` most similar to `query`.

    Similarity is cosine similarity between the query embedding and the
    precomputed chunk embeddings (both L2-normalised before the dot product).

    Args:
        query: User message to embed (str).
        chunk_embeddings: 2-D tensor of chunk embeddings, one row per chunk.
        text_chunks: List of chunk strings aligned with `chunk_embeddings` rows.
        top_k: Maximum number of chunks to return.

    Returns:
        List of up to `top_k` chunk strings, most similar first.
    """
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    # Normalise both sides so the matmul below is cosine similarity.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)

    similarities = torch.matmul(chunks_norm, query_norm)

    # Bug fix: torch.topk raises if k exceeds the number of chunks (e.g. a
    # short spec file), so clamp to the available count.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices

    # .tolist() yields plain ints rather than 0-d tensors for list indexing.
    return [text_chunks[i] for i in top_indices.tolist()]
46
+
47
def set_topic(topic):
    """Record the student's chosen topic and return a confirmation message.

    Stores `topic` in the module-level `chosen_topic` global, which the chat
    handler later reads when building its system prompt.

    Args:
        topic: Topic name selected in the dropdown (str).

    Returns:
        Markdown confirmation string shown under the dropdown.
    """
    global chosen_topic
    chosen_topic = topic
    confirmation = f"✅ Great! You've chosen **{topic}**. Let's start your study session."
    return confirmation
51
+
52
+
53
+
54
def respond(message, history):
    """Chat handler for the Gradio ChatInterface.

    Retrieves the spec chunks most relevant to `message`, builds a tutoring
    system prompt around the globally chosen topic, and queries the hosted
    chat model.

    Args:
        message: Latest user message (str).
        history: Prior conversation as a list of {"role", "content"} dicts
            (Gradio "messages" format), or None/empty on the first turn.

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    global chosen_topic

    # Bug fix: chosen_topic is None until the user clicks "Confirm Topic",
    # which previously injected the literal word "None" into the prompt.
    topic = chosen_topic if chosen_topic else "a GCSE Biology topic of the student's choice"

    # Getting the relevant parts from the txt file
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)

    system_prompt = (
        f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        f"under the UK education system, preparing them for GCSEs within the next two years. "
        f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        f"Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        f"First, provide the user with information on the topic in small, digestible sections, "
        f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        f"Once all the information on that specific topic has been covered, "
        f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        f"When the user has no more questions, give them a set of exam-style questions, one by one, "
        f"covering different areas of the topic. "
        f"The user may also request to focus on a specific area of the topic at first. "
        f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )

    messages = [{"role": "system", "content": system_prompt}]

    # history is already in OpenAI-style message dicts because the
    # ChatInterface is created with type="messages".
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages,
        max_tokens=300
    )
    return response['choices'][0]['message']['content'].strip()
91
+
92
+
93
+
94
+ # Topic list
95
# Topic list — the seven AQA GCSE Biology topics offered in the dropdown.
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]

# Set by set_topic() when the user confirms a choice; respond() reads it
# via `global` when building the system prompt.
chosen_topic = None
106
+
107
+
108
+ # Create the Gradio interface
109
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")

    # Topic picker row: dropdown of AQA topics plus a confirm button.
    with gr.Row():
        topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        topic_button = gr.Button("Confirm Topic")

    # Markdown area that shows the confirmation message from set_topic().
    topic_output = gr.Markdown()

    # type="messages" makes `history` a list of {"role", "content"} dicts,
    # matching what respond() forwards to the chat model.
    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")

    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)


demo.launch()
125
+