indiapuig committed on
Commit
4db306c
·
verified ·
1 Parent(s): b1c0cd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -70
app.py CHANGED
@@ -3,47 +3,38 @@ from huggingface_hub import InferenceClient
3
  import torch
4
  from sentence_transformers import SentenceTransformer
5
 
 
6
  client = InferenceClient("microsoft/phi-4")
7
 
8
- #Loading the bio spec txt file
9
-
10
  with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
11
  bio_spec_text = f.read()
12
 
13
- #process file function
14
  def preprocess_text(text):
15
- cleaned_text = text.strip()
16
- chunks = cleaned_text.split("\n")
17
- cleaned_chunks = []
18
-
19
- for chunk in chunks:
20
- chunk = chunk.strip()
21
- if chunk != "":
22
- cleaned_chunks.append(chunk)
23
- return cleaned_chunks
24
 
25
- #Splitting the file
26
  bio_chunks = preprocess_text(bio_spec_text)
27
 
28
- # Loading the sentence transformer model and then embedding the chunks (I don't really know the details; it was on Colab)
29
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
30
-
31
  chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
32
 
33
- #Query embedding (on colab step 5)
34
-
35
  def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
36
  query_embedding = embedding_model.encode(query, convert_to_tensor=True)
37
-
38
  query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
39
  chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
40
-
41
  similarities = torch.matmul(chunks_norm, query_norm)
42
-
43
  top_indices = torch.topk(similarities, k=top_k).indices
44
-
45
  return [text_chunks[i] for i in top_indices]
46
 
 
 
 
 
 
47
  def set_topic(topic):
48
  global chosen_topic
49
  chosen_topic = topic
@@ -54,54 +45,37 @@ def set_mode(mode):
54
  chosen_mode = mode
55
  return f"You have selected **{mode}** mode."
56
 
 
 
57
  if chosen_mode == "exam mode":
58
- note = ""
59
-
60
-
61
-
62
-
63
-
 
 
 
 
 
 
64
  def respond(message, history):
65
- global chosen_topic
66
-
67
- # Getting the relevant parts from the txt file
68
  relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
69
  spec_content = "\n".join(relevant_chunks)
70
 
71
- system_prompt = (
72
- f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
73
- f"under the UK education system, preparing them for GCSEs within the next two years. "
74
- f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
75
- f"Do not include content beyond this scope. "
76
- f"You will be teaching them about {chosen_topic}. "
77
- f"First, provide the user with information on the topic in small, digestible sections, "
78
- f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
79
- f"Once all the information on that specific topic has been covered, "
80
- f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
81
- f"When the user has no more questions, give them a set of exam-style questions, one by one, "
82
- f"covering different areas of the topic. "
83
- f"The user may also request to focus on a specific area of the topic at first. "
84
- f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
85
- f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
86
- f"Use the following specification excerpts to answer:\n{spec_content}"
87
- )
88
-
89
 
90
  messages = [{"role": "system", "content": system_prompt}]
91
-
92
  if history:
93
  messages.extend(history)
94
  messages.append({"role": "user", "content": message})
95
-
96
- response = client.chat_completion(
97
- messages,
98
- max_tokens=300
99
- )
100
- return response['choices'][0]['message']['content'].strip()
101
-
102
 
 
 
103
 
104
- # Topic list
105
  BIO_TOPICS = [
106
  "Cell Biology",
107
  "Organisation",
@@ -111,36 +85,27 @@ BIO_TOPICS = [
111
  "Inheritance, Variation and Evolution",
112
  "Ecology"
113
  ]
 
114
 
115
- exam_mode = [ "exam mode", "learning mode"]
116
-
117
- chosen_topic = None
118
-
119
-
120
- # Create the Gradio interface
121
  with gr.Blocks() as demo:
122
  gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")
123
 
124
  with gr.Row():
125
  topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
126
  topic_button = gr.Button("Confirm Topic")
127
-
128
  topic_output = gr.Markdown()
129
 
130
  with gr.Row():
131
- exam_dropdown = gr.Dropdown(choices=exam_mode, label = "Which mode would you like it")
132
  exam_button = gr.Button("Confirm mode")
133
-
134
  exam_output = gr.Markdown()
135
 
136
  chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")
137
-
138
 
139
  topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)
140
  exam_button.click(set_mode, inputs=exam_dropdown, outputs=exam_output)
141
 
142
-
143
-
144
-
145
  demo.launch()
146
 
 
 
3
  import torch
4
  from sentence_transformers import SentenceTransformer
5
 
6
+ # Initialize the model client
7
  client = InferenceClient("microsoft/phi-4")
8
 
9
+ # Load biology specification text
 
10
  with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
11
  bio_spec_text = f.read()
12
 
13
# Break the specification text into retrievable chunks
def preprocess_text(text):
    """Return the non-blank lines of ``text`` with surrounding whitespace removed.

    The text is trimmed, split on newline characters, and every piece is
    stripped; pieces that end up empty are left out of the result.
    """
    stripped_lines = map(str.strip, text.strip().split("\n"))
    return [line for line in stripped_lines if line]
 
 
 
 
 
 
 
17
 
 
18
  bio_chunks = preprocess_text(bio_spec_text)
19
 
20
+ # Load sentence transformer model and encode chunks
21
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 
22
  chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
23
 
24
# Retrieve the most relevant chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the entries of ``text_chunks`` most similar to ``query``.

    The query is embedded with the module-level ``embedding_model``; both the
    query vector and the chunk embeddings are L2-normalised, so the matrix
    product below is a cosine similarity per chunk.

    Args:
        query: Text to search for.
        chunk_embeddings: Tensor of chunk embeddings, normalised along
            ``dim=1`` (one row per chunk; assumed to be in the same order as
            ``text_chunks``).
        text_chunks: Sequence of chunk strings to pick from.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunks, best match first. Empty when there
        are no chunks or ``top_k`` is not positive.
    """
    # Nothing to rank: return before doing any embedding work.
    if not text_chunks or top_k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)

    # torch.topk raises when k exceeds the number of candidates, so clamp it.
    k = min(top_k, len(text_chunks), similarities.shape[0])
    top_indices = torch.topk(similarities, k=k).indices

    # tolist() yields plain ints, which index the Python sequence directly.
    return [text_chunks[i] for i in top_indices.tolist()]
32
 
33
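+ # Global state (module-level, so one topic/mode is shared by every session; NOTE(review): consider per-session gr.State)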
34
+ chosen_topic = None
35
+ chosen_mode = None
36
+
37
+ # Gradio callbacks
38
  def set_topic(topic):
39
  global chosen_topic
40
  chosen_topic = topic
 
45
  chosen_mode = mode
46
  return f"You have selected **{mode}** mode."
47
 
48
def get_note():
    """Build the mode-specific instruction text that opens the system prompt.

    Reads the module-level ``chosen_mode`` and ``chosen_topic`` (neither is
    modified here).

    Returns:
        str: A short exam-mode instruction when ``chosen_mode`` is
        ``"exam mode"``; otherwise the full tutoring brief, which names the
        chosen topic or a generic fallback when no topic has been set.
    """
    if chosen_mode == "exam mode":
        return "Ask questions one by one on GCSE Biology."

    # chosen_topic may still be unset when the first message arrives; fall back
    # to a generic phrase so the prompt never reads "about None".
    topic = chosen_topic or "whichever GCSE Biology topic they ask about"
    return (
        "You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        "under the UK education system, preparing them for GCSEs within the next two years. "
        "You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        "Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        "First, provide the user with information on the topic in small, digestible sections, "
        "preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        "Once all the information on that specific topic has been covered, "
        "ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        "When the user has no more questions, give them a set of exam-style questions, one by one, "
        "covering different areas of the topic. "
        "The user may also request to focus on a specific area of the topic at first. "
        "After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        "This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat."
    )
61
+
62
# Chatbot response
def respond(message, history):
    """Answer one chat turn, using retrieved specification excerpts as context.

    Args:
        message: The user's latest message text.
        history: Earlier turns supplied by the chat UI. Expected to be a list
            of dicts with ``"role"`` and ``"content"`` keys (the interface is
            created with ``type="messages"``); may be empty or ``None``.

    Returns:
        str: The model's reply with surrounding whitespace removed.
    """
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)

    # Introduce the excerpts explicitly so the model treats them as reference
    # material rather than as a continuation of the instructions.
    system_prompt = (
        f"{get_note()}\n"
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )

    messages = [{"role": "system", "content": system_prompt}]
    # Forward only the role/content pair of each earlier turn; any other keys
    # the UI attaches to history entries are not part of the chat request.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in (history or [])
    )
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=300)
    reply = response["choices"][0]["message"]["content"]
    # Guard against a missing reply body so .strip() cannot fail on None.
    return (reply or "").strip()
77
 
78
+ # Topic and mode lists
79
  BIO_TOPICS = [
80
  "Cell Biology",
81
  "Organisation",
 
85
  "Inheritance, Variation and Evolution",
86
  "Ecology"
87
  ]
88
+ exam_mode = ["exam mode", "learning mode"]
89
 
90
+ # Gradio interface
 
 
 
 
 
91
  with gr.Blocks() as demo:
92
  gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")
93
 
94
  with gr.Row():
95
  topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
96
  topic_button = gr.Button("Confirm Topic")
 
97
  topic_output = gr.Markdown()
98
 
99
  with gr.Row():
100
+ exam_dropdown = gr.Dropdown(choices=exam_mode, label="Which mode would you like it")
101
  exam_button = gr.Button("Confirm mode")
 
102
  exam_output = gr.Markdown()
103
 
104
  chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")
 
105
 
106
  topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)
107
  exam_button.click(set_mode, inputs=exam_dropdown, outputs=exam_output)
108
 
 
 
 
109
  demo.launch()
110
 
111
+