indiapuig committed on
Commit
9c68e04
·
verified ·
1 Parent(s): 5e94baa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -17
app.py CHANGED
@@ -6,18 +6,44 @@ from sentence_transformers import SentenceTransformer
6
 
7
  client = InferenceClient("microsoft/phi-4")
8
 
9
- # Topic list
10
- BIO_TOPICS = [
11
- "Cell Biology",
12
- "Organisation",
13
- "Infection and Response",
14
- "Bioenergetics",
15
- "Homeostasis and Response",
16
- "Inheritance, Variation and Evolution",
17
- "Ecology"
18
- ]
19
 
20
- chosen_topic = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def set_topic(topic):
23
  global chosen_topic
@@ -29,14 +55,20 @@ def set_topic(topic):
29
  def respond(message, history):
30
  global chosen_topic
31
 
32
- messages = [{
33
- "role": "system",
34
- "content": f"You are a friendly GCSE Biology tutor focusing on **{chosen_topic}**." # Add full on prompt
35
- }]
 
 
 
 
 
 
 
36
 
37
  if history:
38
  messages.extend(history)
39
-
40
  messages.append({"role": "user", "content": message})
41
 
42
  response = client.chat_completion(
@@ -45,7 +77,21 @@ def respond(message, history):
45
  )
46
  return response['choices'][0]['message']['content'].strip()
47
 
48
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  # Create the Gradio interface
51
  with gr.Blocks() as demo:
 
6
 
7
  client = InferenceClient("microsoft/phi-4")
8
 
9
+ #Loading the bio spec txt file
 
 
 
 
 
 
 
 
 
10
 
11
# The specification is read in full at import time; everything downstream
# works on this in-memory string.
with open("bio_spec.txt", encoding="utf-8") as spec_file:
    bio_spec_text = spec_file.read()
13
+
14
+ #process file function
15
def preprocess_text(text):
    """Split *text* into a list of non-empty, whitespace-trimmed lines.

    Args:
        text: The raw text to split (a ``str``).

    Returns:
        A list of strings, one per line of *text* that still has content
        after leading/trailing whitespace is removed. Blank and
        whitespace-only lines are dropped; an empty input yields ``[]``.
    """
    # Each line is stripped individually, so stripping the whole text first
    # would not change the result.
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line]
25
+
26
# Split the specification into one chunk per non-empty line.
bio_chunks = preprocess_text(bio_spec_text)

# Load the sentence-transformer model and embed every chunk once up front,
# so answering a query only requires embedding the query itself.
# (The class name must match the import: SentenceTransformer.)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
33
+
34
+ #Query embedding (on colab step 5)
35
+
36
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the text chunks most similar to *query*.

    The query is embedded with the module-level ``embedding_model``; both the
    query and the chunk embeddings are L2-normalised, so their dot product is
    the cosine similarity.

    Args:
        query: The text to search for.
        chunk_embeddings: Tensor of chunk embeddings. Normalised along
            ``dim=1``, so it is assumed to be 2-D with one row per entry of
            *text_chunks* (TODO confirm against the caller).
        text_chunks: Sequence of chunk strings, indexed in the same order as
            the rows of *chunk_embeddings*.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most *top_k* chunks, highest similarity first. Returns
        ``[]`` when there are no chunks or *top_k* is not positive.
    """
    # torch.topk raises if k exceeds the number of candidates, so clamp it.
    k = min(top_k, len(text_chunks))
    if k <= 0:
        return []

    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)

    similarities = torch.matmul(chunks_norm, query_norm)

    # Convert to plain ints so list indexing does not depend on tensor
    # __index__ support.
    top_indices = torch.topk(similarities, k=k).indices.tolist()

    return [text_chunks[i] for i in top_indices]
47
 
48
  def set_topic(topic):
49
  global chosen_topic
 
55
  def respond(message, history):
56
  global chosen_topic
57
 
58
+ #Getting the relevant parts from the txt file
59
+
60
+ relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
61
+ spec_context = "\n".join(relevant_chunks)
62
+
63
+ system_prompt = (
64
+ f"You are a friendly GCSE Biology tutor focusing on **{chosen_topic}**.\n"
65
+ f"Use the following specification excerpts to answer:\n{spec_context}"
66
+ )
67
+
68
+ messages = [{"role": "system", "content": system_prompt}]
69
 
70
  if history:
71
  messages.extend(history)
 
72
  messages.append({"role": "user", "content": message})
73
 
74
  response = client.chat_completion(
 
77
  )
78
  return response['choices'][0]['message']['content'].strip()
79
 
80
+
81
+
82
# Names of the biology topics (presumably the choices offered in the UI --
# confirm against the interface code).
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology",
]

# Currently selected topic; declared ``global`` in set_topic() and respond(),
# and interpolated into the system prompt. Starts out as None.
chosen_topic = None
94
+
95
 
96
  # Create the Gradio interface
97
  with gr.Blocks() as demo: