Spaces:

indiapuig
/

first_bot

Sleeping

App Files Files Community

indiapuig committed on Aug 12, 2025

Commit

9c68e04

verified ·

1 Parent(s): 5e94baa

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -17

app.py CHANGED Viewed

@@ -6,18 +6,44 @@ from sentence_transformers import SentenceTransformer
 client = InferenceClient("microsoft/phi-4")
-# Topic list
-BIO_TOPICS = [
-    "Cell Biology",
-    "Organisation",
-    "Infection and Response",
-    "Bioenergetics",
-    "Homeostasis and Response",
-    "Inheritance, Variation and Evolution",
-    "Ecology"
-]
-chosen_topic = None
 def set_topic(topic):
     global chosen_topic
@@ -29,14 +55,20 @@ def set_topic(topic):
 def respond(message, history):
     global chosen_topic
-    messages = [{
-        "role": "system",
-        "content": f"You are a friendly GCSE Biology tutor focusing on **{chosen_topic}**." # Add full on prompt
-    }]
     if history:
         messages.extend(history)
     messages.append({"role": "user", "content": message})
     response = client.chat_completion(
@@ -45,7 +77,21 @@ def respond(message, history):
     )
     return response['choices'][0]['message']['content'].strip()
 # Create the Gradio interface
 with gr.Blocks() as demo:

 client = InferenceClient("microsoft/phi-4")
+#Loading the bio spec txt file
+with open("bio_spec.txt", "r", encoding = "utf-8") as f:
+    bio_spec_text = f.read()
+#process file function
+def preprocess_text(text):
+    cleaned_text = text.strip()
+    chunks = cleaned_text.split("\n")
+    cleaned_chunks = []
+    for chunk in chunks:
+        chunk = chunk.strip()
+        if chunk != "":
+            cleaned_chunks.append(chunk)
+    return cleaned_chunks
+#Splitting the file
+bio_chunks = preprocess_text(bio_spec_text)
+#Loading sentance transformer model and then embedding the chunks (idrk it was on colab)
+embedding_model = SentanceTransformer("all-MiniLM-L6-v2")
+chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
+#Query embedding (on colab step 5)
+def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
+    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
+    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
+    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
+    similarities = torch.matmul(chunks_norm, query_norm)
+    top_indices = torch.topk(similarities, k=top_k).indices
+    return [text_chunks[i] for i in top_indices]
 def set_topic(topic):
     global chosen_topic
 def respond(message, history):
     global chosen_topic
+    #Getting the relevnt parts from the txt file
+    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
+    spec_content = "\n".join(relevant_chunks)
+    system_prompt = (
+        f"You are a friendly GCSE Biology tutor focusing on **{chosen_topic}**.\n"
+        f"Use the following specification excerpts to answer:\n{spec_context}"
+    )
+    messages = [{"role": "system", "content": system_prompt}]
     if history:
         messages.extend(history)
     messages.append({"role": "user", "content": message})
     response = client.chat_completion(
     )
     return response['choices'][0]['message']['content'].strip()
+# Topic list: GCSE Biology topic names; chosen_topic starts as None and is declared global by set_topic and respond.
+BIO_TOPICS = [
+    "Cell Biology",
+    "Organisation",
+    "Infection and Response",
+    "Bioenergetics",
+    "Homeostasis and Response",
+    "Inheritance, Variation and Evolution",
+    "Ecology"
+]
+chosen_topic = None
 # Create the Gradio interface
 with gr.Blocks() as demo: