File size: 4,099 Bytes
b7a9406
 
 
 
 
4db306c
b7a9406
 
4db306c
b7a9406
 
 
b3ced06
b7a9406
b3ced06
b7a9406
 
 
4db306c
b7a9406
 
 
4db306c
b7a9406
 
 
 
 
 
 
 
4db306c
 
 
 
 
b7a9406
 
 
b3ced06
b7a9406
79f77a4
6ec86cd
 
b3ced06
79f77a4
b3ced06
 
4db306c
b3ced06
 
 
 
 
 
1fb3294
b3ced06
 
 
 
 
4db306c
b3ced06
 
 
 
4db306c
b3ced06
4db306c
 
b7a9406
b3ced06
 
 
b7a9406
 
 
 
 
 
4db306c
 
b7a9406
4db306c
b7a9406
 
 
 
 
 
 
 
 
4db306c
b7a9406
4db306c
b7a9406
 
 
 
 
 
 
 
000f111
b3ced06
000f111
 
 
b7a9406
 
 
79f77a4
 
4ab3bbe
 
 
b7a9406
 
4db306c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer

# Initialize the model client
# NOTE(review): remote inference against "microsoft/phi-4" presumably needs a
# Hugging Face token configured in the environment — confirm deployment setup.
client = InferenceClient("microsoft/phi-4")

# Load biology specification text
# errors="replace" keeps startup resilient to stray non-UTF-8 bytes in the spec file.
with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()

# Preprocess text into chunks
def preprocess_text(text):
    """Split *text* into non-empty, whitespace-trimmed lines.

    Each line of the specification becomes one retrievable chunk.
    """
    cleaned = []
    for raw_line in text.strip().split("\n"):
        candidate = raw_line.strip()
        if candidate:
            cleaned.append(candidate)
    return cleaned

# Chunk the spec once at import time; retrieval operates over these chunks.
bio_chunks = preprocess_text(bio_spec_text)

# Load sentence transformer model and encode chunks
# all-MiniLM-L6-v2 is downloaded on first use; chunk embeddings are computed
# once at startup and reused for every query in get_top_chunks.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)

# Retrieve the most relevant chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return up to ``top_k`` chunks most similar to ``query``, best first.

    Similarity is cosine similarity between the query embedding and each
    pre-computed chunk embedding.

    Args:
        query: Free-text query string.
        chunk_embeddings: Tensor of shape (num_chunks, dim), produced by
            ``embedding_model.encode(..., convert_to_tensor=True)``.
        text_chunks: Chunk strings aligned row-for-row with chunk_embeddings.
        top_k: Maximum number of chunks to return.

    Returns:
        List of at most ``top_k`` chunk strings (fewer when the corpus is
        smaller than ``top_k``).
    """
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    # L2-normalize both sides so the matmul below computes cosine similarity.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Guard: torch.topk raises a RuntimeError if k exceeds the number of rows,
    # e.g. when the spec file yields fewer chunks than top_k.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]

# Global state
# NOTE(review): module-level globals are shared across all Gradio sessions, so
# concurrent users would overwrite each other's selections — confirm whether
# per-session state (gr.State) is intended.
chosen_topic = None
chosen_mode = None

# Gradio callbacks
def set_topic(topic):
    """Record the user's chosen biology topic and return a confirmation."""
    global chosen_topic
    chosen_topic = topic
    confirmation = f"βœ… Great! You've chosen **{topic}**."
    return confirmation

def set_mode(mode):
    """Record the user's chosen interaction mode and return a confirmation."""
    global chosen_mode
    chosen_mode = mode
    confirmation = f"βœ… You have selected **{mode}** mode."
    return confirmation

# Generate system prompt based on mode
def get_system_prompt():
    """Build the tutor's system prompt from the currently stored topic/mode.

    Returns a user-facing reminder string when either selection is missing;
    otherwise embeds the most relevant spec excerpts for the chosen topic.
    """
    global chosen_mode, chosen_topic
    if not chosen_topic or not chosen_mode:
        return "Please select both a topic and a mode to start."

    top_matches = get_top_chunks(chosen_topic, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(top_matches)

    # Both modes end with the same spec-excerpt footer.
    footer = f"Use the following specification excerpts as reference:\n{spec_content}"
    if chosen_mode == "exam mode":
        return (
            f"You are now asking **GCSE Biology exam-style questions** on the topic '{chosen_topic}'. "
            f"Ask one question at a time. Wait for the user to answer before giving feedback or asking the next question. "
            + footer
        )
    return (
        f"You are a helpful science tutor teaching '{chosen_topic}' in small, digestible sections. "
        f"Focus on clear explanations suitable for 14-16 year olds. "
        + footer
    )

# Chatbot response
def respond(message, history):
    """Gradio chat handler: answer *message* given the prior *history*.

    Args:
        message: Latest user message text.
        history: Prior turns as a list of {"role", "content"} dicts
            (gr.ChatInterface with type="messages").

    Returns:
        The assistant's reply text, or a selection reminder when topic/mode
        have not both been chosen yet.
    """
    system_prompt = get_system_prompt()
    if "Please select both" in system_prompt:
        return system_prompt  # Early return if topic or mode not selected

    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=300)
    # The documented huggingface_hub return type is a ChatCompletionOutput
    # dataclass — use attribute access rather than dict subscripting, which is
    # not part of the public API contract.
    content = response.choices[0].message.content
    # The message content can be None (e.g. tool-call responses); avoid
    # crashing on .strip() in that case.
    return content.strip() if content else ""

# Topic and mode lists
# GCSE Biology specification topics offered in the topic dropdown.
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]
# Interaction modes; get_system_prompt branches on the literal "exam mode".
exam_mode = ["exam mode", "learning mode"]

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# ACE it! πŸ“š β€” GCSE Biology Tutor")

    # Topic picker: the dropdown value is only committed to module state when
    # the confirm button is clicked (wired to set_topic below).
    with gr.Row():
        topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        topic_button = gr.Button("Confirm Topic")
    topic_output = gr.Markdown()

    # Mode picker: same confirm-button pattern as the topic picker.
    with gr.Row():
        exam_dropdown = gr.Dropdown(choices=exam_mode, label="Which mode would you like")
        exam_button = gr.Button("Confirm mode")
    exam_output = gr.Markdown()

    # Chat panel; respond() re-reads the stored topic/mode on every turn.
    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")

    # Button clicks write the selections into module-level state and echo a
    # confirmation into the Markdown outputs.
    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)
    exam_button.click(set_mode, inputs=exam_dropdown, outputs=exam_output)




demo.launch()