# aceit / app.py — Hugging Face Space app (uploaded by indiapuig, commit a92ab9f, verified)
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from sentence_transformers import SentenceTransformer
# Inference client for the chat model used by `respond` below.
client = InferenceClient("microsoft/phi-4")

# Load the AQA Biology specification text that grounds the tutor's answers.
# errors="replace" keeps startup robust if the file contains stray bad bytes.
with open("bio_spec.txt", "r", encoding="utf-8", errors="replace") as f:
    bio_spec_text = f.read()
# --- Text pre-processing -------------------------------------------------
def preprocess_text(text):
    """Split raw specification text into clean, non-empty line chunks.

    Args:
        text: Raw text of the specification file.

    Returns:
        A list of stripped, non-empty lines ("chunks") ready for embedding.
    """
    # splitlines() also handles \r\n / \r endings; stripping each line and
    # dropping empties matches the original loop's behavior exactly.
    return [line.strip() for line in text.splitlines() if line.strip()]
# Split the specification file into per-line chunks for retrieval.
bio_chunks = preprocess_text(bio_spec_text)
# Load the sentence-transformer model and embed every chunk once at startup;
# convert_to_tensor=True keeps the embeddings as a torch tensor so that
# `get_top_chunks` can use torch matmul/topk on them directly.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = embedding_model.encode(bio_chunks, convert_to_tensor=True)
# --- Retrieval (step 5 of the original Colab) -----------------------------
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the `top_k` chunks most similar to `query` by cosine similarity.

    Args:
        query: User message to embed and compare against the chunks.
        chunk_embeddings: Tensor of shape (num_chunks, dim), one embedding
            per entry of `text_chunks`.
        text_chunks: Chunk strings corresponding to `chunk_embeddings`.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of up to `top_k` chunk strings, best match first.
    """
    # Uses the module-level `embedding_model` so the query is embedded in
    # the same space as the pre-computed chunk embeddings.
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)
    # L2-normalise both sides so the dot product below is cosine similarity.
    query_norm = torch.nn.functional.normalize(query_embedding, p=2, dim=0)
    chunks_norm = torch.nn.functional.normalize(chunk_embeddings, p=2, dim=1)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Fix: torch.topk raises if k exceeds the number of rows, which happens
    # when the spec file has fewer chunks than top_k — clamp k first.
    k = min(top_k, similarities.shape[0])
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]
def set_topic(topic):
    """Record the student's chosen topic and confirm it in the UI.

    Args:
        topic: Topic label selected in the dropdown.

    Returns:
        A Markdown confirmation string shown under the dropdown.
    """
    global chosen_topic
    chosen_topic = topic
    # Fix: the checkmark was mojibake ("βœ…" = UTF-8 ✅ decoded as Latin-1).
    return f"✅ Great! You've chosen **{topic}**. Let's start your study session."
def respond(message, history):
    """Chat handler: retrieve relevant spec excerpts and answer via the LLM.

    Args:
        message: Latest user message.
        history: Prior turns as a list of {"role", "content"} dicts
            (Gradio `type="messages"` format); may be None/empty on the
            first turn.

    Returns:
        The assistant's reply text.
    """
    global chosen_topic
    # Fix: if the user never pressed "Confirm Topic", `chosen_topic` is
    # None and the prompt would read "teaching them about None" — fall
    # back to the whole subject instead.
    topic = chosen_topic or "AQA GCSE Biology"
    # Getting the relevant parts from the spec file for this message.
    relevant_chunks = get_top_chunks(message, chunk_embeddings, bio_chunks, top_k=4)
    spec_content = "\n".join(relevant_chunks)
    system_prompt = (
        f"You are a helpful science tutor who primarily teaches 14 to 16-year-old students "
        f"under the UK education system, preparing them for GCSEs within the next two years. "
        f"You are tutoring AQA GCSE Biology at both higher and foundation levels. "
        f"Do not include content beyond this scope. "
        f"You will be teaching them about {topic}. "
        f"First, provide the user with information on the topic in small, digestible sections, "
        f"preferably with each section as separate text. Always keep the aim of teaching this topic in mind. "
        f"Once all the information on that specific topic has been covered, "
        f"ask the user if they have any questions. If they do, answer in a way that helps them understand better. "
        f"When the user has no more questions, give them a set of exam-style questions, one by one, "
        f"covering different areas of the topic. "
        f"The user may also request to focus on a specific area of the topic at first. "
        f"After the user answers each question, provide feedback to ensure they are exam ready before moving on. "
        f"This cycle repeats: content in small sections, check understanding, questions one by one, mark one by one, then repeat. "
        f"Use the following specification excerpts to answer:\n{spec_content}"
    )
    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(
        messages,
        max_tokens=300,
    )
    # Attribute access is the documented API for ChatCompletionOutput
    # (dict-style access is a legacy compatibility shim).
    return response.choices[0].message.content.strip()
# AQA GCSE Biology topic list shown in the dropdown (the seven spec units).
BIO_TOPICS = [
    "Cell Biology",
    "Organisation",
    "Infection and Response",
    "Bioenergetics",
    "Homeostasis and Response",
    "Inheritance, Variation and Evolution",
    "Ecology"
]
# Currently selected topic; written by `set_topic`, read by `respond`.
chosen_topic = None
# --- Gradio interface ------------------------------------------------------
with gr.Blocks() as demo:
    # Fix: emoji/dash were mojibake ("πŸ“š β€”" = UTF-8 "📚 —" decoded as Latin-1).
    gr.Markdown("# ACE it! 📚 — GCSE Biology Tutor")
    with gr.Row():
        topic_dropdown = gr.Dropdown(choices=BIO_TOPICS, label="Choose a Biology Topic")
        topic_button = gr.Button("Confirm Topic")
    topic_output = gr.Markdown()
    chatbot = gr.ChatInterface(respond, type="messages", title="ACE it!")
    topic_button.click(set_topic, inputs=topic_dropdown, outputs=topic_output)

demo.launch()