# NOTE: The lines below were page-scrape residue from the Hugging Face
# Spaces listing ("Spaces: Sleeping"); kept only as a comment so the file parses.
| # app.py | |
| import gradio as gr | |
| import os | |
| from transformers import pipeline | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| import json | |
| import re | |
# --- Load necessary components for the RAG system ---
# These paths are relative to the Space's root directory.
FAISS_INDEX_PATH = "sol_faiss_index.bin"
DOCUMENT_IDS_PATH = "sol_document_ids.json"
# The preprocessed SOL documents (list of {"id": ..., "content": ...})
# saved as JSON during the indexing step and uploaded with the Space.
SOL_DOCUMENTS_PATH = "sol_documents.json"

# Load the SentenceTransformer embedding model.
# On a Space the weights are downloaded to the HF cache on first use;
# the Space therefore needs internet access enabled.
try:
    model = SentenceTransformer('all-mpnet-base-v2')
except Exception as e:
    print(f"Error loading SentenceTransformer model: {e}")
    print("Attempting to load from local cache or download on first use.")
    # BUGFIX: assign a placeholder so the later `if not model` guard in
    # retrieve_and_generate_app doesn't raise NameError on a failed load.
    model = None

# Load the prebuilt FAISS index of SOL embeddings.
try:
    index = faiss.read_index(FAISS_INDEX_PATH)
except Exception as e:
    print(f"Error loading FAISS index: {e}")
    index = None  # Placeholder if loading fails

# Load the ordered list mapping FAISS row position -> SOL document id.
try:
    with open(DOCUMENT_IDS_PATH, "r") as f:
        document_ids = json.load(f)
except Exception as e:
    print(f"Error loading document IDs: {e}")
    document_ids = []  # Placeholder if loading fails

# Load the SOL documents themselves (the retrieval corpus).
try:
    with open(SOL_DOCUMENTS_PATH, "r") as f:
        documents = json.load(f)
except Exception as e:
    print(f"Error loading sol documents: {e}")
    documents = []  # Placeholder

# Load the LLM used for answer generation.
# NOTE(review): 'google/gemma-2b-it' is a gated model — the Space needs an
# HF token with access granted (via Space secrets), or swap in the open
# TinyLlama checkpoint below.
try:
    # llm_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    llm_pipeline = pipeline("text-generation", model="google/gemma-2b-it")
except Exception as e:
    print(f"Error loading LLM pipeline: {e}")
    llm_pipeline = None  # Placeholder
def retrieve_and_generate_app(query, top_k=3):
    """Answer a geometry SOL question via retrieval-augmented generation.

    Args:
        query: Natural-language question from the Gradio textbox.
        top_k: Number of nearest SOL documents to retrieve as context.

    Returns:
        The generated answer string, or a human-readable error message if a
        component failed to load at startup or generation fails.
    """
    # Guard: every module-level component loaded at startup must be present.
    if not model or index is None or not document_ids or not documents or not llm_pipeline:
        return "System not fully initialized. Please check logs for missing components."
    try:
        # 1. Embed the query with the same model used to build the index.
        query_embedding = model.encode([query])

        # 2. Retrieve nearest neighbours from FAISS.
        D, I = index.search(query_embedding, top_k)

        # Build an id -> content lookup once, instead of scanning the whole
        # documents list for every retrieved hit (was O(hits * docs)).
        content_by_id = {doc["id"]: doc["content"] for doc in documents}
        retrieved_docs = []
        for i in I[0]:
            # BUGFIX: FAISS pads results with -1 when the index holds fewer
            # than top_k vectors; document_ids[-1] would silently return the
            # last document. Skip those slots instead.
            if i < 0:
                continue
            sol_id = document_ids[i]
            retrieved_docs.append({
                "id": sol_id,
                "content": content_by_id.get(sol_id, "Content not found."),
            })

        # 3. Assemble the retrieved SOLs into a single context string.
        context = "\n\n".join(f"SOL {doc['id']}: {doc['content']}" for doc in retrieved_docs)

        # 4. LLM generation.
        prompt = f"""
Given the following information about Virginia Standards of Learning (SOLs):
{context}
Based on this information, answer the following question:
{query}
If the question is about a specific SOL number, provide a direct explanation for that SOL.
If asked for lesson plans, worksheets, or proofs, explain what the document generally entails and whether it provides such materials.
Be concise and to the point.
"""
        print(f"\n--- PROMPT SENT TO LLM ---\n{prompt}\n--------------------------\n")
        response = llm_pipeline(prompt, max_new_tokens=500, num_return_sequences=1, do_sample=True, temperature=0.7)
        generated_text = response[0]['generated_text']
        print(f"\n--- RAW GENERATED TEXT ---\n{generated_text}\n--------------------------\n")

        # The pipeline echoes the prompt; strip everything up to and including
        # the question, then drop the trailing instruction lines if the model
        # repeated them verbatim.
        answer_start_marker = f"Based on this information, answer the following question:\n{query}"
        if answer_start_marker in generated_text:
            answer = generated_text.split(answer_start_marker, 1)[1].strip()
            answer = re.sub(r'If the question is about a specific SOL number,.*?$', '', answer, flags=re.DOTALL).strip()
        else:
            answer = generated_text
        print(f"\n--- FINAL ANSWER ---\n{answer}\n--------------------\n")
        return answer if answer else "No valid response generated. Check logs for details."
    except Exception as e:
        print(f"\n--- ERROR ---\n{str(e)}\n------------\n")
        return f"An error occurred: {str(e)}. Please check the logs for more details."
# --- Gradio UI: single question textbox in, generated answer textbox out ---
question_box = gr.Textbox(lines=2, placeholder="Enter your geometry-related question here...")
answer_box = gr.Textbox(label="Generated Answer")

demo = gr.Interface(
    fn=retrieve_and_generate_app,
    inputs=question_box,
    outputs=answer_box,
    title="Virginia SOL Geometry Assistant",
    description="Ask questions about the Geometry SOL Instructional Guide",
)

# Launch only when executed directly (Spaces import the module as well).
if __name__ == "__main__":
    demo.launch()