Spaces:
Sleeping
Sleeping
| # app.py | |
| import gradio as gr | |
| import os | |
| from transformers import pipeline | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| import json | |
| import re | |
# --- Load necessary components for the RAG system ---
# All paths are relative to the Space's root directory. Each loader is
# best-effort: on failure it logs and leaves a None/empty placeholder so the
# request handler can report "not initialized" instead of crashing at import.
FAISS_INDEX_PATH = "sol_faiss_index.bin"
DOCUMENT_IDS_PATH = "sol_document_ids.json"
# Pre-processed SOL documents saved as JSON (the `documents` list built
# offline); must stay in sync with the FAISS index and the id list below.
SOL_DOCUMENTS_PATH = "sol_documents.json"

# Sentence-embedding model used to encode user queries for retrieval.
# On a Space, the weights are downloaded to the HF cache on first use;
# internet access must be enabled in the Space settings.
try:
    model = SentenceTransformer('all-mpnet-base-v2')
except Exception as e:
    print(f"Error loading SentenceTransformer model: {e}")
    print("Attempting to load from local cache or download on first use.")
    # BUGFIX: previously `model` was left undefined here, so the readiness
    # check in the request handler raised NameError instead of returning
    # its "System not fully initialized" message.
    model = None

# FAISS index over the SOL document embeddings (row i <-> document_ids[i]).
try:
    index = faiss.read_index(FAISS_INDEX_PATH)
except Exception as e:
    print(f"Error loading FAISS index: {e}")
    index = None  # Placeholder if loading fails

# Ordered list mapping FAISS row numbers to SOL ids.
try:
    with open(DOCUMENT_IDS_PATH, "r") as f:
        document_ids = json.load(f)
except Exception as e:
    print(f"Error loading document IDs: {e}")
    document_ids = []  # Placeholder if loading fails

# Full document records: a list of {"id": ..., "content": ...} dicts used to
# resolve retrieved ids back to their text.
try:
    with open(SOL_DOCUMENTS_PATH, "r") as f:
        documents = json.load(f)
except Exception as e:
    print(f"Error loading sol documents: {e}")
    documents = []  # Placeholder

# Text-generation LLM that answers from the retrieved context. Swap the model
# id here (e.g. 'google/gemma-2b-it') if the Space has the resources; gated or
# paid models need an access token configured as a Space secret.
try:
    llm_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
except Exception as e:
    print(f"Error loading LLM pipeline: {e}")
    llm_pipeline = None  # Placeholder
| def retrieve_and_generate_app(query, top_k=3): | |
| if not model or not index or not document_ids or not documents or not llm_pipeline: | |
| return "System not fully initialized. Please check logs for missing components." | |
| # 1. Query Embedding | |
| query_embedding = model.encode([query]) | |
| # 2. Retrieval using FAISS | |
| D, I = index.search(query_embedding, top_k) | |
| retrieved_docs = [] | |
| for i in I[0]: | |
| sol_id = document_ids[i] | |
| # Find the full content of the retrieved SOL | |
| # This relies on the 'documents' list being correctly loaded and matching by ID | |
| retrieved_content = next((doc["content"] for doc in documents if doc["id"] == sol_id), "Content not found.") | |
| retrieved_docs.append({"id": sol_id, "content": retrieved_content}) | |
| context = "\n\n".join([f"SOL {doc['id']}: {doc['content']}" for doc in retrieved_docs]) | |
| prompt = f"""""" |