Spaces:

Shahbazakbar
/

AI-Tutor

Runtime error

App Files Files Community

AI-Tutor / app.py

Shahbazakbar

Update app.py

1dbe860 verified over 1 year ago

raw

history blame contribute delete

2.78 kB

	import fitz # PyMuPDF
	import numpy as np
	from sentence_transformers import SentenceTransformer
	import faiss
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
	import gradio as gr
	import os

	# Hugging Face token from environment variable
	hf_token = os.getenv("HF_TOKEN", "your-token-here")

	# Quantization config for 4-bit loading
	quant_config = BitsAndBytesConfig(
	load_in_4bit=True,
	bnb_4bit_quant_type="nf4",
	bnb_4bit_compute_dtype=torch.float16,
	bnb_4bit_use_double_quant=True
	)

	# Load models with authentication and quantization
	embedder = SentenceTransformer('all-MiniLM-L6-v2')
	model_name = "mistralai/Mistral-7B-Instruct-v0.3"
	tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	token=hf_token,
	quantization_config=quant_config,
	device_map="auto",
	low_cpu_mem_usage=True
	)

	# Text extraction function for PDFs
	def extract_text_from_pdf(pdf_path):
	doc = fitz.open(pdf_path)
	text = ""
	for page in doc:
	text += page.get_text()
	return text

	# RAG implementation
	def create_vector_store(text):
	sentences = text.split(". ")
	embeddings = embedder.encode(sentences, convert_to_tensor=False)
	index = faiss.IndexFlatL2(embeddings.shape[1])
	index.add(embeddings)
	return index, sentences, embeddings

	def retrieve_context(query, index, sentences, embeddings, k=3):
	query_embedding = embedder.encode([query], convert_to_tensor=False)
	distances, indices = index.search(query_embedding, k)
	return [sentences[i] for i in indices[0]]

	def generate_explanation(query, context):
	prompt = f"As a teacher, explain this concept: {query}\nContext: {' '.join(context)}"
	inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	outputs = model.generate(**inputs, max_new_tokens=50)
	return tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Process input and explain (PDF only)
	def process_input(file, query):
	if not file.name.endswith('.pdf'):
	return "Please upload a PDF file."
	text = extract_text_from_pdf(file.name)
	index, sentences, embeddings = create_vector_store(text)
	context = retrieve_context(query, index, sentences, embeddings)
	explanation = generate_explanation(query, context)
	return explanation

	# Gradio interface
	interface = gr.Interface(
	fn=process_input,
	inputs=[
	gr.File(label="Upload a PDF file", file_types=[".pdf"]),
	gr.Textbox(label="Ask a question about the content")
	],
	outputs=gr.Textbox(label="Explanation"),
	title="AI Tutor",
	description="Upload a PDF file and ask questions about its content!"
	)

	print("App initialized successfully!")
	interface.launch()