AI-Tutor / app.py
Shahbazakbar's picture
Update app.py
1dbe860 verified
import fitz # PyMuPDF
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import os
# Hugging Face access token, read from the HF_TOKEN environment variable.
# When the variable is unset or empty, fall back to None rather than a
# placeholder string: a made-up token would be sent as a real credential and
# rejected, whereas None leaves credential lookup to the library defaults.
hf_token = os.getenv("HF_TOKEN") or None
# 4-bit quantization settings handed to the causal LM loader below:
# NF4 quantization type, double quantization enabled, float16 compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
# Sentence embedder used to vectorise PDF sentences and user queries.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Instruction-tuned causal LM that writes the explanations. Both the
# tokenizer and the weights are fetched with the Hugging Face token; the
# weights are loaded with the 4-bit quantization config defined above.
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    low_cpu_mem_usage=True,
    token=hf_token,
)
# Text extraction function for PDFs
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        One string holding ``page.get_text()`` for each page, joined in page
        order with no separator.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if extraction fails part-way through; the original never
    # closed it.
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(page.get_text() for page in doc)
# RAG implementation
def create_vector_store(text):
    """Split *text* into sentences, embed them, and index the vectors.

    Args:
        text: Raw document text.

    Returns:
        A ``(index, sentences, embeddings)`` tuple: the FAISS L2 index
        holding one vector per sentence, the list of sentence strings, and
        the embedding array returned by the module-level ``embedder``.
    """
    # Sentence boundaries are approximated by the literal ". " separator.
    chunks = text.split(". ")
    vectors = embedder.encode(chunks, convert_to_tensor=False)

    # The index dimensionality is the width of one embedding vector.
    dimension = vectors.shape[1]
    store = faiss.IndexFlatL2(dimension)
    store.add(vectors)
    return store, chunks, vectors
def retrieve_context(query, index, sentences, embeddings, k=3):
    """Return up to *k* sentences whose embeddings are nearest to *query*.

    Args:
        query: The user's question.
        index: FAISS index built over the sentence embeddings.
        sentences: Sentence strings, in the same order as the index entries.
        embeddings: Unused here; kept so existing callers keep working.
        k: Maximum number of sentences to return.

    Returns:
        A list of at most ``k`` sentences in the order returned by
        ``index.search``. Empty when there are no sentences or ``k`` is not
        positive.
    """
    total = len(sentences)
    if total == 0 or k <= 0:
        return []

    query_embedding = embedder.encode([query], convert_to_tensor=False)

    # Never ask for more neighbours than exist: FAISS pads missing results
    # with -1, and sentences[-1] would silently return the last sentence
    # as if it were a real match.
    _, indices = index.search(query_embedding, min(k, total))

    # Drop any remaining padding / out-of-range ids defensively.
    return [sentences[i] for i in indices[0] if 0 <= i < total]
def generate_explanation(query, context, max_new_tokens=50):
    """Generate a teacher-style explanation of *query* from retrieved context.

    Args:
        query: The concept or question to explain.
        context: Iterable of context strings; they are joined with spaces
            and appended to the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 50, the limit that was previously hard-coded.

    Returns:
        The generated text only, with surrounding whitespace stripped. The
        prompt is not included.
    """
    prompt = f"As a teacher, explain this concept: {query}\nContext: {' '.join(context)}"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # For a causal LM the returned sequence starts with the prompt tokens.
    # Decoding all of it would echo the instruction and the whole context
    # back to the user, so keep only the tokens after the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Process input and explain (PDF only)
def process_input(file, query):
    """Answer *query* about an uploaded PDF; this is the UI callback.

    Args:
        file: The uploaded file. May be ``None`` when nothing was uploaded,
            an object exposing the path as ``.name``, or a plain path string.
        query: The user's question.

    Returns:
        The generated explanation, or a short message asking the user to
        upload a PDF or enter a question when the input is unusable.
    """
    # Accept both file-like objects (path in `.name`) and bare path strings;
    # a missing upload yields None here and is rejected below instead of
    # raising AttributeError.
    pdf_path = getattr(file, "name", file)
    # Compare case-insensitively so "REPORT.PDF" is accepted too.
    if not pdf_path or not str(pdf_path).lower().endswith(".pdf"):
        return "Please upload a PDF file."

    # Reject blank questions before doing any expensive work.
    if not query or not query.strip():
        return "Please enter a question."

    text = extract_text_from_pdf(pdf_path)
    index, sentences, embeddings = create_vector_store(text)
    context = retrieve_context(query, index, sentences, embeddings)
    return generate_explanation(query, context)
# Gradio interface: a PDF upload plus a question box in, one text box out,
# all wired to process_input.
interface = gr.Interface(
    title="AI Tutor",
    description="Upload a PDF file and ask questions about its content!",
    fn=process_input,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload a PDF file"),
        gr.Textbox(label="Ask a question about the content"),
    ],
    outputs=gr.Textbox(label="Explanation"),
)

print("App initialized successfully!")

# Start the web server for the UI.
interface.launch()