Spaces:
Runtime error
Runtime error
| import fitz # PyMuPDF | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
| import gradio as gr | |
| import os | |
# Hugging Face access token, read from the HF_TOKEN environment variable.
# An unset or empty variable yields None rather than a placeholder string:
# a made-up token is always rejected by the Hub, whereas None lets the
# Hugging Face libraries fall back to any locally stored credentials.
hf_token = os.getenv("HF_TOKEN") or None
# 4-bit quantization settings handed to the causal-LM loader: NF4 quant type,
# float16 compute dtype, double quantization switched on.
quant_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)
# Sentence-embedding model used to vectorise PDF sentences and user queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Instruction-tuned causal LM plus its tokenizer, fetched with the Hub token
# and loaded with the 4-bit quantization config defined above.
# NOTE(review): 4-bit bitsandbytes loading presumably needs a GPU runtime --
# confirm the hardware this app is deployed on.
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    low_cpu_mem_usage=True,
    quantization_config=quant_config,
    token=hf_token,
)
# Text extraction function for PDFs
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        One string holding ``page.get_text()`` for each page, joined
        without any separator.
    """
    # The context manager closes the document (and its file handle) even if
    # extraction fails part-way through; the original never closed it.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
# RAG implementation
def create_vector_store(text):
    """Index the sentences of *text* in a flat L2 FAISS index.

    The text is cut on the literal separator ``". "``, each piece is embedded
    with the module-level ``embedder``, and all vectors are added to a new
    ``faiss.IndexFlatL2`` sized from the embedding width.

    Args:
        text: Raw document text.

    Returns:
        A tuple ``(index, sentences, embeddings)``: the populated index, the
        list of pieces in index order, and the array returned by the embedder.
    """
    chunks = text.split(". ")
    vectors = embedder.encode(chunks, convert_to_tensor=False)

    # Row i of the index corresponds to chunks[i].
    store = faiss.IndexFlatL2(vectors.shape[1])
    store.add(vectors)
    return store, chunks, vectors
def retrieve_context(query, index, sentences, embeddings, k=3):
    """Return up to *k* indexed sentences that best match *query*.

    Args:
        query: Question text; embedded with the module-level ``embedder``.
        index: FAISS index whose row ``i`` corresponds to ``sentences[i]``.
        sentences: The sentences that were added to ``index``, in order.
        embeddings: Not used by this function; kept in the signature so
            existing callers keep working.
        k: Maximum number of sentences to return.

    Returns:
        A list of sentences in the order FAISS returned them. It is shorter
        than ``k`` when fewer sentences are available, and empty when there
        is nothing to search or ``k`` is not positive.
    """
    # Never request more neighbours than there are sentences.
    k = min(k, len(sentences))
    if k <= 0:
        return []

    query_embedding = embedder.encode([query], convert_to_tensor=False)
    _, indices = index.search(query_embedding, k)

    # FAISS pads missing results with -1; without this filter Python would
    # silently map -1 to the last sentence and return a bogus match.
    return [sentences[i] for i in indices[0] if 0 <= i < len(sentences)]
def generate_explanation(query, context, max_new_tokens=50):
    """Generate a teacher-style explanation of *query* using *context*.

    Args:
        query: The concept or question to explain.
        context: Iterable of strings; joined with single spaces and appended
            to the prompt after ``Context:``.
        max_new_tokens: Upper bound on the number of generated tokens. The
            default keeps the original limit of 50.

    Returns:
        The newly generated text only, with special tokens removed and
        surrounding whitespace stripped. The prompt is not echoed back.
    """
    prompt = f"As a teacher, explain this concept: {query}\nContext: {' '.join(context)}"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Decoding the full sequence would repeat the prompt and
    # the retrieved context in the answer, so keep only the new tokens.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Process input and explain (PDF only)
def process_input(file, query):
    """Answer *query* about an uploaded PDF and return the explanation text.

    Args:
        file: The upload handed over by the Gradio ``File`` input. May be
            ``None`` (nothing uploaded), a filesystem path, or an object
            exposing the path as ``.name``.
        query: The user's question about the document.

    Returns:
        The generated explanation, or a short user-facing message when no
        PDF was supplied or the PDF yields no text.
    """
    # Submitting the form without a file passes None; guard before touching it.
    if file is None:
        return "Please upload a PDF file."

    # Depending on the Gradio version/configuration the upload arrives either
    # as a plain path or as a temp-file wrapper carrying the path in `.name`.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    # Case-insensitive so that e.g. "REPORT.PDF" is accepted too.
    if not pdf_path.lower().endswith(".pdf"):
        return "Please upload a PDF file."

    text = extract_text_from_pdf(pdf_path)
    # A PDF without a text layer gives nothing to retrieve from; say so
    # instead of running the model on empty context.
    if not text.strip():
        return "No extractable text was found in this PDF."

    index, sentences, embeddings = create_vector_store(text)
    context = retrieve_context(query, index, sentences, embeddings)
    return generate_explanation(query, context)
# Gradio interface: one PDF upload and one question box in, one text box out.
_pdf_upload = gr.File(label="Upload a PDF file", file_types=[".pdf"])
_question_box = gr.Textbox(label="Ask a question about the content")
_answer_box = gr.Textbox(label="Explanation")

interface = gr.Interface(
    title="AI Tutor",
    description="Upload a PDF file and ask questions about its content!",
    fn=process_input,
    inputs=[_pdf_upload, _question_box],
    outputs=_answer_box,
)

# Reached only after the models and the UI definition loaded without raising.
print("App initialized successfully!")
interface.launch()