"""Echo Study: a voice-first study assistant demo for visually impaired students.

The script loads every PDF found in ``PDF_FOLDER``, indexes the beginning of
each document in a Chroma collection, and exposes a small Gradio UI in which a
scripted "student" picks a PDF and a question.  The question is matched to the
most similar 80-word chunk of the PDF and answered by a Groq-hosted LLM; both
sides of the conversation are spoken aloud.
"""

import contextlib
import functools
import os
import tempfile
import time
import uuid
from typing import Iterator, Optional, Tuple

import chromadb
import gradio as gr
import numpy as np
import pyttsx3
import PyPDF2
from dotenv import load_dotenv
from groq import Groq
from gtts import gTTS
from playsound import playsound
from sentence_transformers import SentenceTransformer

load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
groq_client = Groq(api_key=GROQ_API_KEY)

model = SentenceTransformer('all-MiniLM-L6-v2')
client = chromadb.Client()
# get_or_create keeps the script re-runnable inside one interpreter (notebook,
# reload mode); create_collection raises when the collection already exists.
collection = client.get_or_create_collection("echo_study")

# Folder scanned for study material; point it at "pdfs" to use a sub-folder.
PDF_FOLDER = "."
# Where the short-lived student-voice MP3 files are written.
AUDIO_DIR = tempfile.gettempdir()
# Number of characters of each PDF stored in the Chroma collection.
INDEX_CHARS = 2000
# Number of words per chunk when searching a PDF for the best context.
CHUNK_WORDS = 80

loaded_files = {}              # display name -> PDF filename
pdf_texts = {}                 # PDF filename -> full extracted text
current_file = {"name": None}  # last PDF name the demo announced, lower-cased

QUESTIONS = {
    "embedded systems": [
        "How does the lecture define an Embedded System?",
        "What are the primary resource constraints in embedded systems?",
        "How do embedded systems interact with the physical world?",
    ],
    "dynamic programming": [
        "What is the simplest way to define Dynamic Programming?",
        "How many times does DP solve each subproblem?",
        "What is the simple formula for Dynamic Programming",
    ],
    "mongol history": [
        "Why did the Empire's huge size lead to its fall?",
        "What was the original goal of the British East India Company?",
    ],
}

SYSTEM_PROMPT = (
    "You are EchoStudy, a warm and encouraging study partner for blind students.\n"
    "When answering:\n"
    "1. Start with a different warm phrase each time, like: Great question!, "
    "Interesting!, Good thinking!, Let me help you with that!\n"
    "2. Use a simple real-life analogy to explain if needed.\n"
    "3. Answer in 2 short sentences only, very simple and brief.\n"
    "4. Avoid markdown symbols like stars or hashtags.\n"
    "5. End with: Would you like more details?"
)


def speak_system(text: str) -> None:
    """Speak *text* with the offline pyttsx3 voice and block until finished."""
    engine = pyttsx3.init()
    engine.setProperty('rate', 140)
    engine.say(text)
    engine.runAndWait()


def speak_user(text: str) -> None:
    """Speak *text* with the gTTS voice used for the simulated student.

    The audio is rendered to a uniquely named MP3 in ``AUDIO_DIR``, played
    synchronously, and removed afterwards even if rendering or playback fails.
    """
    audio_path = os.path.join(AUDIO_DIR, f"user_{uuid.uuid4()}.mp3")
    try:
        gTTS(text=text, lang='en').save(audio_path)
        playsound(audio_path)
    finally:
        # The file may not exist when gTTS failed before writing anything.
        with contextlib.suppress(FileNotFoundError):
            os.remove(audio_path)


def load_all_pdfs() -> Iterator[str]:
    """Read and index every PDF in ``PDF_FOLDER``, yielding status messages.

    For each PDF the full text is stored in ``pdf_texts``, the first
    ``INDEX_CHARS`` characters are embedded and upserted into the Chroma
    collection, and a display name (extension dropped, underscores turned
    into spaces, lower-cased) is registered in ``loaded_files``.

    Yields:
        Progress strings intended for the status textbox.
    """
    speak_system("Welcome back! Ready to tackle your studies?")
    yield "⏳ Processing Embeddings..."

    # Sorted so the registration order (and therefore file matching) does not
    # depend on the arbitrary order returned by os.listdir.
    for filename in sorted(os.listdir(PDF_FOLDER)):
        stem, extension = os.path.splitext(filename)
        if extension.lower() != ".pdf":
            continue

        yield f"⏳ Processing: {filename}..."

        filepath = os.path.join(PDF_FOLDER, filename)
        with open(filepath, "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Guard against a page yielding no text at all.
            text = "".join(page.extract_text() or "" for page in reader.pages)

        pdf_texts[filename] = text

        excerpt = text[:INDEX_CHARS]
        # upsert (not add) so pressing "Load" a second time re-indexes the
        # same ids instead of failing on duplicates.
        collection.upsert(
            documents=[excerpt],
            embeddings=[model.encode(excerpt).tolist()],
            ids=[filename],
            metadatas=[{"source": filename}],
        )

        loaded_files[stem.replace("_", " ").lower()] = filename

    speak_system("All files loaded successfully.")
    yield "✅ Loaded: " + ", ".join(loaded_files.keys())


def update_questions(pdf_name):
    """Return a Dropdown holding the preset questions for *pdf_name*.

    A topic in ``QUESTIONS`` matches when any of its words occurs in the
    lower-cased PDF name.  An empty dropdown is returned when nothing matches
    or no PDF is selected.
    """
    pdf_key = (pdf_name or "").lower()
    for topic, questions in QUESTIONS.items():
        if any(word in pdf_key for word in topic.split()):
            return gr.Dropdown(choices=questions, value=questions[0])
    return gr.Dropdown(choices=[], value=None)


@functools.lru_cache(maxsize=16)
def _embed_chunks(pdf_text: str):
    """Split *pdf_text* into ``CHUNK_WORDS``-word chunks and embed them once.

    Cached per document so repeated questions about the same PDF do not
    re-encode the whole text.  Returns ``(chunks, vectors)``; ``vectors`` is
    ``None`` when the text contains no words.
    """
    words = pdf_text.split()
    chunks = tuple(
        " ".join(words[start:start + CHUNK_WORDS])
        for start in range(0, len(words), CHUNK_WORDS)
    )
    if not chunks:
        return chunks, None
    # One batched call instead of one encode() call per chunk.
    return chunks, np.asarray(model.encode(list(chunks)))


def find_best_chunk(question: str, pdf_text: str) -> str:
    """Return the chunk of *pdf_text* most similar to *question*.

    Similarity is the cosine between the question embedding and each chunk
    embedding.  When the text has no words, its first 500 characters are
    returned instead.
    """
    chunks, chunk_vectors = _embed_chunks(pdf_text)
    if not chunks:
        return pdf_text[:500]

    question_vector = np.asarray(model.encode(question))
    norms = np.linalg.norm(chunk_vectors, axis=1) * np.linalg.norm(question_vector)
    # Floor the denominator so a zero-length vector cannot produce NaN scores.
    similarities = (chunk_vectors @ question_vector) / np.maximum(norms, 1e-12)
    return chunks[int(np.argmax(similarities))]


def ask_groq(question: str, context: str, file_name: str) -> str:
    """Ask the Groq chat model to answer *question* using *context*.

    Args:
        question: The student's question.
        context: Text excerpt from the PDF to ground the answer.
        file_name: Display name of the PDF, mentioned in the user message.

    Returns:
        The model's reply text, or an empty string when the reply has no
        content.
    """
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"Context from {file_name}: {context}\n\nQuestion: {question}",
            },
        ],
        max_tokens=80,
    )
    return response.choices[0].message.content or ""


def _find_loaded_file(pdf_name: str) -> Optional[str]:
    """Return the loaded display name that best matches *pdf_name*.

    A name is a candidate when at least one word of *pdf_name* occurs in it.
    Among candidates the one containing the most query words wins (the first
    registered on ties), so "embedded systems" is not hijacked by another
    file that merely shares the word "systems".
    """
    query_words = pdf_name.lower().split()
    best_name, best_score = None, 0
    for name in loaded_files:
        lowered = name.lower()
        score = sum(word in lowered for word in query_words)
        if score > best_score:
            best_name, best_score = name, score
    return best_name


def demo_interaction(pdf_name: str, question: str) -> Iterator[Tuple[str, str]]:
    """Run one scripted student/system exchange, yielding UI updates.

    When *pdf_name* differs from the previously announced file, the system
    first asks for the file name, the "student" says it, and the match is
    confirmed aloud.  Then the question is spoken, the best-matching chunk of
    the PDF is sent to the LLM, and the answer is spoken.

    Yields:
        ``(conversation_log, selected_file)`` tuples for the two output boxes.
    """
    log = ""
    requested = pdf_name.strip().lower()

    if requested != current_file["name"]:
        speak_system("Please say the name of your PDF file.")
        log += "🔊 System: Please say the name of your PDF file.\n"
        yield log, ""
        time.sleep(1)

        speak_user(pdf_name)
        log += f"🎤 Student: {pdf_name}\n"
        yield log, ""
        time.sleep(1)

        found = _find_loaded_file(pdf_name)
        if not found:
            speak_system("Sorry, I could not find that file.")
            log += "🔊 System: Sorry, I could not find that file.\n"
            yield log, "Not found"
            return

        current_file["name"] = requested
        speak_system(f"Found {found}. Ready for your question.")
        log += f"🔊 System: Found {found}. Ready for your question.\n"
        yield log, found
        time.sleep(1)
    else:
        # Same file as last time: skip the introduction, just resolve it.
        found = _find_loaded_file(pdf_name)
        if not found:
            yield "File not found.", "Not found"
            return

    speak_user(question)
    log += f"🎤 Student: {question}\n"
    yield log, found
    time.sleep(1)

    pdf_text = pdf_texts[loaded_files[found]]
    context = find_best_chunk(question, pdf_text)
    answer = ask_groq(question, context, found)

    speak_system(answer)
    log += f"🔊 System: {answer}\n"
    yield log, found


with gr.Blocks() as app:
    gr.Markdown("# 🎓 Echo Study – Voice-First Study Assistant")
    gr.Markdown("♿ Designed for visually impaired students")

    with gr.Row():
        load_btn = gr.Button("📂 Load Study Materials")
        load_status = gr.Textbox(label="Status")

    gr.Markdown("### 🎤 Demo Interaction")

    # Choices come from QUESTIONS so the two lists cannot drift apart.
    pdf_input = gr.Dropdown(
        choices=list(QUESTIONS),
        value="embedded systems",
        label="📄 Select PDF",
    )
    question_input = gr.Dropdown(
        choices=QUESTIONS["embedded systems"],
        value=QUESTIONS["embedded systems"][0],
        label="❓ Select Question",
    )
    selected_file = gr.Textbox(label="📂 Selected File", interactive=False)
    start_btn = gr.Button("▶️ Start Demo", variant="primary")
    conversation_log = gr.Textbox(label="Conversation Log", lines=10)

    pdf_input.change(update_questions, inputs=pdf_input, outputs=question_input)
    load_btn.click(load_all_pdfs, outputs=load_status, show_progress=False)
    start_btn.click(
        demo_interaction,
        inputs=[pdf_input, question_input],
        outputs=[conversation_log, selected_file],
    )

app.launch()