Spaces:
Sleeping
Sleeping
File size: 7,371 Bytes
import os
import tempfile
import time
import uuid

import chromadb
import gradio as gr
import numpy as np
import PyPDF2
import pyttsx3
from dotenv import load_dotenv
from groq import Groq
from gtts import gTTS
from playsound import playsound
from sentence_transformers import SentenceTransformer
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
groq_client = Groq(api_key=GROQ_API_KEY)
model = SentenceTransformer('all-MiniLM-L6-v2')
client = chromadb.Client()
collection = client.create_collection("echo_study")
PDF_FOLDER = "."
#PDF_FOLDER = "pdfs"
loaded_files = {}
pdf_texts = {}
current_file = {"name": None}
QUESTIONS = {
"embedded systems": [
"How does the lecture define an Embedded System?",
"What are the primary resource constraints in embedded systems?",
"How do embedded systems interact with the physical world?"
],
"dynamic programming": [
"What is the simplest way to define Dynamic Programming?",
"How many times does DP solve each subproblem?",
"What is the simple formula for Dynamic Programming"
],
"mongol history": [
"Why did the Empire's huge size lead to its fall?",
"What was the original goal of the British East India Company?"
]
}
def speak_system(text):
    """Speak *text* aloud in the offline "system" voice (pyttsx3)."""
    tts_engine = pyttsx3.init()
    # A bit slower than the pyttsx3 default for clarity.
    tts_engine.setProperty('rate', 140)
    tts_engine.say(text)
    tts_engine.runAndWait()
def speak_user(text):
    """Speak *text* aloud in the "student" voice via Google TTS.

    Renders the text to a temporary MP3 with gTTS, plays it with playsound,
    and removes the file afterwards. Requires network access (gTTS).
    """
    # Use the system temp directory instead of a hard-coded per-user Windows
    # path so the app runs on any machine/account.
    audio_path = os.path.join(tempfile.gettempdir(), f"user_{uuid.uuid4()}.mp3")
    tts = gTTS(text=text, lang='en')
    tts.save(audio_path)
    try:
        playsound(audio_path)
    finally:
        # Clean up even if playback raises, so temp files don't accumulate.
        os.remove(audio_path)
def load_all_pdfs():
    """Generator: index every PDF in PDF_FOLDER for retrieval.

    For each ``*.pdf`` file: extracts its text, stores it in ``pdf_texts``,
    embeds the first 2000 characters into the Chroma collection, and records
    a friendly lowercase name in ``loaded_files``. Yields progress strings
    for the Gradio status textbox.
    """
    speak_system("Welcome back! Ready to tackle your studies?")
    yield "⏳ Processing Embeddings..."
    for filename in os.listdir(PDF_FOLDER):
        if not filename.endswith(".pdf"):
            continue
        filepath = os.path.join(PDF_FOLDER, filename)
        with open(filepath, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            text = ""
            for page in reader.pages:
                # extract_text() may return None (e.g. image-only pages);
                # guard so concatenation doesn't raise TypeError.
                text += page.extract_text() or ""
        pdf_texts[filename] = text
        # Only the first 2000 characters are embedded/stored for retrieval.
        embedding = model.encode(text[:2000]).tolist()
        collection.add(
            documents=[text[:2000]],
            embeddings=[embedding],
            ids=[filename],
            metadatas=[{"source": filename}]
        )
        name = filename.replace(".pdf", "").replace("_", " ").lower()
        loaded_files[name] = filename
        # Bug fix: the original f-string had no placeholder, so every file
        # reported the same static text instead of its own name.
        yield f"⏳ Processing: {filename}..."
    speak_system("All files loaded successfully.")
    yield "✅ Loaded: " + ", ".join(loaded_files.keys())
def update_questions(pdf_name):
    """Return a Dropdown update holding the preset questions for *pdf_name*.

    A topic matches when any word of its QUESTIONS key appears in the
    selected PDF name; falls back to an empty dropdown when nothing matches.
    """
    pdf_key = pdf_name.lower()
    for topic, prompts in QUESTIONS.items():
        if any(token in pdf_key for token in topic.split()):
            return gr.Dropdown(choices=prompts, value=prompts[0])
    return gr.Dropdown(choices=[], value=None)
def find_best_chunk(question, pdf_text):
    """Return the 80-word chunk of *pdf_text* most similar to *question*.

    Splits the text into consecutive 80-word chunks, embeds question and
    chunks with the shared sentence-transformer, and picks the chunk with
    the highest cosine similarity (first winner on ties).
    """
    tokens = pdf_text.split()
    chunks = [" ".join(tokens[start:start + 80])
              for start in range(0, len(tokens), 80)]
    if not chunks:
        # No words at all: fall back to the head of the raw text.
        return pdf_text[:500]
    q_vec = model.encode(question)
    q_norm = np.linalg.norm(q_vec)
    best_idx = 0
    best_score = None
    for idx, chunk in enumerate(chunks):
        c_vec = model.encode(chunk)
        score = np.dot(q_vec, c_vec) / (q_norm * np.linalg.norm(c_vec))
        if best_score is None or score > best_score:
            best_idx, best_score = idx, score
    return chunks[best_idx]
def ask_groq(question, context, file_name):
    """Ask the Groq LLM *question* grounded in *context* from *file_name*.

    Sends a system persona prompt plus the retrieved context to the
    llama-3.3-70b-versatile model and returns the assistant's reply text.
    NOTE(review): max_tokens=80 matches the "2 short sentences" instruction
    but may truncate the closing "Would you like more details?" line.
    """
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                # Persona prompt: tuned for screen-reader/TTS output, hence
                # "no markdown symbols" and short answers.
                "content": f"""You are EchoStudy, a warm and encouraging study partner for blind students.
When answering:
1. Start with a different warm phrase each time, like: Great question!, Interesting!, Good thinking!, Let me help you with that!
2. Use a simple real-life analogy to explain if needed.
3. Answer in 2 short sentences only, very simple and brief.
4. Avoid markdown symbols like stars or hashtags.
5. End with: Would you like more details?"""
            },
            {
                "role": "user",
                "content": f"Context from {file_name}: {context}\n\nQuestion: {question}"
            }
        ],
        max_tokens=80
    )
    return response.choices[0].message.content
def _find_loaded(pdf_name):
    """Return the loaded_files key whose name matches any word of *pdf_name*, or None."""
    for name in loaded_files:
        if any(word.lower() in name.lower() for word in pdf_name.split()):
            return name
    return None


def demo_interaction(pdf_name, question):
    """Generator driving the scripted voice demo conversation.

    Yields ``(conversation_log, selected_file)`` pairs for the Gradio UI
    while speaking each turn aloud. If the selected PDF differs from the
    previously "found" one, the full find-the-file dialogue is replayed;
    otherwise it jumps straight to the question.
    """
    log = ""
    if pdf_name.strip().lower() != current_file["name"]:
        # New file selected: act out the "find my file" exchange.
        speak_system("Please say the name of your PDF file.")
        log += "🔊 System: Please say the name of your PDF file.\n"
        yield log, ""
        time.sleep(1)
        speak_user(pdf_name)
        log += f"🎤 Student: {pdf_name}\n"
        yield log, ""
        time.sleep(1)
        found = _find_loaded(pdf_name)
        if not found:
            speak_system("Sorry, I could not find that file.")
            log += "🔊 System: Sorry, I could not find that file.\n"
            yield log, "Not found"
            return
        # Remember the selection so repeat questions skip the dialogue.
        current_file["name"] = pdf_name.strip().lower()
        speak_system(f"Found {found}. Ready for your question.")
        log += f"🔊 System: Found {found}. Ready for your question.\n"
        yield log, found
        time.sleep(1)
    else:
        # Same file as last time: silently re-resolve the friendly name.
        found = _find_loaded(pdf_name)
        if not found:
            yield "File not found.", "Not found"
            return
    speak_user(question)
    log += f"🎤 Student: {question}\n"
    yield log, found
    time.sleep(1)
    target_file = loaded_files[found]
    pdf_text = pdf_texts[target_file]
    context = find_best_chunk(question, pdf_text)
    answer = ask_groq(question, context, found)
    speak_system(answer)
    log += f"🔊 System: {answer}\n"
    yield log, found
# --- Gradio UI wiring --------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("# π Echo Study β Voice-First Study Assistant")
    gr.Markdown("βΏ Designed for visually impaired students")
    with gr.Row():
        load_btn = gr.Button("π Load Study Materials")
        load_status = gr.Textbox(label="Status")
    gr.Markdown("### π€ Demo Interaction")
    # Choices mirror the QUESTIONS topic keys.
    pdf_input = gr.Dropdown(
        choices=["embedded systems", "dynamic programming", "mongol history"],
        value="embedded systems",
        label="π Select PDF"
    )
    question_input = gr.Dropdown(
        choices=QUESTIONS["embedded systems"],
        value=QUESTIONS["embedded systems"][0],
        label="β Select Question"
    )
    selected_file = gr.Textbox(label="π Selected File", interactive=False)
    start_btn = gr.Button("βΆοΈ Start Demo", variant="primary")
    conversation_log = gr.Textbox(label="Conversation Log", lines=10)
    # Swap the question list whenever a different PDF topic is chosen.
    pdf_input.change(update_questions, inputs=pdf_input, outputs=question_input)
    # load_all_pdfs is a generator; each yield updates the status textbox.
    load_btn.click(load_all_pdfs, outputs=load_status, show_progress=False)
    # demo_interaction streams (log, selected_file) pairs as the demo plays.
    start_btn.click(demo_interaction, inputs=[pdf_input, question_input], outputs=[conversation_log, selected_file])
app.launch()