Update app.py
app.py CHANGED
@@ -1,49 +1,91 @@
-import …
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import PyPDF2
-import torch
+import gradio as gr
 import os
+import json
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from ppt_parser import transfer_to_structure
 
-…
-…
+# ✅ Hugging Face token for gated model access
+hf_token = os.getenv("HF_TOKEN")
 
-# ✅ Load …
-…
+# ✅ Load summarization pipeline
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
-# ✅ Load …
-@…
-def …
-    tokenizer = AutoTokenizer.from_pretrained(
-        "mistralai/Mistral-7B-Instruct-v0.1",
-        token=hf_token
-    )
+# ✅ Load Mistral 7B Instruct model
+@gr.cache()
+def load_mistral():
+    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", token=hf_token)
     model = AutoModelForCausalLM.from_pretrained(
         "mistralai/Mistral-7B-Instruct-v0.1",
         torch_dtype=torch.float16,
         device_map="auto",
         token=hf_token
     )
-… [old lines 26-39 removed; content lost in extraction]
-if …
-… [old lines 41-49 removed; content lost in extraction]
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
+    return pipe
+
+mistral_pipe = load_mistral()
+
+# ✅ Global text buffer
+extracted_text = ""
+
+def extract_text_from_pptx_json(parsed_json: dict) -> str:
+    text = ""
+    for slide in parsed_json.values():
+        for shape in slide.values():
+            if shape.get('type') == 'group':
+                for group_shape in shape.get('group_content', {}).values():
+                    if group_shape.get('type') == 'text':
+                        for para_key, para in group_shape.items():
+                            if para_key.startswith("paragraph_"):
+                                text += para.get("text", "") + "\n"
+            elif shape.get('type') == 'text':
+                for para_key, para in shape.items():
+                    if para_key.startswith("paragraph_"):
+                        text += para.get("text", "") + "\n"
+    return text.strip()
+
+def handle_pptx_upload(pptx_file):
+    global extracted_text
+    tmp_path = pptx_file.name
+    parsed_json_str, _ = transfer_to_structure(tmp_path, "images")
+    parsed_json = json.loads(parsed_json_str)
+    extracted_text = extract_text_from_pptx_json(parsed_json)
+    return extracted_text or "No readable text found in slides."
+
+def summarize_text():
+    global extracted_text
+    if not extracted_text:
+        return "Please upload and extract text from a PPTX file first."
+    summary = summarizer(extracted_text, max_length=200, min_length=50, do_sample=False)[0]['summary_text']
+    return summary
+
+def clarify_concept(question):
+    global extracted_text
+    if not extracted_text:
+        return "Please upload and extract text from a PPTX file first."
+    prompt = f"[INST] Use the following context to answer the question:\n\n{extracted_text}\n\nQuestion: {question} [/INST]"
+    response = mistral_pipe(prompt)[0]["generated_text"]
+    return response.replace(prompt, "").strip()
+
+# ✅ Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 AI-Powered Study Assistant for PowerPoint Lectures (Mistral 7B)")
+
+    pptx_input = gr.File(label="📂 Upload PPTX File", file_types=[".pptx"])
+    extract_btn = gr.Button("🔍 Extract & Summarize")
+
+    extracted_output = gr.Textbox(label="📄 Extracted Text", lines=10, interactive=False)
+    summary_output = gr.Textbox(label="📝 Summary", interactive=False)
+
+    extract_btn.click(handle_pptx_upload, inputs=[pptx_input], outputs=[extracted_output])
+    extract_btn.click(summarize_text, outputs=[summary_output])
+
+    question = gr.Textbox(label="❓ Ask a Question")
+    ask_btn = gr.Button("💬 Ask Mistral")
+    ai_answer = gr.Textbox(label="🤖 Mistral Answer", lines=4)
+
+    ask_btn.click(clarify_concept, inputs=[question], outputs=[ai_answer])
+
+if __name__ == "__main__":
+    demo.launch()
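
Note for reviewers: `gr.cache` is not a decorator I can confirm in Gradio's public API, so the `@gr.cache()` call added above may raise an AttributeError at startup depending on the installed Gradio version. A minimal load-once sketch using only the standard library instead; the name `load_mistral_cached` is hypothetical, while the model id, token variable, and pipeline settings are copied from the diff:

import functools

@functools.lru_cache(maxsize=1)
def load_mistral_cached():
    # Hypothetical replacement for the @gr.cache() decorator in the diff:
    # lru_cache memoizes the loader so the heavy model load runs only once.
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.1",
        torch_dtype=torch.float16,
        device_map="auto",
        token=hf_token,
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)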
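
Separately, `facebook/bart-large-cnn` accepts roughly 1024 tokens of input, so `summarize_text()` will truncate or error on long decks. A hedged sketch of chunk-then-join summarization that reuses the `summarizer` pipeline from the diff; the helper name and the 3000-character chunk size are illustrative assumptions:

def summarize_long(text: str, chunk_chars: int = 3000) -> str:
    # Split the extracted deck text into fixed-size character chunks,
    # summarize each chunk separately, then join the partial summaries.
    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
    parts = [
        summarizer(c, max_length=200, min_length=50, do_sample=False)[0]["summary_text"]
        for c in chunks
    ]
    return "\n".join(parts)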