amritn8 commited on
Commit
e1982cd
·
verified ·
1 Parent(s): ed5f7ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -63
app.py CHANGED
@@ -2,18 +2,23 @@ import os
2
  import torch
3
  import whisper
4
  import PyPDF2
5
- import gradio as gr
6
  from transformers import BertTokenizerFast, BertForQuestionAnswering, pipeline
7
  from torch.nn.functional import softmax
8
  from docx import Document
 
9
 
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
 
12
- qa_model = BertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2").to(device)
13
- tokenizer = BertTokenizerFast.from_pretrained("deepset/bert-base-cased-squad2")
14
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
15
- whisper_model = whisper.load_model("base")
 
 
 
 
16
 
 
17
 
18
  def extract_text(file_obj):
19
  ext = os.path.splitext(file_obj.name)[1].lower()
@@ -24,10 +29,9 @@ def extract_text(file_obj):
24
  doc = Document(file_obj)
25
  return "\n".join([p.text for p in doc.paragraphs])
26
  elif ext == ".txt":
27
- return file_obj.read().decode("utf-8")
28
  return ""
29
 
30
-
31
  def summarize_text(text):
32
  if len(text) < 50:
33
  return "Text too short to summarize."
@@ -36,7 +40,6 @@ def summarize_text(text):
36
  summary = summarizer(text, max_length=120, min_length=30, do_sample=False)
37
  return summary[0]['summary_text']
38
 
39
-
40
  def ask_question(question, context):
41
  inputs = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True, max_length=512).to(device)
42
  with torch.no_grad():
@@ -47,62 +50,60 @@ def ask_question(question, context):
47
  answer = tokenizer.decode(inputs["input_ids"][0][start_idx:end_idx])
48
  return f"Answer: {answer.strip()}\nConfidence: {round(score.item()*100, 2)}%"
49
 
50
-
51
  def transcribe(audio_path):
52
  result = whisper_model.transcribe(audio_path)
53
  return result["text"]
54
 
55
-
56
- with gr.Blocks() as demo:
57
- gr.Markdown("# πŸŽ™οΈπŸ“„ LexPilot: Voice + Document Q&A Assistant")
58
- gr.Markdown("Upload a document or paste content. Ask questions by typing or using your voice.")
59
-
60
- with gr.Tab("Question Answering"):
61
- with gr.Row():
62
- uploaded_file = gr.File(label="Upload .pdf / .docx / .txt", file_types=[".pdf", ".docx", ".txt"])
63
- pasted_text = gr.Textbox(label="Paste text manually", lines=10)
64
- with gr.Row():
65
- typed_question = gr.Textbox(label="Type your question")
66
- audio_input = gr.Audio(source="microphone",type="filepath", label="Or speak your question")
67
- qa_btn = gr.Button("Get Answer")
68
- qa_output = gr.Textbox(label="Answer and Confidence", lines=3)
69
-
70
- def handle_qa(file, text, typed, audio):
71
- context = ""
72
- if file:
73
- context = extract_text(file)
74
- elif text:
75
- context = text
76
- else:
77
- return "❗ Please upload or paste content."
78
-
79
- if typed:
80
- question = typed
81
- elif audio:
82
- question = transcribe(audio)
83
- else:
84
- return "❗ Please speak or type a question."
85
-
86
- return ask_question(question, context)
87
-
88
- qa_btn.click(handle_qa, inputs=[uploaded_file, pasted_text, typed_question, audio_input], outputs=qa_output)
89
-
90
- with gr.Tab("Summarization"):
91
- with gr.Row():
92
- sum_file = gr.File(label="Upload .pdf / .docx / .txt", file_types=[".pdf", ".docx", ".txt"])
93
- sum_text = gr.Textbox(label="Or paste content", lines=10)
94
- sum_btn = gr.Button("Summarize")
95
- sum_output = gr.Textbox(label="Summary", lines=4)
96
-
97
- def handle_summary(file, text):
98
- if file:
99
- context = extract_text(file)
100
- elif text:
101
- context = text
102
- else:
103
- return "❗ Please upload or paste content to summarize."
104
- return summarize_text(context)
105
-
106
- sum_btn.click(handle_summary, inputs=[sum_file, sum_text], outputs=sum_output)
107
-
108
- demo.launch()
 
2
  import torch
3
  import whisper
4
  import PyPDF2
 
5
  from transformers import BertTokenizerFast, BertForQuestionAnswering, pipeline
6
  from torch.nn.functional import softmax
7
  from docx import Document
8
+ import streamlit as st
9
 
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
 
12
# Instantiate every heavy model exactly once per Streamlit session.
@st.cache_resource
def load_models():
    """Build and cache the QA, summarization and speech-to-text models.

    Returns:
        tuple: (qa_model, tokenizer, summarizer, whisper_model), where the
        BERT QA model is moved onto the module-level ``device``.
    """
    bert_qa = BertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2").to(device)
    bert_tokenizer = BertTokenizerFast.from_pretrained("deepset/bert-base-cased-squad2")
    bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    speech_model = whisper.load_model("base")
    return bert_qa, bert_tokenizer, bart_summarizer, speech_model

qa_model, tokenizer, summarizer, whisper_model = load_models()
22
 
23
  def extract_text(file_obj):
24
  ext = os.path.splitext(file_obj.name)[1].lower()
 
29
  doc = Document(file_obj)
30
  return "\n".join([p.text for p in doc.paragraphs])
31
  elif ext == ".txt":
32
+ return file_obj.getvalue().decode("utf-8")
33
  return ""
34
 
 
35
  def summarize_text(text):
36
  if len(text) < 50:
37
  return "Text too short to summarize."
 
40
  summary = summarizer(text, max_length=120, min_length=30, do_sample=False)
41
  return summary[0]['summary_text']
42
 
 
43
  def ask_question(question, context):
44
  inputs = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True, max_length=512).to(device)
45
  with torch.no_grad():
 
50
  answer = tokenizer.decode(inputs["input_ids"][0][start_idx:end_idx])
51
  return f"Answer: {answer.strip()}\nConfidence: {round(score.item()*100, 2)}%"
52
 
 
53
def transcribe(audio_path):
    """Run the cached Whisper model on the audio file at *audio_path*.

    Returns the recognized text (the ``"text"`` field of Whisper's result).
    """
    transcription = whisper_model.transcribe(audio_path)
    return transcription["text"]
56
 
57
# --- Streamlit UI ------------------------------------------------------------
# Flat script section: Streamlit re-runs this top-to-bottom on every
# user interaction, so all widgets are redeclared each run.
st.title("πŸŽ™οΈπŸ“„ LexPilot: Voice + Document Q&A Assistant")
st.write("Upload a document or paste content. Ask questions by typing or speaking.")

tab = st.tabs(["Question Answering", "Summarization"])

with tab[0]:
    uploaded_file = st.file_uploader("Upload .pdf / .docx / .txt", type=["pdf", "docx", "txt"])
    pasted_text = st.text_area("Or paste text manually", height=150)

    typed_question = st.text_input("Type your question")
    audio_input = st.file_uploader("Or upload audio file (wav, mp3, m4a)", type=["wav", "mp3", "m4a"])

    if st.button("Get Answer"):
        # Resolve the context: uploaded file wins over pasted text.
        context = ""
        if uploaded_file:
            context = extract_text(uploaded_file)
        elif pasted_text.strip():
            context = pasted_text.strip()
        else:
            st.warning("❗ Please upload or paste content.")
            st.stop()  # halt this run; nothing below can work without context

        # Resolve the question: typed text wins over the audio upload.
        if typed_question.strip():
            question = typed_question.strip()
        elif audio_input:
            # Persist the upload with its original extension so ffmpeg/Whisper
            # can detect the container format, and delete the temp file when
            # done (the previous extension-less "temp_audio" was never removed
            # and accumulated between runs).
            suffix = os.path.splitext(audio_input.name)[1] or ".wav"
            tmp_path = f"temp_audio{suffix}"
            with open(tmp_path, "wb") as f:
                f.write(audio_input.getbuffer())
            try:
                question = transcribe(tmp_path)
            finally:
                os.remove(tmp_path)
            st.write(f"Transcribed question: {question}")
        else:
            st.warning("❗ Please type or upload an audio question.")
            st.stop()

        answer = ask_question(question, context)
        st.text_area("Answer and Confidence", value=answer, height=100)

with tab[1]:
    sum_file = st.file_uploader("Upload .pdf / .docx / .txt to summarize", type=["pdf", "docx", "txt"])
    sum_text = st.text_area("Or paste content to summarize", height=150)

    if st.button("Summarize"):
        # Same precedence as the QA tab: file upload over pasted text.
        context = ""
        if sum_file:
            context = extract_text(sum_file)
        elif sum_text.strip():
            context = sum_text.strip()
        else:
            st.warning("❗ Please upload or paste content to summarize.")
            st.stop()

        summary = summarize_text(context)
        st.text_area("Summary", value=summary, height=150)