Spaces:

amritn8
/

try12

Runtime error

App Files Files Community

amritn8 committed on Jul 30, 2025

Commit

d3a1193

verified ·

1 Parent(s): 3533296

Upload 5 files

Browse files

Files changed (5) hide show

app.py +108 -0
lexpilot_adavnced.ipynb +0 -0
lexpilot_embeddings_tensor.pt +3 -0
lexpilot_trained_model.pt +3 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import os
+import torch
+import whisper
+import PyPDF2
+import gradio as gr
+from transformers import BertTokenizerFast, BertForQuestionAnswering, pipeline
+from torch.nn.functional import softmax
+from docx import Document
+device = "cuda" if torch.cuda.is_available() else "cpu"
+qa_model = BertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2").to(device)
+tokenizer = BertTokenizerFast.from_pretrained("deepset/bert-base-cased-squad2")
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+whisper_model = whisper.load_model("base")
+def extract_text(file_obj):
+    ext = os.path.splitext(file_obj.name)[1].lower()
+    if ext == ".pdf":
+        reader = PyPDF2.PdfReader(file_obj)
+        return "\n".join([p.extract_text() for p in reader.pages if p.extract_text()])
+    elif ext == ".docx":
+        doc = Document(file_obj)
+        return "\n".join([p.text for p in doc.paragraphs])
+    elif ext == ".txt":
+        return file_obj.read().decode("utf-8")
+    return ""
+def summarize_text(text):
+    if len(text) < 50:
+        return "Text too short to summarize."
+    if len(text) > 1000:
+        text = text[:1000]
+    summary = summarizer(text, max_length=120, min_length=30, do_sample=False)
+    return summary[0]['summary_text']
+def ask_question(question, context):
+    inputs = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True, max_length=512).to(device)
+    with torch.no_grad():
+        outputs = qa_model(**inputs)
+    start_idx = torch.argmax(outputs.start_logits)
+    end_idx = torch.argmax(outputs.end_logits) + 1
+    score = softmax(outputs.start_logits, dim=1)[0][start_idx] * softmax(outputs.end_logits, dim=1)[0][end_idx - 1]
+    answer = tokenizer.decode(inputs["input_ids"][0][start_idx:end_idx])
+    return f"Answer: {answer.strip()}\nConfidence: {round(score.item()*100, 2)}%"
+def transcribe(audio_path):
+    result = whisper_model.transcribe(audio_path)
+    return result["text"]
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎙️📄 LexPilot: Voice + Document Q&A Assistant")
+    gr.Markdown("Upload a document or paste content. Ask questions by typing or using your voice.")
+    with gr.Tab("Question Answering"):
+        with gr.Row():
+            uploaded_file = gr.File(label="Upload .pdf / .docx / .txt", file_types=[".pdf", ".docx", ".txt"])
+            pasted_text = gr.Textbox(label="Paste text manually", lines=10)
+        with gr.Row():
+            typed_question = gr.Textbox(label="Type your question")
+            audio_input = gr.Audio(source="microphone",type="filepath", label="Or speak your question")
+        qa_btn = gr.Button("Get Answer")
+        qa_output = gr.Textbox(label="Answer and Confidence", lines=3)
+        def handle_qa(file, text, typed, audio):
+            context = ""
+            if file:
+                context = extract_text(file)
+            elif text:
+                context = text
+            else:
+                return "❗ Please upload or paste content."
+            if typed:
+                question = typed
+            elif audio:
+                question = transcribe(audio)
+            else:
+                return "❗ Please speak or type a question."
+            return ask_question(question, context)
+        qa_btn.click(handle_qa, inputs=[uploaded_file, pasted_text, typed_question, audio_input], outputs=qa_output)
+    with gr.Tab("Summarization"):
+        with gr.Row():
+            sum_file = gr.File(label="Upload .pdf / .docx / .txt", file_types=[".pdf", ".docx", ".txt"])
+            sum_text = gr.Textbox(label="Or paste content", lines=10)
+        sum_btn = gr.Button("Summarize")
+        sum_output = gr.Textbox(label="Summary", lines=4)
+        def handle_summary(file, text):
+            if file:
+                context = extract_text(file)
+            elif text:
+                context = text
+            else:
+                return "❗ Please upload or paste content to summarize."
+            return summarize_text(context)
+        sum_btn.click(handle_summary, inputs=[sum_file, sum_text], outputs=sum_output)
+demo.launch()

lexpilot_adavnced.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

lexpilot_embeddings_tensor.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44736d79307d6ac511c136c62cad24901b60b0c622780e603fe2b2404737e184
+size 9438523

lexpilot_trained_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebe0ab544c7642ed9bda903288c02940d2da063d20983c64c8636c5b31a26369
+size 435656713

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+torch
+transformers
+PyPDF2
+python-docx
+gradio
+git+https://github.com/openai/whisper.git