Spaces:

tejovanth
/

exampletwo

Sleeping

App Files Community

tejovanth committed on Apr 17, 2025

Commit

6954ed2

verified ·

1 Parent(s): 8f3e454

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -15

app.py CHANGED Viewed

@@ -1,40 +1,63 @@
 import gradio as gr
 from transformers import pipeline
-import fitz  # PyMuPDF
-# Load the summarization model from Hugging Face
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-# Function to extract text from the uploaded PDF
 def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(pdf_file.name)  # ✅ Use file path instead of .read()
     text = ""
     for page in doc:
         text += page.get_text()
     return text
-# Function to summarize the extracted text
-def summarize_pdf(pdf_file):
     try:
-        text = extract_text_from_pdf(pdf_file)
         if len(text.strip()) == 0:
-            return "❌ The PDF seems empty or has no extractable text."
-        text = text[:3000]  # Truncate to fit within model's token limit
         summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
         return summary[0]['summary_text']
     except Exception as e:
         return f"❌ Error: {str(e)}"
 # Gradio UI
 demo = gr.Interface(
-    fn=summarize_pdf,
-    inputs=gr.File(label="📄 Upload PDF of Academic Notes", type="file"),
-    outputs=gr.Textbox(label="📝 Summarized Notes"),
-    title="📚 Academic Note Summarizer",
-    description="Upload a PDF of your academic notes. The app extracts and summarizes the content using a Hugging Face transformer model."
 )
-# Launch the app
 demo.launch()

 import gradio as gr
 from transformers import pipeline
+import fitz  # PyMuPDF for PDFs
+import pytesseract
+from PIL import Image
+# Load Hugging Face summarization model
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+# Extract text from PDF
 def extract_text_from_pdf(pdf_file):
+    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
     text = ""
     for page in doc:
         text += page.get_text()
     return text
+# Extract text from TXT file
+def extract_text_from_txt(txt_file):
+    return txt_file.read().decode("utf-8")
+# Extract text from image using OCR
+def extract_text_from_image(image_file):
+    image = Image.open(image_file)
+    return pytesseract.image_to_string(image)
+# Main function to handle various formats
+def summarize_uploaded_file(file):
     try:
+        file_type = file.name.split(".")[-1].lower()
+        if file_type == "pdf":
+            text = extract_text_from_pdf(file)
+        elif file_type == "txt":
+            text = extract_text_from_txt(file)
+        elif file_type in ["jpg", "jpeg", "png"]:
+            text = extract_text_from_image(file)
+        else:
+            return "❌ Unsupported file type. Please upload PDF, TXT, or an image file."
         if len(text.strip()) == 0:
+            return "❌ The file seems empty or has no readable content."
+        text = text[:3000]  # Truncate for summarization model
         summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
         return summary[0]['summary_text']
     except Exception as e:
         return f"❌ Error: {str(e)}"
 # Gradio UI
 demo = gr.Interface(
+    fn=summarize_uploaded_file,
+    inputs=gr.File(label="📄 Upload Notes (PDF, TXT, or Handwritten Image)", type="file"),
+    outputs=gr.Textbox(label="📝 Summary"),
+    title="🧠 Universal Note Summarizer",
+    description="Upload academic notes as PDF, text, or an image of handwritten notes. The app extracts and summarizes the content using a Hugging Face model and OCR."
 )
 demo.launch()