Spaces:

tejovanth
/

exampletwo

Sleeping

App Files Files Community

tejovanth committed on Apr 17, 2025

Commit

7116300

verified ·

1 Parent(s): 5228f47

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -41

app.py CHANGED Viewed

@@ -1,64 +1,68 @@
 import gradio as gr
 from transformers import pipeline
 import fitz  # PyMuPDF for PDFs
-import pytesseract
 from PIL import Image
-# Load Hugging Face summarization model
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
-# Extract text from PDF
-def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-# Extract text from TXT file
-def extract_text_from_txt(txt_file):
-    return txt_file.read().decode("utf-8")
-# Extract text from image using OCR
-def extract_text_from_image(image_file):
-    image = Image.open(image_file)
-    return pytesseract.image_to_string(image)
-# Main function to handle various formats
-def summarize_uploaded_file(file):
-    try:
-        file_type = file.name.split(".")[-1].lower()
-        if file_type == "pdf":
-            text = extract_text_from_pdf(file)
-        elif file_type == "txt":
-            text = extract_text_from_txt(file)
-        elif file_type in ["jpg", "jpeg", "png"]:
-            text = extract_text_from_image(file)
         else:
-            return "❌ Unsupported file type. Please upload PDF, TXT, or an image file."
-        if len(text.strip()) == 0:
-            return "❌ The file seems empty or has no readable content."
-        text = text[:3000]  # Truncate for summarization model
-        summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
-        return summary[0]['summary_text']
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
 # Gradio UI
 demo = gr.Interface(
-    fn=summarize_uploaded_file,
-    inputs=gr.File(label="📄 Upload Notes (PDF, TXT, or Handwritten Image)", type="file"),
-    outputs=gr.Textbox(label="📝 Summary"),
-    title="🧠 Universal Note Summarizer",
-    description="Upload academic notes as PDF, text, or an image of handwritten notes. The app extracts and summarizes the content using a Hugging Face model and OCR."
 )
 demo.launch()

 import gradio as gr
 from transformers import pipeline
 import fitz  # PyMuPDF for PDFs
+import pytesseract  # For OCR (images)
 from PIL import Image
+import io
+# Load summarization model
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
+# Function to extract text from different file types
def extract_text(file_obj):
    """Extract plain text from an uploaded PDF, JPEG/PNG image, or text file.

    The file type is detected from the leading "magic" bytes rather than
    from a file name, so the function works on raw content.

    Args:
        file_obj: Either the raw file content (``bytes``, ``bytearray`` or
            ``memoryview`` -- this is what ``gr.File(type="binary")`` hands
            to the callback) or a binary file-like object exposing
            ``read()`` and ``seek()``.

    Returns:
        The extracted text on success. On any failure a human-readable
        message starting with "❌" is returned instead; this function
        never raises.
    """
    if file_obj is None:
        # Nothing was uploaded; report it clearly instead of surfacing an
        # AttributeError about NoneType.
        return "❌ Error reading file: no file was uploaded."
    try:
        if isinstance(file_obj, (bytes, bytearray, memoryview)):
            # Raw content: there is nothing to read or rewind.
            file_bytes = bytes(file_obj)
        else:
            file_bytes = file_obj.read()
            # Rewind so the caller can still read the same object.
            file_obj.seek(0)

        header = file_bytes[:4]
        # Determine file type based on magic numbers
        if header.startswith(b'%PDF'):
            # The context manager closes the document even if a page fails.
            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        if header.startswith((b'\xFF\xD8', b'\x89PNG')):
            # It's an image (JPEG/PNG), use OCR
            image = Image.open(io.BytesIO(file_bytes))
            return pytesseract.image_to_string(image)

        # Anything else: try reading as plain text
        try:
            return file_bytes.decode("utf-8")
        except UnicodeDecodeError:
            return "❌ Unsupported file format or corrupted file."
    except Exception as e:
        # Top-level boundary for the UI callback: report the problem as
        # text rather than crashing the request.
        return f"❌ Error reading file: {str(e)}"
+# Summarize the extracted text
def summarize_file(file_obj):
    """Summarize the text content of an uploaded file.

    Args:
        file_obj: The uploaded file, passed straight through to
            ``extract_text``.

    Returns:
        The summary produced by the module-level ``summarizer`` pipeline,
        or a message starting with "❌" when no usable text could be
        extracted.
    """
    text = extract_text(file_obj)
    if not text or not text.strip():
        return "❌ No text found in the uploaded file."
    if text.startswith("❌"):
        # extract_text reports failures in-band as "❌ ..." strings; show
        # them to the user instead of summarizing the error message.
        return text
    # Cheap character cap to bound the work done per request.
    text = text[:3000]
    # A character cap does not guarantee the model's token limit, so also
    # let the pipeline truncate at the tokenizer level.
    summary = summarizer(
        text,
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]
 # Gradio UI
 demo = gr.Interface(
+    fn=summarize_file,
+    inputs=gr.File(label="📄 Upload Notes (PDF, TXT, or Handwritten Image)", type="binary"),
+    outputs=gr.Textbox(label="📝 Summarized Notes"),
+    title="📚 Note Summarizer",
+    description="Upload academic notes in PDF, TXT, or image format. This app extracts and summarizes the content using a Hugging Face transformer model."
 )
 demo.launch()