tejovanth committed on
Commit
6954ed2
·
verified ·
1 Parent(s): 8f3e454

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -15
app.py CHANGED
@@ -1,40 +1,63 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import fitz # PyMuPDF
 
 
4
 
5
# Summarization pipeline, built once when the module is imported and reused
# by every request (checkpoint: facebook/bart-large-cnn).
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
7
 
8
# Function to extract text from the uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        pdf_file: Uploaded file object; its ``name`` attribute is used as the
            path of the PDF to open.

    Returns:
        The text of all pages joined in page order ("" when no page yields
        any text).
    """
    # Open by path rather than reading the handle. The context manager
    # closes the document even if a page fails to parse; previously it was
    # left open until garbage collection.
    with fitz.open(pdf_file.name) as doc:
        return "".join(page.get_text() for page in doc)
15
 
16
# Function to summarize the extracted text
def summarize_pdf(pdf_file):
    """Summarize the text of an uploaded PDF.

    The text is extracted with ``extract_text_from_pdf``, cut to its first
    3000 characters and passed to the module-level ``summarizer`` pipeline.

    Args:
        pdf_file: Uploaded file object, or None when nothing was uploaded.

    Returns:
        The summary text, or a message starting with "❌" explaining why no
        summary was produced. Exceptions are reported as text, never raised.
    """
    if pdf_file is None:
        # Without this guard a missing upload surfaces as an opaque
        # "'NoneType' object has no attribute ..." error message.
        return "❌ No file uploaded. Please upload a PDF."

    try:
        text = extract_text_from_pdf(pdf_file)
        if not text.strip():
            return "❌ The PDF seems empty or has no extractable text."

        text = text[:3000]  # Keep the input short for the model
        # A character cap does not bound the token count, so also let the
        # pipeline truncate to the model's maximum input length.
        summary = summarizer(
            text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Error: {e}"
27
 
# Gradio UI
# One file upload in, one text box out; summarize_pdf does the work.
# The labels and title carry the intended emoji (they were mis-encoded).
# NOTE(review): type="file" appears to be accepted only by older Gradio
# releases - confirm against the pinned Gradio version.
demo = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(label="📄 Upload PDF of Academic Notes", type="file"),
    outputs=gr.Textbox(label="📝 Summarized Notes"),
    title="📚 Academic Note Summarizer",
    description="Upload a PDF of your academic notes. The app extracts and summarizes the content using a Hugging Face transformer model.",
)

# Launch the app
demo.launch()
39
 
40
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import fitz # PyMuPDF for PDFs
4
+ import pytesseract
5
+ from PIL import Image
6
 
7
# Hugging Face summarization pipeline, built once when the module is imported
# and shared by every request (checkpoint: facebook/bart-large-cnn).
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
9
 
10
# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        pdf_file: Binary file-like object whose ``read()`` returns the raw
            PDF bytes.

    Returns:
        The text of all pages joined in page order ("" when no page yields
        any text).
    """
    # The context manager closes the document even if a page fails to parse;
    # previously it was left open until garbage collection.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
17
 
18
# Extract text from TXT file
def extract_text_from_txt(txt_file):
    """Return the contents of an uploaded text file as a string.

    Args:
        txt_file: File-like object. ``read()`` may return bytes (binary
            handle) or str (text handle).

    Returns:
        The file's text with a leading byte-order mark, if any, removed.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    data = txt_file.read()
    if isinstance(data, str):
        # Text-mode handle: already decoded, only drop a leading BOM.
        return data[1:] if data.startswith("\ufeff") else data
    # "utf-8-sig" decodes plain UTF-8 unchanged but strips a leading BOM.
    # Left in place, the BOM is not whitespace, so a BOM-only file would
    # pass the caller's emptiness check.
    return data.decode("utf-8-sig")
21
+
22
# Extract text from image using OCR
def extract_text_from_image(image_file):
    """Return the text that OCR recognizes in an uploaded image.

    Args:
        image_file: Path or file-like object accepted by ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Release the image as soon as OCR is done, including when OCR raises.
    with Image.open(image_file) as image:
        return pytesseract.image_to_string(image)
26
+
27
# Main function to handle various formats
def summarize_uploaded_file(file):
    """Summarize an uploaded PDF, TXT, or image file.

    The extractor is chosen from the file-name extension (case-insensitive).
    The extracted text is cut to its first 3000 characters and passed to the
    module-level ``summarizer`` pipeline.

    Args:
        file: Uploaded file object exposing ``name`` plus whatever the chosen
            extractor reads, or None when nothing was uploaded.

    Returns:
        The summary text, or a message starting with "❌" explaining why no
        summary was produced. Exceptions are reported as text, never raised.
    """
    if file is None:
        # Without this guard a missing upload surfaces as an opaque
        # "'NoneType' object has no attribute 'name'" error message.
        return "❌ No file uploaded. Please upload PDF, TXT, or an image file."

    try:
        # Text after the last dot; a name without a dot yields the whole
        # name, which falls through to the unsupported-type branch.
        file_type = file.name.rpartition(".")[2].lower()

        if file_type == "pdf":
            text = extract_text_from_pdf(file)
        elif file_type == "txt":
            text = extract_text_from_txt(file)
        elif file_type in ("jpg", "jpeg", "png"):
            text = extract_text_from_image(file)
        else:
            return "❌ Unsupported file type. Please upload PDF, TXT, or an image file."

        if not text.strip():
            return "❌ The file seems empty or has no readable content."

        text = text[:3000]  # Keep the input short for the summarization model
        # A character cap does not bound the token count, so also let the
        # pipeline truncate to the model's maximum input length.
        summary = summarizer(
            text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Error: {e}"
50
 
# Gradio UI
# One file upload in, one text box out; summarize_uploaded_file does the work.
# The labels and title carry the intended emoji (they were mis-encoded).
# NOTE(review): type="file" appears to be accepted only by older Gradio
# releases - confirm against the pinned Gradio version.
demo = gr.Interface(
    fn=summarize_uploaded_file,
    inputs=gr.File(label="📄 Upload Notes (PDF, TXT, or Handwritten Image)", type="file"),
    outputs=gr.Textbox(label="📝 Summary"),
    title="🧠 Universal Note Summarizer",
    description="Upload academic notes as PDF, text, or an image of handwritten notes. The app extracts and summarizes the content using a Hugging Face model and OCR.",
)

demo.launch()
61
 
62
 
63
+