tejovanth committed on
Commit
21a3052
·
verified ·
1 Parent(s): 18a4722

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -2,38 +2,39 @@ import gradio as gr
2
  from transformers import pipeline
3
  import fitz # PyMuPDF
4
 
5
- # Load summarization pipeline
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
8
- # Function to extract text from PDF
9
  def extract_text_from_pdf(pdf_file):
10
- doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
11
  text = ""
12
  for page in doc:
13
  text += page.get_text()
14
  return text
15
 
16
- # Combine everything into one function
17
  def summarize_pdf(pdf_file):
18
  try:
19
  text = extract_text_from_pdf(pdf_file)
20
  if len(text.strip()) == 0:
21
- return "The PDF seems empty or text is not extractable."
22
- # Truncate long text (BART model has ~1024 token limit)
23
- text = text[:3000]
24
  summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
25
  return summary[0]['summary_text']
26
  except Exception as e:
27
- return f"Error: {str(e)}"
28
 
29
- # Gradio Interface
30
  demo = gr.Interface(
31
  fn=summarize_pdf,
32
- inputs=gr.File(label="Upload PDF of Academic Notes"),
33
- outputs=gr.Textbox(label="Summarized Notes"),
34
- title="📄 Academic Note Summarizer (PDF)",
35
- description="Upload your academic notes in PDF format. The app will extract and summarize the content using a Hugging Face model."
36
  )
37
 
 
38
  demo.launch()
39
 
 
 
2
  from transformers import pipeline
3
  import fitz # PyMuPDF
4
 
5
+ # Load the summarization model from Hugging Face
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
8
+ # Function to extract text from the uploaded PDF
9
  def extract_text_from_pdf(pdf_file):
10
+ doc = fitz.open(pdf_file.name) # ✅ Use file path instead of .read()
11
  text = ""
12
  for page in doc:
13
  text += page.get_text()
14
  return text
15
 
16
+ # Function to summarize the extracted text
17
  def summarize_pdf(pdf_file):
18
  try:
19
  text = extract_text_from_pdf(pdf_file)
20
  if len(text.strip()) == 0:
21
+ return "❌ The PDF seems empty or has no extractable text."
22
+ text = text[:3000] # Truncate to fit within model's token limit
 
23
  summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
24
  return summary[0]['summary_text']
25
  except Exception as e:
26
+ return f"❌ Error: {str(e)}"
27
 
28
+ # Gradio UI
29
  demo = gr.Interface(
30
  fn=summarize_pdf,
31
+ inputs=gr.File(label="📄 Upload PDF of Academic Notes", type="file"),
32
+ outputs=gr.Textbox(label="📝 Summarized Notes"),
33
+ title="📚 Academic Note Summarizer",
34
+ description="Upload a PDF of your academic notes. The app extracts and summarizes the content using a Hugging Face transformer model."
35
  )
36
 
37
+ # Launch the app
38
  demo.launch()
39
 
40
+