Azidan committed on
Commit
82b870b
·
verified ·
1 Parent(s): b512372

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -34
app.py CHANGED
@@ -6,40 +6,33 @@ from docx import Document
6
  import io
7
  import os
8
 
9
- # Load the summarization model once
10
  device = 0 if torch.cuda.is_available() else -1
11
  print(f"Using device: {'GPU' if device == 0 else 'CPU'}")
12
 
13
  summarizer = pipeline(
14
  "summarization",
15
- model="sshleifer/distilbart-cnn-12-6", # Fast & good for CPU
16
  device=device
17
  )
18
 
19
  def extract_text(file_path):
20
  if file_path is None:
21
  return ""
22
-
23
- # file_path is a string (temp path) or NamedString-like object; convert to str
24
- file_path = str(file_path) # Ensure it's a plain string
25
  filename = os.path.basename(file_path).lower()
26
-
27
  try:
28
  if filename.endswith('.pdf'):
29
  with pdfplumber.open(file_path) as pdf:
30
  return "\n".join(page.extract_text() or "" for page in pdf.pages)
31
-
32
  elif filename.endswith('.docx'):
33
  doc = Document(file_path)
34
  return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
35
-
36
  elif filename.endswith('.txt'):
37
  with open(file_path, "r", encoding="utf-8", errors="replace") as f:
38
  return f.read()
39
-
40
  else:
41
  return "Unsupported file. Please use .pdf, .docx, or .txt"
42
-
43
  except Exception as e:
44
  return f"Error reading file: {str(e)}"
45
 
@@ -54,13 +47,20 @@ def summarize(input_text, file_path, detail_level):
54
 
55
  words = len(text.split())
56
  if words < 100:
57
- return text # Too short → return as-is
58
 
59
- # Convert slider to target ratio
60
  target_ratio = detail_level
61
  target_length = int(words * target_ratio)
62
- max_l = max(500, min(1400, target_length + 250))
63
- min_l = max(300, int(target_length * 0.65))
 
 
 
 
 
 
 
 
64
 
65
  try:
66
  result = summarizer(
@@ -75,34 +75,20 @@ def summarize(input_text, file_path, detail_level):
75
  )
76
  return result[0]['summary_text']
77
  except Exception as e:
78
- return f"Error during summarization: {str(e)}\n(Try shorter text or lower detail level)"
79
 
80
- # Create Gradio interface
81
  interface = gr.Interface(
82
  fn=summarize,
83
  inputs=[
84
- gr.Textbox(
85
- lines=12,
86
- placeholder="Paste your lecture text here (or use the upload below)...",
87
- label="Lecture Text (Paste)"
88
- ),
89
- gr.File(
90
- file_types=[".pdf", ".docx", ".txt"],
91
- label="Upload Lecture File"
92
- ),
93
- gr.Slider(
94
- minimum=0.15,
95
- maximum=0.60,
96
- value=0.32,
97
- step=0.01,
98
- label="Detail Level (higher = longer, more detailed summary)"
99
- )
100
  ],
101
  outputs=gr.Textbox(label="Generated Summary"),
102
  title="Lecture Summarizer",
103
- description="Upload a lecture file (PDF/DOCX/TXT) or paste text. Adjust the slider for shorter or more detailed summaries.",
104
  flagging_mode="never",
105
  )
106
 
107
- # Launch with theme
108
  interface.launch(theme="soft")
 
6
  import io
7
  import os
8
 
9
+ # Load model
10
  device = 0 if torch.cuda.is_available() else -1
11
  print(f"Using device: {'GPU' if device == 0 else 'CPU'}")
12
 
13
  summarizer = pipeline(
14
  "summarization",
15
+ model="sshleifer/distilbart-cnn-12-6",
16
  device=device
17
  )
18
 
19
  def extract_text(file_path):
20
  if file_path is None:
21
  return ""
22
+ file_path = str(file_path)
 
 
23
  filename = os.path.basename(file_path).lower()
 
24
  try:
25
  if filename.endswith('.pdf'):
26
  with pdfplumber.open(file_path) as pdf:
27
  return "\n".join(page.extract_text() or "" for page in pdf.pages)
 
28
  elif filename.endswith('.docx'):
29
  doc = Document(file_path)
30
  return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
 
31
  elif filename.endswith('.txt'):
32
  with open(file_path, "r", encoding="utf-8", errors="replace") as f:
33
  return f.read()
 
34
  else:
35
  return "Unsupported file. Please use .pdf, .docx, or .txt"
 
36
  except Exception as e:
37
  return f"Error reading file: {str(e)}"
38
 
 
47
 
48
  words = len(text.split())
49
  if words < 100:
50
+ return text
51
 
 
52
  target_ratio = detail_level
53
  target_length = int(words * target_ratio)
54
+
55
+ # Safeguards: cap lengths to prevent min > max
56
+ max_l = max(500, min(1400, target_length + 250)) # Hard cap at 1400 (model limit-ish)
57
+ min_l = max(100, int(target_length * 0.65))
58
+
59
+ # Force min_l < max_l if overflow
60
+ if min_l >= max_l:
61
+ min_l = max_l - 100 # Reasonable fallback
62
+ if min_l < 100:
63
+ min_l = 100
64
 
65
  try:
66
  result = summarizer(
 
75
  )
76
  return result[0]['summary_text']
77
  except Exception as e:
78
+ return f"Error during summarization: {str(e)}\n(Try shorter text, lower detail level, or paste instead of upload.)"
79
 
80
+ # Interface
81
  interface = gr.Interface(
82
  fn=summarize,
83
  inputs=[
84
+ gr.Textbox(lines=12, placeholder="Paste your lecture text here...", label="Lecture Text (Paste)"),
85
+ gr.File(file_types=[".pdf", ".docx", ".txt"], label="Upload Lecture File"),
86
+ gr.Slider(0.15, 0.60, value=0.32, step=0.01, label="Detail Level (higher = longer summary)")
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  ],
88
  outputs=gr.Textbox(label="Generated Summary"),
89
  title="Lecture Summarizer",
90
+ description="Upload PDF/DOCX/TXT lecture or paste text. Adjust slider for detail. For very long files, use lower detail or chunk text.",
91
  flagging_mode="never",
92
  )
93
 
 
94
  interface.launch(theme="soft")