Natwar committed on
Commit
b44bc34
·
verified ·
1 Parent(s): 8f6e6b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -40
app.py CHANGED
@@ -1,29 +1,42 @@
1
  import os
2
  import subprocess
3
  import sys
4
- import pkg_resources
5
  import warnings
6
  warnings.filterwarnings("ignore")
7
 
 
8
  def install_package(package, version=None):
9
  package_spec = f"{package}=={version}" if version else package
10
  print(f"Installing {package_spec}...")
11
  try:
12
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir", package_spec])
 
 
 
 
 
 
 
 
 
13
  except subprocess.CalledProcessError as e:
14
  print(f"Failed to install {package_spec}: {e}")
15
  raise
16
 
 
17
  # Required packages
18
  required_packages = {
19
  "gradio": None,
20
  "torch": None,
21
- "transformers": None
22
  }
23
 
24
- installed_packages = {pkg.key for pkg in pkg_resources.working_set}
25
  for package, version in required_packages.items():
26
- if package not in installed_packages:
 
 
 
27
  install_package(package, version)
28
 
29
  # Import packages
@@ -38,53 +51,83 @@ AVAILABLE_MODELS = {
38
  "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
39
  "facebook/bart-large-cnn": "Larger BART model, better detail retention",
40
  "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
41
- "allenai/led-base-16384": "Handles longer scientific documents"
42
  }
43
 
44
  print(f"Loading default model: {DEFAULT_MODEL}")
45
- summarizer = pipeline("summarization", model=DEFAULT_MODEL)
46
 
47
  EXAMPLE_TEXTS = {
48
- "news_article": "In a historic move, global leaders have agreed to phase out fossil fuels over the next two decades. This landmark decision came after weeks of intense negotiations during the international climate summit. Experts believe this will drastically cut carbon emissions and pave the way for sustainable energy sources worldwide. Countries will now be held accountable through annual environmental reviews.",
49
-
50
- "scientific_abstract": "The rise of antibiotic-resistant bacteria poses one of the most significant threats to global health in the 21st century. Recent studies have shown that the overuse and misuse of antibiotics in both human medicine and agriculture have accelerated the evolution of resistant strains. In this review, we summarize current research on bacterial resistance mechanisms, including horizontal gene transfer and biofilm formation. We also explore novel approaches to combating resistance, such as bacteriophage therapy, antimicrobial peptides, and CRISPR-based gene editing technologies. The paper further outlines a strategic framework for integrating surveillance, policy reforms, and public health initiatives to curb the spread of resistance. While scientific innovation holds promise, global cooperation and responsible antibiotic stewardship remain essential to preventing a post-antibiotic era where common infections could once again become deadly.",
51
-
52
- "business_report": "The company reported a 32% increase in quarterly revenue, largely driven by the success of its latest AI-powered product line. International markets, particularly in Asia and Europe, showed strong adoption rates. Leadership announced plans to reinvest earnings into R&D and global expansion, while shareholders reacted positively with a 15% spike in stock prices."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
 
55
- def summarize_text(text, model_name, summary_length, num_beams, max_length=130, min_length=30):
 
56
  if not text.strip():
57
  return "Please provide some text to summarize."
58
 
59
  try:
60
  global summarizer
61
- summarizer = pipeline("summarization", model=model_name)
 
62
 
63
  length_mapping = {
64
  "very_short": (30, 50),
65
  "short": (50, 70),
66
  "medium": (70, 100),
67
- "long": (100, 130)
68
  }
69
- min_length, max_length = length_mapping.get(summary_length, (min_length, max_length))
70
-
71
- summary = summarizer(text, max_length=int(max_length), min_length=int(min_length), num_beams=int(num_beams), do_sample=False)
72
- return summary[0]['summary_text']
 
 
 
 
 
 
73
  except Exception as e:
74
  return f"Error: {str(e)}"
75
 
 
76
  def count_words(text):
77
- return len(text.split())
78
 
79
- def on_input_change(text):
80
- return f"{count_words(text)} words"
81
 
82
  def paste_example(example_type):
83
  return EXAMPLE_TEXTS.get(example_type, "")
84
 
 
85
  with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
86
  gr.Markdown("# 📝 Multimodel Text Summarization")
87
- gr.Markdown("Summarize news, reports, or scientific content using various models like BART, Pegasus, or LED.")
 
 
88
 
89
  with gr.Row():
90
  with gr.Column(scale=3):
@@ -93,16 +136,16 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
93
  label="Text to Summarize",
94
  placeholder="Paste or type your text here...",
95
  show_label=True,
96
- elem_id="text_input"
97
  )
98
  word_counter = gr.Markdown("0 words", elem_id="word_counter")
99
- text_input.change(on_input_change, inputs=[text_input], outputs=[word_counter])
100
 
101
  with gr.Row():
102
  example_dropdown = gr.Dropdown(
103
  choices=list(EXAMPLE_TEXTS.keys()),
104
  value=None,
105
- label="Load Example Text"
106
  )
107
  example_load_btn = gr.Button("Load Example")
108
 
@@ -110,7 +153,7 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
110
  model_choice = gr.Dropdown(
111
  choices=list(AVAILABLE_MODELS.keys()),
112
  value=DEFAULT_MODEL,
113
- label="Select Summarization Model"
114
  )
115
  model_info = gr.Markdown(f"**Model info:** {AVAILABLE_MODELS[DEFAULT_MODEL]}")
116
 
@@ -118,11 +161,10 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
118
  summary_length = gr.Radio(
119
  choices=["very_short", "short", "medium", "long"],
120
  value="medium",
121
- label="Summary Length"
122
  )
123
  num_beams = gr.Slider(
124
- minimum=1, maximum=8, value=4, step=1,
125
- label="Beam Size"
126
  )
127
 
128
  summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
@@ -132,37 +174,35 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
132
  summary_output = gr.Textbox(
133
  label="Generated Summary",
134
  lines=12,
135
- placeholder="Your summary will appear here..."
136
  )
137
- copy_button = gr.Button("📋 Copy to Clipboard", variant="secondary")
138
 
139
  # Events
140
  model_choice.change(
141
  fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
142
  inputs=[model_choice],
143
- outputs=[model_info]
144
  )
145
 
146
  example_load_btn.click(
147
  fn=paste_example,
148
  inputs=[example_dropdown],
149
- outputs=[text_input]
150
  )
151
 
152
  summarize_button.click(
153
  fn=summarize_text,
154
  inputs=[text_input, model_choice, summary_length, num_beams],
155
- outputs=[summary_output]
156
  )
157
 
158
  gr.Markdown("""
159
  ---
160
- ✅ Choose from different summarization models
161
- ✅ Works great for academic, news, or business content
162
- ✅ Customize summary length and beam search for better quality
163
-
164
  Built using Gradio and Hugging Face Transformers
165
  """)
166
 
167
  if __name__ == "__main__":
168
- demo.launch()
 
1
  import os
2
  import subprocess
3
  import sys
 
4
  import warnings
5
  warnings.filterwarnings("ignore")
6
 
7
+
8
  def install_package(package, version=None):
9
  package_spec = f"{package}=={version}" if version else package
10
  print(f"Installing {package_spec}...")
11
  try:
12
+ if package == "torch":
13
+ # CPU-only build is ~200MB vs ~900MB for CUDA — avoids disk quota errors
14
+ subprocess.check_call([
15
+ sys.executable, "-m", "pip", "install", "--no-cache-dir",
16
+ "torch", "--index-url", "https://download.pytorch.org/whl/cpu"
17
+ ])
18
+ else:
19
+ subprocess.check_call([
20
+ sys.executable, "-m", "pip", "install", "--no-cache-dir", package_spec
21
+ ])
22
  except subprocess.CalledProcessError as e:
23
  print(f"Failed to install {package_spec}: {e}")
24
  raise
25
 
26
+
27
  # Required packages
28
  required_packages = {
29
  "gradio": None,
30
  "torch": None,
31
+ "transformers": None,
32
  }
33
 
34
+ # Install missing packages using importlib (pkg_resources is deprecated)
35
  for package, version in required_packages.items():
36
+ try:
37
+ __import__(package)
38
+ print(f"{package} is already installed.")
39
+ except ImportError:
40
  install_package(package, version)
41
 
42
  # Import packages
 
51
  "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
52
  "facebook/bart-large-cnn": "Larger BART model, better detail retention",
53
  "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
54
+ "allenai/led-base-16384": "Handles longer scientific documents",
55
  }
56
 
57
  print(f"Loading default model: {DEFAULT_MODEL}")
58
+ summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1) # device=-1 forces CPU
59
 
60
  EXAMPLE_TEXTS = {
61
+ "news_article": (
62
+ "In a historic move, global leaders have agreed to phase out fossil fuels over the next two "
63
+ "decades. This landmark decision came after weeks of intense negotiations during the international "
64
+ "climate summit. Experts believe this will drastically cut carbon emissions and pave the way for "
65
+ "sustainable energy sources worldwide. Countries will now be held accountable through annual "
66
+ "environmental reviews."
67
+ ),
68
+ "scientific_abstract": (
69
+ "The rise of antibiotic-resistant bacteria poses one of the most significant threats to global "
70
+ "health in the 21st century. Recent studies have shown that the overuse and misuse of antibiotics "
71
+ "in both human medicine and agriculture have accelerated the evolution of resistant strains. In "
72
+ "this review, we summarize current research on bacterial resistance mechanisms, including horizontal "
73
+ "gene transfer and biofilm formation. We also explore novel approaches to combating resistance, "
74
+ "such as bacteriophage therapy, antimicrobial peptides, and CRISPR-based gene editing technologies. "
75
+ "The paper further outlines a strategic framework for integrating surveillance, policy reforms, and "
76
+ "public health initiatives to curb the spread of resistance. While scientific innovation holds "
77
+ "promise, global cooperation and responsible antibiotic stewardship remain essential to preventing "
78
+ "a post-antibiotic era where common infections could once again become deadly."
79
+ ),
80
+ "business_report": (
81
+ "The company reported a 32% increase in quarterly revenue, largely driven by the success of its "
82
+ "latest AI-powered product line. International markets, particularly in Asia and Europe, showed "
83
+ "strong adoption rates. Leadership announced plans to reinvest earnings into R&D and global "
84
+ "expansion, while shareholders reacted positively with a 15% spike in stock prices."
85
+ ),
86
  }
87
 
88
+
89
def summarize_text(text, model_name, summary_length, num_beams):
    """Summarize ``text`` with the selected model and return the summary.

    Args:
        text: Text to summarize. ``None``, empty, or whitespace-only input
            short-circuits with a prompt message instead of running a model.
        model_name: Model identifier handed to ``pipeline``.
        summary_length: One of "very_short", "short", "medium", "long".
            Any other value falls back to the medium range (70, 100).
        num_beams: Beam-search width; coerced to ``int`` before use.

    Returns:
        The generated summary string, or ``"Error: <message>"`` if loading the
        model or generating the summary raises.
    """
    if not text or not text.strip():
        return "Please provide some text to summarize."

    try:
        global summarizer
        # Reload pipeline only if the model has changed, always on CPU.
        # If the loaded pipeline does not expose model.name_or_path the
        # comparison fails and we simply reload, as before.
        current = globals().get("summarizer")
        loaded_name = getattr(getattr(current, "model", None), "name_or_path", None)
        if current is None or loaded_name != model_name:
            summarizer = pipeline("summarization", model=model_name, device=-1)

        # (min_length, max_length) passed to the summarizer for each preset
        length_mapping = {
            "very_short": (30, 50),
            "short": (50, 70),
            "medium": (70, 100),
            "long": (100, 130),
        }
        min_length, max_length = length_mapping.get(summary_length, (70, 100))

        summary = summarizer(
            text,
            max_length=int(max_length),
            min_length=int(min_length),
            num_beams=int(num_beams),
            do_sample=False,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app
        return f"Error: {str(e)}"
116
 
117
+
118
  def count_words(text):
119
+ return f"{len(text.split())} words"
120
 
 
 
121
 
122
  def paste_example(example_type):
123
  return EXAMPLE_TEXTS.get(example_type, "")
124
 
125
+
126
  with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
127
  gr.Markdown("# 📝 Multimodel Text Summarization")
128
+ gr.Markdown(
129
+ "Summarize news, reports, or scientific content using various models like BART, Pegasus, or LED."
130
+ )
131
 
132
  with gr.Row():
133
  with gr.Column(scale=3):
 
136
  label="Text to Summarize",
137
  placeholder="Paste or type your text here...",
138
  show_label=True,
139
+ elem_id="text_input",
140
  )
141
  word_counter = gr.Markdown("0 words", elem_id="word_counter")
142
+ text_input.change(count_words, inputs=[text_input], outputs=[word_counter])
143
 
144
  with gr.Row():
145
  example_dropdown = gr.Dropdown(
146
  choices=list(EXAMPLE_TEXTS.keys()),
147
  value=None,
148
+ label="Load Example Text",
149
  )
150
  example_load_btn = gr.Button("Load Example")
151
 
 
153
  model_choice = gr.Dropdown(
154
  choices=list(AVAILABLE_MODELS.keys()),
155
  value=DEFAULT_MODEL,
156
+ label="Select Summarization Model",
157
  )
158
  model_info = gr.Markdown(f"**Model info:** {AVAILABLE_MODELS[DEFAULT_MODEL]}")
159
 
 
161
  summary_length = gr.Radio(
162
  choices=["very_short", "short", "medium", "long"],
163
  value="medium",
164
+ label="Summary Length",
165
  )
166
  num_beams = gr.Slider(
167
+ minimum=1, maximum=8, value=4, step=1, label="Beam Size"
 
168
  )
169
 
170
  summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
 
174
  summary_output = gr.Textbox(
175
  label="Generated Summary",
176
  lines=12,
177
+ placeholder="Your summary will appear here...",
178
  )
 
179
 
180
  # Events
181
  model_choice.change(
182
  fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
183
  inputs=[model_choice],
184
+ outputs=[model_info],
185
  )
186
 
187
  example_load_btn.click(
188
  fn=paste_example,
189
  inputs=[example_dropdown],
190
+ outputs=[text_input],
191
  )
192
 
193
  summarize_button.click(
194
  fn=summarize_text,
195
  inputs=[text_input, model_choice, summary_length, num_beams],
196
+ outputs=[summary_output],
197
  )
198
 
199
  gr.Markdown("""
200
  ---
201
+ ✅ Choose from different summarization models
202
+ ✅ Works great for academic, news, or business content
203
+ ✅ Customize summary length and beam search for better quality
 
204
  Built using Gradio and Hugging Face Transformers
205
  """)
206
 
207
  if __name__ == "__main__":
208
+ demo.launch()