Natwar committed on
Commit
f01d54d
·
verified ·
1 Parent(s): 1a39287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -92
app.py CHANGED
@@ -5,77 +5,84 @@ import warnings
5
  warnings.filterwarnings("ignore")
6
 
7
 
8
- def install_package(package, version=None):
9
- package_spec = f"{package}=={version}" if version else package
10
- print(f"Installing {package_spec}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  try:
12
- if package == "torch":
13
- # CPU-only build (~190MB) instead of CUDA (~900MB) to avoid disk quota errors
14
- subprocess.check_call([
15
- sys.executable, "-m", "pip", "install", "--no-cache-dir",
16
- "torch", "--index-url", "https://download.pytorch.org/whl/cpu"
17
- ])
 
 
 
 
 
 
 
 
18
  else:
19
- subprocess.check_call([
20
- sys.executable, "-m", "pip", "install", "--no-cache-dir", package_spec
21
- ])
22
- except subprocess.CalledProcessError as e:
23
- print(f"Failed to install {package_spec}: {e}")
24
- raise
25
 
 
26
 
27
- def force_install(package_spec):
28
- """Install a package unconditionally, overriding whatever version is present."""
29
- print(f"Force-installing {package_spec}...")
30
- try:
31
- subprocess.check_call([
32
- sys.executable, "-m", "pip", "install", "--no-cache-dir", package_spec
33
- ])
34
- except subprocess.CalledProcessError as e:
35
- print(f"Failed to force-install {package_spec}: {e}")
36
- raise
37
-
38
-
39
- # Phase 1 — install missing packages.
40
- # Notes:
41
- # - tokenizers is NOT pre-pinned here; transformers 4.46.3 pulls tokenizers 0.20.3
42
- # which already ships native cp313 wheels, so no Rust compilation is needed.
43
- # - transformers is pinned to 4.46.3 (last v4 release) because v5 dropped the
44
- # "summarization" pipeline task entirely.
45
- required_packages = {
46
- "gradio": None,
47
- "torch": None,
48
- "transformers": "4.46.3",
49
- }
50
 
51
- for package, version in required_packages.items():
52
- try:
53
- __import__(package)
54
- print(f"{package} is already installed.")
55
- except ImportError:
56
- install_package(package, version)
57
 
58
- # Phase 2 — fix the huggingface_hub version AFTER transformers has run.
59
- #
60
- # Problem: transformers 4.46.3 requires huggingface-hub<1.0, so pip picks
61
- # the latest <1.0 release (currently 0.36.x). Starting around hub 0.30,
62
- # get_session() returns an httpx.Client when httpx is present on the system.
63
- # transformers' own hub.py calls get_session().head(..., allow_redirects=...),
64
- # which is a requests-style kwarg that httpx rejects with:
65
- # TypeError: Client.head() got an unexpected keyword argument 'allow_redirects'
66
- #
67
- # Fix: force hub back to 0.28.1 — the last release that uses requests (not httpx)
68
- # for get_session(), while still satisfying:
69
- # - transformers 4.46.3 requirement: >=0.23.2, <1.0 ✓
70
- # - gradio requirement: >=0.28.1 ✓
71
- force_install("huggingface_hub==0.28.1")
72
-
73
- # Now safe to import everything
74
- import gradio as gr
75
- import torch
76
- from transformers import pipeline
77
-
78
- # Load default summarization model
79
  DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
80
 
81
  AVAILABLE_MODELS = {
@@ -86,7 +93,7 @@ AVAILABLE_MODELS = {
86
  }
87
 
88
  print(f"Loading default model: {DEFAULT_MODEL}")
89
- summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1) # device=-1 forces CPU
90
 
91
  EXAMPLE_TEXTS = {
92
  "news_article": (
@@ -120,29 +127,26 @@ EXAMPLE_TEXTS = {
120
  def summarize_text(text, model_name, summary_length, num_beams):
121
  if not text.strip():
122
  return "Please provide some text to summarize."
123
-
124
  try:
125
  global summarizer
126
  summarizer = pipeline("summarization", model=model_name, device=-1)
127
-
128
  length_mapping = {
129
  "very_short": (30, 50),
130
- "short": (50, 70),
131
- "medium": (70, 100),
132
- "long": (100, 130),
133
  }
134
- min_length, max_length = length_mapping.get(summary_length, (70, 100))
135
-
136
- summary = summarizer(
137
  text,
138
- max_length=int(max_length),
139
- min_length=int(min_length),
140
  num_beams=int(num_beams),
141
  do_sample=False,
142
  )
143
- return summary[0]["summary_text"]
144
- except Exception as e:
145
- return f"Error: {str(e)}"
146
 
147
 
148
  def count_words(text):
@@ -153,6 +157,8 @@ def paste_example(example_type):
153
  return EXAMPLE_TEXTS.get(example_type, "")
154
 
155
 
 
 
156
  with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
157
  gr.Markdown("# πŸ“ Multimodel Text Summarization")
158
  gr.Markdown(
@@ -165,10 +171,9 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
165
  lines=12,
166
  label="Text to Summarize",
167
  placeholder="Paste or type your text here...",
168
- show_label=True,
169
  elem_id="text_input",
170
  )
171
- word_counter = gr.Markdown("0 words", elem_id="word_counter")
172
  text_input.change(count_words, inputs=[text_input], outputs=[word_counter])
173
 
174
  with gr.Row():
@@ -193,9 +198,7 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
193
  value="medium",
194
  label="Summary Length",
195
  )
196
- num_beams = gr.Slider(
197
- minimum=1, maximum=8, value=4, step=1, label="Beam Size"
198
- )
199
 
200
  summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
201
 
@@ -207,19 +210,12 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
207
  placeholder="Your summary will appear here...",
208
  )
209
 
210
- # Events
211
  model_choice.change(
212
  fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
213
  inputs=[model_choice],
214
  outputs=[model_info],
215
  )
216
-
217
- example_load_btn.click(
218
- fn=paste_example,
219
- inputs=[example_dropdown],
220
- outputs=[text_input],
221
- )
222
-
223
  summarize_button.click(
224
  fn=summarize_text,
225
  inputs=[text_input, model_choice, summary_length, num_beams],
 
5
  warnings.filterwarnings("ignore")
6
 
7
 
8
+ def run_pip(*args):
9
+ """Run a pip command and raise on failure."""
10
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + list(args))
11
+
12
+
13
+ # ── Phase 1: Install packages ────────────────────────────────────────────────
14
+ # Order and pins matter — see comments below.
15
+
16
+ print("=== Installing gradio (if needed) ===")
17
+ try:
18
+ import gradio # noqa: F401
19
+ print("gradio already installed.")
20
+ except ImportError:
21
+ run_pip("gradio")
22
+
23
+ print("=== Installing torch (CPU-only, ~190 MB vs ~900 MB for CUDA) ===")
24
+ try:
25
+ import torch # noqa: F401
26
+ print("torch already installed.")
27
+ except ImportError:
28
+ run_pip("torch", "--index-url", "https://download.pytorch.org/whl/cpu")
29
+
30
+ print("=== Installing transformers 4.46.3 ===")
31
+ # Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
32
+ # This also pulls tokenizers 0.20.3 (native cp313 wheel, no Rust needed) and
33
+ # huggingface-hub 0.36.x as a side-effect; we fix the hub version in Phase 2.
34
+ try:
35
+ import transformers as _tf
36
+ if _tf.__version__ != "4.46.3":
37
+ raise ImportError("wrong version")
38
+ print("transformers 4.46.3 already installed.")
39
+ except (ImportError, AttributeError):
40
+ run_pip("transformers==4.46.3")
41
+
42
+ # ── Phase 2: Patch transformers/utils/hub.py BEFORE importing it ─────────────
43
+ #
44
+ # Root cause: transformers 4.46.3 calls
45
+ # get_session().head(..., allow_redirects=False, ...)
46
+ # In this environment get_session() returns an httpx.Client (because httpx is
47
+ # installed as a gradio dependency and the hub version that transformers pulled
48
+ # switches to httpx when it is available). httpx uses `follow_redirects=`,
49
+ # not `allow_redirects=`, so the call raises:
50
+ # TypeError: Client.head() got an unexpected keyword argument 'allow_redirects'
51
+ #
52
+ # Fix: rewrite every `allow_redirects=` → `follow_redirects=` in hub.py on
53
+ # disk *before* Python imports it, so no module reload is needed.
54
+
55
+ def patch_transformers_hub():
56
  try:
57
+ import importlib.util
58
+ spec = importlib.util.find_spec("transformers")
59
+ if spec is None:
60
+ print("Warning: could not locate transformers package for patching.")
61
+ return
62
+ pkg_dir = os.path.dirname(spec.origin)
63
+ hub_path = os.path.join(pkg_dir, "utils", "hub.py")
64
+ with open(hub_path, "r", encoding="utf-8") as f:
65
+ src = f.read()
66
+ if "allow_redirects=" in src:
67
+ patched = src.replace("allow_redirects=", "follow_redirects=")
68
+ with open(hub_path, "w", encoding="utf-8") as f:
69
+ f.write(patched)
70
+ print(f"Patched {hub_path}: allow_redirects β†’ follow_redirects")
71
  else:
72
+ print("transformers hub.py already clean β€” no patch needed.")
73
+ except Exception as exc:
74
+ print(f"Warning: hub.py patch failed ({exc}). Will try to continue anyway.")
 
 
 
75
 
76
+ patch_transformers_hub()
77
 
78
+ # ── Phase 3: Safe imports (transformers is now patched on disk) ───────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ import gradio as gr # noqa: E402
81
+ import torch # noqa: E402
82
+ from transformers import pipeline # noqa: E402
83
+
84
+ # ── App setup ────────────────────────────────────────────────────────────────
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
87
 
88
  AVAILABLE_MODELS = {
 
93
  }
94
 
95
  print(f"Loading default model: {DEFAULT_MODEL}")
96
+ summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1) # device=-1 β†’ CPU
97
 
98
  EXAMPLE_TEXTS = {
99
  "news_article": (
 
127
  def summarize_text(text, model_name, summary_length, num_beams):
128
  if not text.strip():
129
  return "Please provide some text to summarize."
 
130
  try:
131
  global summarizer
132
  summarizer = pipeline("summarization", model=model_name, device=-1)
 
133
  length_mapping = {
134
  "very_short": (30, 50),
135
+ "short": (50, 70),
136
+ "medium": (70, 100),
137
+ "long": (100, 130),
138
  }
139
+ min_len, max_len = length_mapping.get(summary_length, (70, 100))
140
+ result = summarizer(
 
141
  text,
142
+ max_length=int(max_len),
143
+ min_length=int(min_len),
144
  num_beams=int(num_beams),
145
  do_sample=False,
146
  )
147
+ return result[0]["summary_text"]
148
+ except Exception as exc:
149
+ return f"Error: {exc}"
150
 
151
 
152
  def count_words(text):
 
157
  return EXAMPLE_TEXTS.get(example_type, "")
158
 
159
 
160
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
161
+
162
  with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
163
  gr.Markdown("# πŸ“ Multimodel Text Summarization")
164
  gr.Markdown(
 
171
  lines=12,
172
  label="Text to Summarize",
173
  placeholder="Paste or type your text here...",
 
174
  elem_id="text_input",
175
  )
176
+ word_counter = gr.Markdown("0 words")
177
  text_input.change(count_words, inputs=[text_input], outputs=[word_counter])
178
 
179
  with gr.Row():
 
198
  value="medium",
199
  label="Summary Length",
200
  )
201
+ num_beams = gr.Slider(minimum=1, maximum=8, value=4, step=1, label="Beam Size")
 
 
202
 
203
  summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
204
 
 
210
  placeholder="Your summary will appear here...",
211
  )
212
 
 
213
  model_choice.change(
214
  fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
215
  inputs=[model_choice],
216
  outputs=[model_info],
217
  )
218
+ example_load_btn.click(fn=paste_example, inputs=[example_dropdown], outputs=[text_input])
 
 
 
 
 
 
219
  summarize_button.click(
220
  fn=summarize_text,
221
  inputs=[text_input, model_choice, summary_length, num_beams],