Natwar committed on
Commit
fdebaf9
·
verified ·
1 Parent(s): f01d54d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -47
app.py CHANGED
@@ -6,12 +6,11 @@ warnings.filterwarnings("ignore")
6
 
7
 
8
  def run_pip(*args):
9
- """Run a pip command and raise on failure."""
10
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + list(args))
11
 
12
 
13
- # ── Phase 1: Install packages ────────────────────────────────────────────────
14
- # Order and pins matter β€” see comments below.
15
 
16
  print("=== Installing gradio (if needed) ===")
17
  try:
@@ -20,7 +19,7 @@ try:
20
  except ImportError:
21
  run_pip("gradio")
22
 
23
- print("=== Installing torch (CPU-only, ~190 MB vs ~900 MB for CUDA) ===")
24
  try:
25
  import torch # noqa: F401
26
  print("torch already installed.")
@@ -29,8 +28,6 @@ except ImportError:
29
 
30
  print("=== Installing transformers 4.46.3 ===")
31
  # Pin to last v4 release β€” transformers 5.x removed the 'summarization' pipeline task.
32
- # This also pulls tokenizers 0.20.3 (native cp313 wheel, no Rust needed) and
33
- # huggingface-hub 0.36.x as a side-effect; we fix the hub version in Phase 2.
34
  try:
35
  import transformers as _tf
36
  if _tf.__version__ != "4.46.3":
@@ -39,57 +36,43 @@ try:
39
  except (ImportError, AttributeError):
40
  run_pip("transformers==4.46.3")
41
 
42
- # ── Phase 2: Patch transformers/utils/hub.py BEFORE importing it ─────────────
43
  #
44
- # Root cause: transformers 4.46.3 calls
45
- # get_session().head(..., allow_redirects=False, ...)
46
- # In this environment get_session() returns an httpx.Client (because httpx is
47
- # installed as a gradio dependency and the hub version that transformers pulled
48
- # switches to httpx when it is available). httpx uses `follow_redirects=`,
49
- # not `allow_redirects=`, so the call raises:
50
- # TypeError: Client.head() got an unexpected keyword argument 'allow_redirects'
51
  #
52
- # Fix: rewrite every `allow_redirects=` β†’ `follow_redirects=` in hub.py on
53
- # disk *before* Python imports it, so no module reload is needed.
 
 
 
54
 
55
- def patch_transformers_hub():
56
- try:
57
- import importlib.util
58
- spec = importlib.util.find_spec("transformers")
59
- if spec is None:
60
- print("Warning: could not locate transformers package for patching.")
61
- return
62
- pkg_dir = os.path.dirname(spec.origin)
63
- hub_path = os.path.join(pkg_dir, "utils", "hub.py")
64
- with open(hub_path, "r", encoding="utf-8") as f:
65
- src = f.read()
66
- if "allow_redirects=" in src:
67
- patched = src.replace("allow_redirects=", "follow_redirects=")
68
- with open(hub_path, "w", encoding="utf-8") as f:
69
- f.write(patched)
70
- print(f"Patched {hub_path}: allow_redirects β†’ follow_redirects")
71
- else:
72
- print("transformers hub.py already clean β€” no patch needed.")
73
- except Exception as exc:
74
- print(f"Warning: hub.py patch failed ({exc}). Will try to continue anyway.")
75
 
76
- patch_transformers_hub()
 
77
 
78
- # ── Phase 3: Safe imports (transformers is now patched on disk) ───────────────
79
 
80
- import gradio as gr # noqa: E402
81
- import torch # noqa: E402
82
- from transformers import pipeline # noqa: E402
83
 
84
- # ── App setup ──────────────────────────────────────────────────────────────
85
 
86
  DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
87
 
88
  AVAILABLE_MODELS = {
89
- "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
90
- "facebook/bart-large-cnn": "Larger BART model, better detail retention",
91
- "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
92
- "allenai/led-base-16384": "Handles longer scientific documents",
93
  }
94
 
95
  print(f"Loading default model: {DEFAULT_MODEL}")
@@ -157,7 +140,7 @@ def paste_example(example_type):
157
  return EXAMPLE_TEXTS.get(example_type, "")
158
 
159
 
160
- # ── Gradio UI ─────────────────────────────────────────────────────────────────
161
 
162
  with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
163
  gr.Markdown("# πŸ“ Multimodel Text Summarization")
 
6
 
7
 
8
  def run_pip(*args):
9
+ """Run a pip install command and raise on failure."""
10
  subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + list(args))
11
 
12
 
13
+ # ── Phase 1: Install packages ─────────────────────────────────────────────────
 
14
 
15
  print("=== Installing gradio (if needed) ===")
16
  try:
 
19
  except ImportError:
20
  run_pip("gradio")
21
 
22
+ print("=== Installing torch (CPU-only, ~190 MB) ===")
23
  try:
24
  import torch # noqa: F401
25
  print("torch already installed.")
 
28
 
29
  print("=== Installing transformers 4.46.3 ===")
30
  # Pin to last v4 release β€” transformers 5.x removed the 'summarization' pipeline task.
 
 
31
  try:
32
  import transformers as _tf
33
  if _tf.__version__ != "4.46.3":
 
36
  except (ImportError, AttributeError):
37
  run_pip("transformers==4.46.3")
38
 
39
+ # ── Phase 2: Fix the requests-vs-httpx incompatibility ───────────────────────
40
  #
41
+ # What happens:
42
+ # - transformers 4.46.3 requires huggingface-hub<1.0, so pip installs 0.36.x.
43
+ # - huggingface-hub 0.36.x makes get_session() return an httpx.Client when
44
+ # httpx is present (it is β€” gradio depends on it).
45
+ # - transformers' own hub.py then calls that client with requests-style kwargs:
46
+ # get_session().head(url, allow_redirects=False, proxies=proxies, timeout=10)
47
+ # - httpx.Client rejects every one of these: allow_redirects, proxies, etc.
48
  #
49
+ # Fix:
50
+ # After importing transformers (so its module object is in sys.modules), replace
51
+ # the `get_session` name inside the `transformers.utils.hub` namespace with a
52
+ # lambda that returns a plain requests.Session. A requests.Session accepts all
53
+ # of those kwargs natively, so every existing call in hub.py works unchanged.
54
 
55
+ import transformers.utils.hub as _t_hub # noqa: E402
56
+ import requests as _requests # noqa: E402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ _t_hub.get_session = lambda: _requests.Session()
59
+ print("Patched transformers.utils.hub.get_session β†’ requests.Session()")
60
 
61
+ # ── Phase 3: Safe imports ─────────────────────────────────────────────────────
62
 
63
+ import gradio as gr # noqa: E402
64
+ import torch # noqa: E402
65
+ from transformers import pipeline # noqa: E402
66
 
67
+ # ── App setup ─────────────────────────────────────────────────────────────────
68
 
69
  DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
70
 
71
  AVAILABLE_MODELS = {
72
+ "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
73
+ "facebook/bart-large-cnn": "Larger BART model, better detail retention",
74
+ "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
75
+ "allenai/led-base-16384": "Handles longer scientific documents",
76
  }
77
 
78
  print(f"Loading default model: {DEFAULT_MODEL}")
 
140
  return EXAMPLE_TEXTS.get(example_type, "")
141
 
142
 
143
+ # ── Gradio UI ──────────────────────────────────────────────────────────────────
144
 
145
  with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
146
  gr.Markdown("# πŸ“ Multimodel Text Summarization")