semanticnoodles committed on
Commit
f98a415
·
verified ·
1 Parent(s): a1163d8

Software loading logic rewrite

Browse files

Massive rewrite based on working Google Colab code for the three software tools (Siegfried, DROID, JHOVE).

Files changed (4) hide show
  1. README.md +3 -3
  2. app.py +161 -55
  3. packages.txt +1 -0
  4. requirements.txt +1 -0
README.md CHANGED
@@ -2,9 +2,9 @@
2
  title: FormatAnalyser
3
  emoji: 🗂️
4
  colorFrom: red
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
@@ -76,4 +76,4 @@ python app.py
76
 
77
  - **JHOVE installer URL** may change with new releases — check [openpreservation.org](https://openpreservation.org/products/jhove/) if install fails
78
  - **DROID signature file** is downloaded from The National Archives; the version number in `app.py` may need updating over time
79
- - On Hugging Face Spaces the first analysis run triggers tool installation; subsequent runs in the same session skip this step
 
2
  title: FormatAnalyser
3
  emoji: 🗂️
4
  colorFrom: red
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: "4.44.0"
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
76
 
77
  - **JHOVE installer URL** may change with new releases — check [openpreservation.org](https://openpreservation.org/products/jhove/) if install fails
78
  - **DROID signature file** is downloaded from The National Archives; the version number in `app.py` may need updating over time
79
+ - On Hugging Face Spaces the first analysis run triggers tool installation; subsequent runs in the same session skip this step
app.py CHANGED
@@ -14,6 +14,10 @@ import shutil
14
  import tempfile
15
  import threading
16
  from pathlib import Path
 
 
 
 
17
 
18
  import gradio as gr
19
  import pandas as pd
@@ -53,50 +57,109 @@ def setup_tools():
53
  log.append("βœ“ Siegfried already on PATH")
54
  else:
55
  log.append("⬇ Installing Siegfried…")
56
- run_cmd("apt-get install -qq -y wget 2>/dev/null || true")
 
 
 
 
57
  run_cmd(
58
  "wget -q https://github.com/richardlehane/siegfried/releases/download/"
59
- "v1.11.1/siegfried_1.11.1_linux64.tar.gz -O /tmp/sf.tar.gz"
60
  )
61
- run_cmd("tar -xzf /tmp/sf.tar.gz -C /usr/local/bin sf")
62
- run_cmd("chmod +x /usr/local/bin/sf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  run_cmd("sf -update 2>&1 || true")
 
64
  if shutil.which("sf"):
65
  TOOL_STATE["siegfried"]["ready"] = True
66
- log.append("βœ“ Siegfried installed")
 
 
67
  else:
68
  log.append("βœ— Siegfried install failed")
69
 
70
  # ── DROID ─────────────────────────────────────────────────────────────────
71
- DROID_VERSION = "6.7.0"
72
- jar_candidates = [
73
- f"/opt/droid/droid-command-line-{DROID_VERSION}.jar",
74
- "/opt/droid/droid-command-line.jar",
75
- ]
76
  sig_path = "/root/.droid6/signature_files/DROID_SignatureFile_V118.xml"
77
 
78
- jar_path = next((p for p in jar_candidates if os.path.exists(p)), None)
79
- if jar_path is None:
80
- stdout, _, _ = run_cmd("find /opt/droid -name '*.jar' 2>/dev/null | head -1")
81
- if stdout:
82
- jar_path = stdout
83
-
84
- if jar_path and os.path.exists(jar_path):
85
  TOOL_STATE["droid"]["ready"] = True
86
- TOOL_STATE["droid"]["jar"] = jar_path
87
- log.append(f"βœ“ DROID already at {jar_path}")
88
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  log.append("⬇ Installing DROID…")
90
- run_cmd("apt-get install -qq -y default-jre unzip wget 2>/dev/null || true")
 
91
  DROID_URL = (
92
  f"https://github.com/digital-preservation/droid/releases/download/"
93
- f"droid-{DROID_VERSION}/droid-command-line-{DROID_VERSION}-bin.zip"
94
  )
95
  run_cmd(f"wget -q {DROID_URL} -O /tmp/droid.zip")
96
- run_cmd("unzip -q /tmp/droid.zip -d /opt/droid")
97
- stdout, _, _ = run_cmd("find /opt/droid -name '*.jar' 2>/dev/null | head -1")
 
98
  if stdout:
99
  jar_path = stdout
 
 
 
100
  TOOL_STATE["droid"]["ready"] = True
101
  TOOL_STATE["droid"]["jar"] = jar_path
102
  log.append(f"βœ“ DROID installed: {jar_path}")
@@ -117,36 +180,67 @@ def setup_tools():
117
  log.append("⚠ DROID signature file missing β€” results may be limited")
118
 
119
  # ── JHOVE ──────────────────────────────────────────────────────────────────
120
- jhove_sh = "/opt/jhove/jhove"
121
- if not os.path.exists(jhove_sh):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  stdout, _, _ = run_cmd(
123
  "find /opt/jhove -name 'jhove' -not -name '*.jar' 2>/dev/null | head -1"
124
  )
125
- if stdout:
126
- jhove_sh = stdout
127
 
128
- if os.path.exists(jhove_sh):
129
- TOOL_STATE["jhove"]["ready"] = True
130
- TOOL_STATE["jhove"]["path"] = jhove_sh
131
- log.append(f"βœ“ JHOVE already at {jhove_sh}")
132
- else:
133
  log.append("⬇ Installing JHOVE (may take ~2 min)…")
134
- run_cmd("apt-get install -qq -y default-jre wget 2>/dev/null || true")
135
- run_cmd(
136
- "wget -q https://software.openpreservation.org/rel/jhove/"
137
- "jhove-xplt-installer-1.28.0.jar -O /tmp/jhove-installer.jar"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  )
139
- run_cmd("java -jar /tmp/jhove-installer.jar -console -dir /opt/jhove")
140
- if not os.path.exists(jhove_sh):
141
- stdout, _, _ = run_cmd(
142
- "find /opt/jhove -name 'jhove' -not -name '*.jar' 2>/dev/null | head -1"
143
- )
144
- if stdout:
145
- jhove_sh = stdout
146
- if os.path.exists(jhove_sh):
147
- TOOL_STATE["jhove"]["ready"] = True
148
- TOOL_STATE["jhove"]["path"] = jhove_sh
149
- log.append(f"βœ“ JHOVE installed: {jhove_sh}")
150
  else:
151
  log.append("βœ— JHOVE install failed")
152
 
@@ -193,14 +287,26 @@ def run_droid(filepath: str) -> dict:
193
  with tempfile.TemporaryDirectory() as tmp:
194
  profile = os.path.join(tmp, "profile.droid")
195
  csv_out = os.path.join(tmp, "export.csv")
196
- subprocess.run(
197
- ["java", "-jar", jar, "-a", filepath, "-Ns", sig, "-p", profile],
198
- capture_output=True, text=True, timeout=120
199
- )
200
- export = subprocess.run(
201
- ["java", "-jar", jar, "-p", profile, "-e", csv_out],
202
- capture_output=True, text=True, timeout=60
203
- )
 
 
 
 
 
 
 
 
 
 
 
 
204
  if os.path.exists(csv_out):
205
  with open(csv_out) as f:
206
  rows = list(csv.DictReader(f))
 
14
  import tempfile
15
  import threading
16
  from pathlib import Path
17
+ try:
18
+ import requests
19
+ except ImportError:
20
+ requests = None
21
 
22
  import gradio as gr
23
  import pandas as pd
 
57
  log.append("βœ“ Siegfried already on PATH")
58
  else:
59
  log.append("⬇ Installing Siegfried…")
60
+ # try to fetch via apt/wget as before, but installation may not permit
61
+ # writing to /usr/local/bin on some environments (e.g. Colab), so we
62
+ # also support a user-space fallback where the binary is kept in a
63
+ # local directory and added to PATH.
64
+ run_cmd("apt-get install -qq -y wget unzip 2>/dev/null || true")
65
  run_cmd(
66
  "wget -q https://github.com/richardlehane/siegfried/releases/download/"
67
+ "v1.11.4/siegfried_1-11-4_linux64.zip -O /tmp/sf.zip"
68
  )
69
+ run_cmd("unzip -q /tmp/sf.zip -d /tmp/sf")
70
+ sf_bin = "/tmp/sf/sf"
71
+ # if the download/unzip above failed (no wget or unzip), try Python
72
+ if not os.path.exists(sf_bin):
73
+ # pure-Python fallback inspired by Colab snippet
74
+ siegfried_url = (
75
+ "https://github.com/richardlehane/siegfried/"
76
+ "releases/download/v1.11.4/siegfried_1-11-4_linux64.zip"
77
+ )
78
+ local_filename = os.path.join("/tmp", siegfried_url.split("/")[-1])
79
+ try:
80
+ import requests, zipfile
81
+
82
+ with requests.get(siegfried_url, stream=True) as r:
83
+ r.raise_for_status()
84
+ with open(local_filename, "wb") as f:
85
+ for chunk in r.iter_content(chunk_size=8192):
86
+ f.write(chunk)
87
+ os.makedirs("/tmp/sf", exist_ok=True)
88
+ with zipfile.ZipFile(local_filename, "r") as zip_ref:
89
+ zip_ref.extractall(path="/tmp/sf")
90
+ except Exception:
91
+ pass
92
+ # ensure executable
93
+ if os.path.exists(sf_bin):
94
+ run_cmd(f"chmod +x {sf_bin}")
95
+ # attempt to move into system path, but don't fail if we can't
96
+ moved = False
97
+ try:
98
+ run_cmd(f"mv {sf_bin} /usr/local/bin/sf")
99
+ run_cmd("chmod +x /usr/local/bin/sf")
100
+ moved = True
101
+ except Exception:
102
+ # ignore permission issues
103
+ pass
104
+
105
+ # always make sure the directory containing the binary is in PATH
106
+ os.environ["PATH"] += os.pathsep + os.path.dirname(sf_bin)
107
+
108
+ # update signatures using whatever sf binary is found in PATH
109
  run_cmd("sf -update 2>&1 || true")
110
+
111
  if shutil.which("sf"):
112
  TOOL_STATE["siegfried"]["ready"] = True
113
+ log.append(
114
+ "βœ“ Siegfried installed" + (" to /usr/local/bin" if moved else " (user path)")
115
+ )
116
  else:
117
  log.append("βœ— Siegfried install failed")
118
 
119
  # ── DROID ─────────────────────────────────────────────────────────────────
120
+ DROID_VERSION = "6.9.12"
 
 
 
 
121
  sig_path = "/root/.droid6/signature_files/DROID_SignatureFile_V118.xml"
122
 
123
+ # check for either jar or helper script on PATH
124
+ jar_path = None
125
+ script_path = shutil.which("droid") or shutil.which("droid.sh")
126
+ if script_path:
127
+ # we can rely on the script to locate the jar itself later
 
 
128
  TOOL_STATE["droid"]["ready"] = True
129
+ TOOL_STATE["droid"]["jar"] = script_path
130
+ log.append(f"βœ“ DROID script already on PATH ({script_path})")
131
  else:
132
+ jar_candidates = [
133
+ f"/opt/droid/droid-command-line-{DROID_VERSION}.jar",
134
+ "/opt/droid/droid-command-line.jar",
135
+ ]
136
+ jar_path = next((p for p in jar_candidates if os.path.exists(p)), None)
137
+ if jar_path is None:
138
+ stdout, _, _ = run_cmd("find /opt/droid -name '*.jar' 2>/dev/null | head -1")
139
+ if stdout:
140
+ jar_path = stdout
141
+ if jar_path and os.path.exists(jar_path):
142
+ TOOL_STATE["droid"]["ready"] = True
143
+ TOOL_STATE["droid"]["jar"] = jar_path
144
+ log.append(f"βœ“ DROID already at {jar_path}")
145
+ # if we still aren't ready, attempt installation/fallback
146
+ if not TOOL_STATE["droid"]["ready"]:
147
  log.append("⬇ Installing DROID…")
148
+ run_cmd("apt-get install -qq -y openjdk-21-jre-headless default-jre unzip wget 2>/dev/null || true")
149
+ # primary mechanism: download the binary bundle and extract locally
150
  DROID_URL = (
151
  f"https://github.com/digital-preservation/droid/releases/download/"
152
+ f"{DROID_VERSION}/droid-binary-{DROID_VERSION}-bin.zip"
153
  )
154
  run_cmd(f"wget -q {DROID_URL} -O /tmp/droid.zip")
155
+ run_cmd("mkdir -p /tmp/droid && unzip -q /tmp/droid.zip -d /tmp/droid")
156
+ # look for jar in extracted tree
157
+ stdout, _, _ = run_cmd("find /tmp/droid -name '*.jar' 2>/dev/null | head -1")
158
  if stdout:
159
  jar_path = stdout
160
+ # also add the containing directory to PATH so droid.sh can execute
161
+ dirname = os.path.dirname(jar_path)
162
+ os.environ["PATH"] += os.pathsep + dirname
163
  TOOL_STATE["droid"]["ready"] = True
164
  TOOL_STATE["droid"]["jar"] = jar_path
165
  log.append(f"βœ“ DROID installed: {jar_path}")
 
180
  log.append("⚠ DROID signature file missing β€” results may be limited")
181
 
182
  # ── JHOVE ──────────────────────────────────────────────────────────────────
183
+ # target installation directory (fallback to /opt if possible)
184
+ jhove_dir = "/opt/jhove"
185
+ jhove_bin = os.path.join(jhove_dir, "jhove")
186
+
187
+ # helper that sets state when we have a working binary
188
+ def set_jhove(path):
189
+ TOOL_STATE["jhove"]["ready"] = True
190
+ TOOL_STATE["jhove"]["path"] = path
191
+ log.append(f"βœ“ JHOVE ready at {path}")
192
+ # make sure environment is aware of it
193
+ os.environ["PATH"] += os.pathsep + os.path.dirname(path)
194
+ os.environ["JHOVE_HOME"] = os.path.dirname(path)
195
+
196
+ if os.path.exists(jhove_bin):
197
+ set_jhove(jhove_bin)
198
+ else:
199
+ # search anywhere under /opt/jhove in case installer wrote elsewhere
200
  stdout, _, _ = run_cmd(
201
  "find /opt/jhove -name 'jhove' -not -name '*.jar' 2>/dev/null | head -1"
202
  )
203
+ if stdout and os.path.exists(stdout):
204
+ set_jhove(stdout)
205
 
206
+ if not TOOL_STATE["jhove"]["ready"]:
 
 
 
 
207
  log.append("⬇ Installing JHOVE (may take ~2 min)…")
208
+ # Ensure Java 21 is available
209
+ run_cmd("apt-get install -qq -y openjdk-21-jre-headless wget 2>/dev/null || true")
210
+ # download installer jar
211
+ run_cmd("wget -q -O /tmp/jhove-latest.jar https://software.openpreservation.org/rel/jhove-latest.jar")
212
+
213
+ # create an automated installation xml and run it
214
+ config_xml = "/tmp/jhove-auto.xml"
215
+ install_target = jhove_dir
216
+ xml_content = f"""<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>
217
+ <AutomatedInstallation langpack=\"eng\">
218
+ <com.izforge.izpack.panels.htmlhello.HTMLHelloPanel id=\"welcome\"/>
219
+ <com.izforge.izpack.panels.target.TargetPanel id=\"install_dir\">
220
+ <installpath>{install_target}</installpath>
221
+ </com.izforge.izpack.panels.target.TargetPanel>
222
+ <com.izforge.izpack.panels.packs.PacksPanel id=\"sdk_pack_select\">
223
+ <pack index=\"0\" name=\"JHOVE Application\" selected=\"true\"/>
224
+ <pack index=\"1\" name=\"JHOVE Shell Scripts\" selected=\"true\"/>
225
+ <pack index=\"2\" name=\"JHOVE External Modules\" selected=\"true\"/>
226
+ </com.izforge.izpack.panels.packs.PacksPanel>
227
+ <com.izforge.izpack.panels.install.InstallPanel id=\"install\"/>
228
+ <com.izforge.izpack.panels.finish.FinishPanel id=\"finish\"/>
229
+ </AutomatedInstallation>
230
+ """
231
+ try:
232
+ with open(config_xml, "w") as f:
233
+ f.write(xml_content)
234
+ run_cmd(f"java -jar /tmp/jhove-latest.jar {config_xml}")
235
+ except Exception:
236
+ pass
237
+
238
+ # look for the binary again in the installation directory
239
+ stdout, _, _ = run_cmd(
240
+ f"find {install_target} -name 'jhove' -not -name '*.jar' 2>/dev/null | head -1"
241
  )
242
+ if stdout and os.path.exists(stdout):
243
+ set_jhove(stdout)
 
 
 
 
 
 
 
 
 
244
  else:
245
  log.append("βœ— JHOVE install failed")
246
 
 
287
  with tempfile.TemporaryDirectory() as tmp:
288
  profile = os.path.join(tmp, "profile.droid")
289
  csv_out = os.path.join(tmp, "export.csv")
290
+ # if jar points to a shell script, invoke it directly; otherwise
291
+ # assume it's a jar and run with java -jar.
292
+ if jar.endswith(".sh") or jar.endswith("droid"):
293
+ subprocess.run(
294
+ [jar, "-a", filepath, "-Ns", sig, "-p", profile],
295
+ capture_output=True, text=True, timeout=120
296
+ )
297
+ export = subprocess.run(
298
+ [jar, "-p", profile, "-e", csv_out],
299
+ capture_output=True, text=True, timeout=60
300
+ )
301
+ else:
302
+ subprocess.run(
303
+ ["java", "-jar", jar, "-a", filepath, "-Ns", sig, "-p", profile],
304
+ capture_output=True, text=True, timeout=120
305
+ )
306
+ export = subprocess.run(
307
+ ["java", "-jar", jar, "-p", profile, "-e", csv_out],
308
+ capture_output=True, text=True, timeout=60
309
+ )
310
  if os.path.exists(csv_out):
311
  with open(csv_out) as f:
312
  rows = list(csv.DictReader(f))
packages.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  default-jre
2
  wget
3
  unzip
 
1
+ openjdk-21-jre-headless
2
  default-jre
3
  wget
4
  unzip
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio>=4.0.0
2
  pandas>=2.0.0
 
 
1
  gradio>=4.0.0
2
  pandas>=2.0.0
3
+ requests>=2.0.0