emvecchi committed on
Commit
9fb08f5
·
verified ·
1 Parent(s): ef18f49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -86
app.py CHANGED
@@ -4,8 +4,9 @@ from dataclasses import dataclass, field
4
  from typing import List, Optional, Dict
5
  from PIL import Image
6
 
7
- import re, textwrap, uuid, html as py_html
8
  from pathlib import Path
 
9
 
10
  import numpy as np
11
  import pandas as pd
@@ -34,7 +35,6 @@ def get_param_from_url(param):
34
  return user_id
35
 
36
  ########################################################################################
37
- # CHANGE THE FOLLOWING VARIABLES ACCORDING TO YOUR NEEDS
38
 
39
  # 'local' or 'hf'. 'hf' uses the Hugging Face file system, which limits the number of accesses per hour
40
  filesystem = 'hf'
@@ -167,31 +167,34 @@ def display_image(image_path):
167
  img = Image.open(f)
168
  st.image(img, caption='8 most contributing properties', use_column_width=True)
169
 
170
- TEXT_STACK = "system-ui, -apple-system, 'Segoe UI', Roboto, Helvetica, Arial, 'Noto Sans', 'Liberation Sans', sans-serif"
171
- MONO_STACK = "ui-monospace, SFMono-Regular, Menlo, Consolas, 'Liberation Mono', 'Roboto Mono', monospace"
172
- SPEAKER_RE = re.compile(r'^\s*\*\*(T|P):\*\*\s*(.*)$')
173
 
174
 
175
def _read_md_any(path: str) -> str:
    """Return the UTF-8 text of a markdown file located under ``input_repo_path``.

    If *path* does not already start with ``input_repo_path`` it is joined onto
    it with a ``/``. When ``filesystem`` is ``"hf"`` the file is opened through
    ``hf_fs`` in binary mode and decoded; otherwise it is read from local disk.
    """
    if path.startswith(input_repo_path):
        target = path
    else:
        target = f"{input_repo_path}/{path}"

    if filesystem != "hf":
        return Path(target).read_text(encoding="utf-8")

    with hf_fs.open(target, "rb") as handle:
        payload = handle.read()
    return payload.decode("utf-8")
 
 
181
 
182
- def _wrap_paragraph(text: str, width: int) -> list[str]:
183
  if not text.strip():
184
  return [""]
185
  return textwrap.wrap(
186
- text.strip().replace("\u00A0", " "), # normalize NBSP
187
  width=width,
188
  break_long_words=False,
189
  break_on_hyphens=False,
190
- drop_whitespace=True, # <- prevents leading spaces on next line
191
  replace_whitespace=True,
192
  ) or [""]
193
 
194
- def _md_dialogue_to_lines(md_text: str, width: int) -> list[str]:
 
195
  md_text = md_text.replace("\r\n", "\n").replace("\r", "\n").strip("\n")
196
  paragraphs = re.split(r"\n\s*\n", md_text)
197
  out = []
@@ -203,92 +206,76 @@ def _md_dialogue_to_lines(md_text: str, width: int) -> list[str]:
203
  m = SPEAKER_RE.match(p)
204
  if m:
205
  speaker, content = m.group(1), m.group(2)
206
- wrapped = _wrap_paragraph(content, width)
207
  out.append(f"<strong>{speaker}:</strong> {py_html.escape(wrapped[0])}".rstrip())
208
  for w in wrapped[1:]:
209
  out.append(py_html.escape(w))
210
  else:
211
- for w in _wrap_paragraph(p, width):
212
  out.append(py_html.escape(w))
213
- out.append("") # blank line between paragraphs
214
  if out and out[-1] == "": out.pop()
215
  return out
216
 
217
# Inject the global dialogue CSS (prevents FOUC and font swaps)
def inject_dialogue_css_once():
    """Emit the ``<style id="dlg-css">`` block used by the dialogue table.

    The stylesheet is written with ``st.markdown`` on every call. Streamlit
    rebuilds the page from the elements emitted during the current script
    run, while ``st.session_state`` persists across reruns; returning early
    when the ``_dlg_css_injected`` flag is set would therefore emit the styles
    on the first run only and leave every later rerun without them.

    The flag is still set for any code that reads it, but it no longer
    suppresses the injection.
    """
    st.session_state["_dlg_css_injected"] = True

    # dedent keeps the <style> tag at column 0 so Markdown treats it as raw
    # HTML rather than as an indented code block.
    css = textwrap.dedent(f"""
        <style id="dlg-css">
          /* container styles (applied per instance inline) */

          /* table layout is very stable across rerenders */
          .dlg-table {{
            border-collapse: separate;
            border-spacing: 0 0;
            table-layout: fixed; /* prevents column jitter */
            width: max-content;
            max-width: 100%;
          }}
          .dlg-row td {{
            vertical-align: top;
            padding: 0;
          }}
          .dlg-num {{
            width: 4ch; /* fixed gutter */
            padding-right: 1ch;
            text-align: right;
            color: rgba(0,0,0,.55);
            user-select: none;
            font-family: {MONO_STACK};
            font-variant-numeric: tabular-nums;
          }}
          .dlg-txt {{
            white-space: pre-wrap;
            word-break: break-word;
          }}
          .dlg-txt strong {{ font-weight: 700; }}
          /* keep rendering consistent */
          .dlg-root {{
            -webkit-font-smoothing: antialiased;
            -moz-osx-font-smoothing: grayscale;
            font-synthesis: none;
          }}
        </style>
        """)
    st.markdown(css, unsafe_allow_html=True)
259
-
260
def display_numbered_dialogue(md_path: str,
                              title: str = "Session Transcription",
                              width_chars: int = 80,
                              max_height_px: int = 520,
                              font_family: str = TEXT_STACK,
                              font_size: str = "1rem",
                              show_border: bool = False,
                              background: str = "transparent"):
    """Render a markdown dialogue as a scrollable, line-numbered HTML table.

    Args:
        md_path: Path of the ``.md`` dialogue, passed to ``_read_md_any``.
        title: Accepted for interface compatibility; not rendered here.
        width_chars: Wrap width (in characters) for each visual line.
        max_height_px: Maximum height of the scrollable container, in pixels.
        font_family: CSS ``font-family`` value applied to the table.
        font_size: CSS ``font-size`` value applied to the table.
        show_border: When true, draw a rounded border with inner padding.
        background: CSS ``background`` value of the container.
    """
    inject_dialogue_css_once()
    lines = _md_dialogue_to_lines(_read_md_any(md_path), width=width_chars)

    # Deterministic id: a random uuid4 would change the emitted markup on every
    # rerun even though nothing about the dialogue changed.
    block_id = f"dlg-{uuid.uuid5(uuid.NAMESPACE_URL, md_path).hex[:8]}"
    border_css = "1px solid #e6e6e6" if show_border else "none"
    radius_css = ".6rem" if show_border else "0"
    pad_css = ".8rem 1rem" if show_border else "0"

    # container + table (no CSS grid); empty lines get &nbsp; so the row keeps its height
    rows = "\n".join(
        f"<tr class='dlg-row'><td class='dlg-num'>{i}</td><td class='dlg-txt'>{ln or '&nbsp;'}</td></tr>"
        for i, ln in enumerate(lines, 1)
    )

    # Assembled line by line with no leading indentation, so Markdown always
    # sees an HTML block and never an indented code block.
    markup = "\n".join([
        f'<div id="{block_id}" class="dlg-root"',
        f'style="border:{border_css}; border-radius:{radius_css}; padding:{pad_css};',
        f'background:{background}; max-height:{max_height_px}px; overflow-y:auto;">',
        f'<table class="dlg-table" style="font-family:{font_family}; font-size:{font_size};">',
        rows,
        "</table>",
        "</div>",
    ])
    st.markdown(markup, unsafe_allow_html=True)
 
 
292
 
293
 
294
  #################################### Streamlit App ####################################
@@ -341,10 +328,7 @@ def show_field(f: Field, index: int, data_collected):
341
  display_image(os.path.join(input_repo_path, 'images', value))
342
  elif f.name == 'dialogue_name':
343
  #display_dialogue(os.path.join(input_repo_path, 'dialogues', value))
344
- display_numbered_dialogue(os.path.join(input_repo_path, 'dialogues', value), max_height_px=520,
345
- show_border=False, # no border
346
- font_family="system-ui, -apple-system, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif",
347
- font_size="1.05rem",)
348
  elif f.name == 'patient':
349
  st.markdown(f"**Patient:**&nbsp;&nbsp;{value}")
350
  else:
 
4
  from typing import List, Optional, Dict
5
  from PIL import Image
6
 
7
+ import re, textwrap, html as py_html
8
  from pathlib import Path
9
+ from streamlit.components.v1 import html as st_html
10
 
11
  import numpy as np
12
  import pandas as pd
 
35
  return user_id
36
 
37
  ########################################################################################
 
38
 
39
  # 'local' or 'hf'. 'hf' uses the Hugging Face file system, which limits the number of accesses per hour
40
  filesystem = 'hf'
 
167
  img = Image.open(f)
168
  st.image(img, caption='8 most contributing properties', use_column_width=True)
169
 
 
 
 
170
 
171
 
172
+
173
+ SPEAKER_RE = re.compile(r'^\s*\*\*(T|P):\*\*\s*(.*)$')
174
+
175
def read_md(path: str) -> str:
    """Return the UTF-8 decoded contents of the markdown file at *path*.

    When ``filesystem`` is ``'hf'`` the file is opened through ``hf_fs`` in
    binary mode; for any other value it is read from local disk.
    """
    if filesystem != 'hf':
        # local path
        return Path(path).read_text(encoding="utf-8")

    with hf_fs.open(path, "rb") as handle:
        payload = handle.read()
    return payload.decode("utf-8")
183
 
184
def wrap_para(text: str, width: int) -> list[str]:
    """Wrap a single paragraph to *width* columns and return the lines.

    Blank input yields ``[""]`` so callers always receive at least one line.
    Non-breaking spaces become ordinary spaces before wrapping; long words and
    hyphenated words are kept whole.
    """
    stripped = text.strip()
    if not stripped:
        return [""]

    wrapper = textwrap.TextWrapper(
        width=width,
        break_long_words=False,
        break_on_hyphens=False,
        drop_whitespace=True,
        replace_whitespace=True,
    )
    pieces = wrapper.wrap(stripped.replace("\u00A0", " "))
    return pieces if pieces else [""]
195
 
196
+ def md_dialogue_to_visual_lines(md_text: str, width: int) -> list[str]:
197
+ """Return a list of wrapped visual lines with <strong>T:/P:</strong> on first line."""
198
  md_text = md_text.replace("\r\n", "\n").replace("\r", "\n").strip("\n")
199
  paragraphs = re.split(r"\n\s*\n", md_text)
200
  out = []
 
206
  m = SPEAKER_RE.match(p)
207
  if m:
208
  speaker, content = m.group(1), m.group(2)
209
+ wrapped = wrap_para(content, width)
210
  out.append(f"<strong>{speaker}:</strong> {py_html.escape(wrapped[0])}".rstrip())
211
  for w in wrapped[1:]:
212
  out.append(py_html.escape(w))
213
  else:
214
+ for w in wrap_para(p, width):
215
  out.append(py_html.escape(w))
216
+ out.append("") # blank between paragraphs
217
  if out and out[-1] == "": out.pop()
218
  return out
219
 
220
def render_dialogue(md_path: str,
                    width_chars: int = 80,
                    height_px: int = 520,
                    font_family: str = "system-ui, -apple-system, 'Segoe UI', Roboto, Helvetica, Arial, 'Noto Sans', sans-serif",
                    font_size: str = "1.05rem",
                    show_border: bool = False):
    """Render the .md dialogue with line numbers inside an iframe to avoid Streamlit CSS quirks.

    Args:
        md_path: Path of the dialogue file, passed to ``read_md``.
        width_chars: Wrap width in characters; also the ``max-width`` (in ``ch``)
            of the text column.
        height_px: Base height of the component in pixels.
        font_family: CSS ``font-family`` value for the dialogue text.
        font_size: CSS ``font-size`` value for the dialogue text.
        show_border: When true, draw a rounded border with inner padding and
            make the iframe 16px taller.
    """
    text = read_md(md_path)
    lines = md_dialogue_to_visual_lines(text, width_chars)

    border_css = "1px solid #e6e6e6" if show_border else "none"
    radius_css = ".6rem" if show_border else "0"
    padding_css = ".8rem 1rem" if show_border else "0"

    # The iframe does not scroll, so the inner box must never be taller than
    # the frame. .root is sized border-box to exactly the frame height; with a
    # content-box height its padding and border would be added on top and the
    # bottom edge would be clipped when show_border is on.
    frame_height = height_px + (16 if show_border else 0)

    # Build stable HTML (table keeps gutter aligned)
    rows = "\n".join(
        f"<tr><td class='num'>{i}</td><td class='txt'>{ln or '&nbsp;'}</td></tr>"
        for i, ln in enumerate(lines, 1)
    )

    html_doc = f"""
    <!doctype html>
    <meta charset="utf-8">
    <style>
      :root {{
        --font-text: {font_family};
        --font-size: {font_size};
      }}
      html, body {{
        margin:0; padding:0; background:transparent;
        font-family: var(--font-text); font-size: var(--font-size);
        -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale;
        font-synthesis: none; /* avoid auto fake-bold */
      }}
      .root {{
        box-sizing:border-box;
        border:{border_css}; border-radius:{radius_css}; padding:{padding_css};
        height:{frame_height}px; overflow:auto; background:transparent;
        line-height:1.5;
      }}
      table {{ border-collapse:collapse; table-layout:fixed; width:max-content; max-width:100%; }}
      td {{ padding:0; vertical-align:top; }}
      .num {{
        width:4ch; padding-right:1ch; text-align:right; color:rgba(0,0,0,.55);
        user-select:none; font-variant-numeric: tabular-nums;
        font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, 'Liberation Mono', monospace;
      }}
      .txt {{
        white-space: pre-wrap; word-break: break-word; max-width:{width_chars}ch;
      }}
      strong {{ font-weight:700; }}
    </style>
    <div class="root">
      <table>
        {rows}
      </table>
    </div>
    """
    # iframe height = outer height of .root; no external scrollbars flashing
    st_html(html_doc, height=frame_height, scrolling=False)
279
 
280
 
281
  #################################### Streamlit App ####################################
 
328
  display_image(os.path.join(input_repo_path, 'images', value))
329
  elif f.name == 'dialogue_name':
330
  #display_dialogue(os.path.join(input_repo_path, 'dialogues', value))
331
+ render_dialogue(os.path.join(input_repo_path, 'dialogues', value), width_chars=80, height_px=520, show_border=False)
 
 
 
332
  elif f.name == 'patient':
333
  st.markdown(f"**Patient:**&nbsp;&nbsp;{value}")
334
  else: