emvecchi committed on
Commit
effc4ff
·
verified ·
1 Parent(s): 008365c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -2
app.py CHANGED
@@ -4,6 +4,12 @@ from dataclasses import dataclass, field
4
  from typing import List, Optional, Dict
5
  from PIL import Image
6
 
 
 
 
 
 
 
7
  import numpy as np
8
  import pandas as pd
9
  import streamlit as st
@@ -163,7 +169,117 @@ def display_image(image_path):
163
  with hf_fs.open(image_path) as f:
164
  img = Image.open(f)
165
  st.image(img, caption='8 most contributing properties', use_column_width=True)
166
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  #################################### Streamlit App ####################################
168
 
169
  # Function to navigate rows
@@ -213,7 +329,9 @@ def show_field(f: Field, index: int, data_collected):
213
  if f.name == 'image_name':
214
  display_image(os.path.join(input_repo_path, 'images', value))
215
  elif f.name == 'dialogue_name':
216
- display_dialogue(os.path.join(input_repo_path, 'dialogues', value),max_height_px=500)
 
 
217
  elif f.name == 'patient':
218
  st.markdown(f"**Patient:**  {value}")
219
  else:
 
4
  from typing import List, Optional, Dict
5
  from PIL import Image
6
 
7
+ import re
8
+ import textwrap
9
+ import uuid
10
+ import html as py_html
11
+ from pathlib import Path
12
+
13
  import numpy as np
14
  import pandas as pd
15
  import streamlit as st
 
169
  with hf_fs.open(image_path) as f:
170
  img = Image.open(f)
171
  st.image(img, caption='8 most contributing properties', use_column_width=True)
172
+
173
# Matches text that opens with a bold markdown speaker tag: **T:** or **P:**.
# Group 1 is the speaker letter, group 2 the text after the tag. `.` does not
# match newlines here, so the captured text cannot span several lines.
SPEAKER_RE = re.compile(
    r'^\s*\*\*(T|P):\*\*\s*(.*)$'
)
174
+
175
def _read_md_any(path: str) -> str:
    """Return the UTF-8 decoded contents of a markdown file.

    ``path`` is used unchanged when it already starts with
    ``input_repo_path``; otherwise it is treated as relative to that root
    (e.g. ``"dialogues/foo.md"``). The file is read through ``hf_fs`` when
    ``filesystem == "hf"`` and from the local disk otherwise.
    """
    target = path if path.startswith(input_repo_path) else f"{input_repo_path}/{path}"

    if filesystem != "hf":
        return Path(target).read_text(encoding="utf-8")

    with hf_fs.open(target, "rb") as handle:
        raw = handle.read()
    return raw.decode("utf-8")
185
+
186
def _wrap_paragraph(text: str, width: int) -> list[str]:
    """Wrap one paragraph into lines of at most ``width`` characters.

    Words are never split: neither over-long words nor hyphenated words are
    broken, so a single word longer than ``width`` ends up alone on a line
    that exceeds the limit. Whitespace characters are left unreplaced and are
    not dropped at line edges, so a continuation line may start with a space.
    Blank input yields ``[""]``.
    """
    stripped = text.strip()
    if not stripped:
        return [""]

    wrapper = textwrap.TextWrapper(
        width=width,
        break_long_words=False,
        break_on_hyphens=False,
        replace_whitespace=False,
        drop_whitespace=False,
    )
    pieces = wrapper.wrap(stripped)
    return pieces if pieces else [""]
198
+
199
def _md_dialogue_to_lines(md_text: str, width: int) -> list[str]:
    """Convert a markdown dialogue into HTML-escaped display lines.

    The text is split into paragraphs on blank lines. Each paragraph is
    wrapped with ``_wrap_paragraph`` and HTML-escaped, and consecutive
    paragraphs are separated by one empty string in the result.

    A paragraph matched by ``SPEAKER_RE`` (``**T:**`` / ``**P:**``) gets its
    speaker label rendered as ``<strong>T:</strong>`` on its first line only.
    The label is wrapped together with the text, so the label counts towards
    the ``width`` limit of that first line instead of being added on top of
    an already full line.

    Args:
        md_text: Raw markdown of the dialogue.
        width: Wrap width in characters, passed to ``_wrap_paragraph``.

    Returns:
        The display lines in order, without a trailing empty line.
    """
    # Normalise Windows / old-Mac line endings before splitting.
    md_text = md_text.replace("\r\n", "\n").replace("\r", "\n").strip("\n")
    out: list[str] = []

    for p in re.split(r"\n\s*\n", md_text):  # paragraphs = blank-line separated
        p = p.strip()
        if not p:
            # Keep an empty paragraph as a single empty display line.
            out.append("")
            continue

        # NOTE(review): SPEAKER_RE's `.` does not match newlines, so a
        # paragraph whose text continues on a second line is not recognised
        # as a speaker turn and falls through to the plain branch -- confirm
        # that the dialogue files never soft-wrap a turn.
        m = SPEAKER_RE.match(p)
        if m:
            label = f"{m.group(1)}:"
            # Wrap "T: text" as one unit so the label is part of the width
            # budget of the first line.
            wrapped = _wrap_paragraph(f"{label} {m.group(2)}", width)
            # The label is the first unbreakable chunk of the wrapped text,
            # so the first line always starts with it; swap it for bold HTML.
            first = f"<strong>{label}</strong>{py_html.escape(wrapped[0][len(label):])}"
            out.append(first.rstrip())
            # Continuation lines carry no label.
            out.extend(py_html.escape(w) for w in wrapped[1:])
        else:
            # Plain paragraph (no speaker tag).
            out.extend(py_html.escape(w) for w in _wrap_paragraph(p, width))

        # Blank separator after each paragraph.
        out.append("")

    # Drop the separator that follows the last paragraph.
    if out and out[-1] == "":
        out.pop()
    return out
236
+
237
def display_numbered_dialogue(md_path: str,
                              title: str = "Session Transcription",
                              width_chars: int = 80,
                              max_height_px: int = 520):
    """Show a markdown dialogue in a scrollable box with numbered lines.

    The file is read with ``_read_md_any`` and turned into display lines by
    ``_md_dialogue_to_lines``; every line becomes one grid row holding a
    1-based line number and the line's text.

    Args:
        md_path: Path of the ``.md`` dialogue, passed to ``_read_md_any``.
        title: Heading rendered in bold above the box.
        width_chars: Wrap width in characters; also the CSS ``max-width``
            (in ``ch``) of the text column.
        max_height_px: CSS ``max-height`` of the box in pixels; taller
            content scrolls vertically (``overflow-y: auto``).
    """
    display_lines = _md_dialogue_to_lines(_read_md_any(md_path), width=width_chars)

    # Fresh element id per call so the CSS rules below target only this box.
    block_id = f"dlg-{uuid.uuid4().hex[:8]}"

    # One grid row per display line; empty lines are emitted as &nbsp;.
    row_markup = []
    for number, content in enumerate(display_lines, 1):
        row_markup.append(
            f"<div class='row'><span class='num'>{number}</span><span class='txt'>{content or '&nbsp;'}</span></div>"
        )
    rows_html = "\n".join(row_markup)

    st.markdown(f"""
    <style>
    #{block_id} {{
    border: 1px solid #e6e6e6; border-radius: .6rem; padding: .8rem 1rem; background: #fff;
    max-height: {max_height_px}px; overflow-y: auto;
    font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace;
    line-height: 1.5;
    }}
    #{block_id} .row {{
    display: grid; grid-template-columns: 4ch 1fr; column-gap: 1ch; align-items: start;
    }}
    #{block_id} .num {{
    color: rgba(0,0,0,.55); text-align: right; user-select: none;
    }}
    /* The text column is visually constrained to `width_chars` characters */
    #{block_id} .txt {{
    max-width: {width_chars}ch; white-space: pre-wrap; word-break: break-word;
    }}
    #{block_id} strong {{ font-weight: 700; }}
    </style>
    """, unsafe_allow_html=True)

    with st.container(border=True):
        st.markdown(f"**{title}** \n*(wrapped to {width_chars} chars; numbers by visual lines)*")
        st.markdown(f"<div id='{block_id}'>{rows_html}</div>", unsafe_allow_html=True)
282
+
283
  #################################### Streamlit App ####################################
284
 
285
  # Function to navigate rows
 
329
  if f.name == 'image_name':
330
  display_image(os.path.join(input_repo_path, 'images', value))
331
  elif f.name == 'dialogue_name':
332
+ #display_dialogue(os.path.join(input_repo_path, 'dialogues', value))
333
+ display_numbered_dialogue(os.path.join(input_repo_path, 'images', value), width_chars=80, max_height_px=520)
334
+
335
  elif f.name == 'patient':
336
  st.markdown(f"**Patient:**&nbsp;&nbsp;{value}")
337
  else: