erkwoo committed on
Commit
5a14485
·
verified ·
1 Parent(s): 53eb5f3

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +18 -12
  2. app.py +81 -0
  3. requirements.txt +1 -0
README.md CHANGED
@@ -1,12 +1,18 @@
1
- ---
2
- title: Batongformatering
3
- emoji: 🐢
4
- colorFrom: red
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
1
+ # Transkript-formatter (Hugging Face Space)
2
+
3
+ Lim inn transkriptet ditt i venstre tekstboks og trykk **Formatér**.
4
+ Appen bevarer tidsstempel og slår sammen alle linjer under hvert tidsstempel til én linje.
5
+ Du kan kopiere resultatet eller laste det ned som `.txt`.
6
+
7
+ ## Lokal kjøring
8
+ 1. `pip install -r requirements.txt`
9
+ 2. `python app.py`
10
+ 3. Åpne `http://localhost:7860` i nettleseren.
11
+
12
+ ## Deploy til Hugging Face Spaces
13
+ 1. Lag en ny Space, velg `Gradio`.
14
+ 2. Push disse filene (`app.py`, `requirements.txt`, `README.md`) til Space-repoen.
15
+ 3. Space starter automatisk.
16
+
17
+ ## Personvern
18
+ Teksten som limes inn i denne appen behandles av Space-instansen. Hvis Space-en settes som offentlig på Hugging Face, blir innholdet ikke indeksert av HF, men du må ikke lime inn sensitive personopplysninger uten å sikre at du følger egne retningslinjer.
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import re
3
+ import gradio as gr
4
+ from datetime import datetime
5
+
6
# Matches bracketed timestamps such as [00:00:23] or [00:00:23.850]. The outer
# capturing group makes re.split() keep every timestamp in its result list.
TIMESTAMP_RE = re.compile(r'(\[\d{2}:\d{2}:\d{2}(?:\.\d{1,3})?\])')


def _collapse_whitespace(fragment: str) -> str:
    """Turn newline runs into single spaces, trim, and squeeze space runs."""
    flattened = re.sub(r'\n+', ' ', fragment).strip()
    return re.sub(r' {2,}', ' ', flattened)


def format_transcript(text: str) -> str:
    """Merge the lines under each timestamp into a single line.

    Every timestamp is preserved and followed, on the same line, by all the
    text that appeared between it and the next timestamp. The resulting
    lines are separated by one blank line.

    Args:
        text: Raw transcript. May be empty or ``None``; any of ``\\r\\n``,
            ``\\r`` or ``\\n`` is accepted as a line ending.

    Returns:
        The formatted transcript. Text that precedes the first timestamp is
        kept as its own leading block; input without any timestamp is
        returned as a single whitespace-collapsed line. Empty input yields
        an empty string.
    """
    if not text:
        return ""

    # Normalize line endings so only '\n' has to be handled below.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Because the pattern has one capturing group, split() always returns
    # [intro, ts, body, ts, body, ...]: index 0 is the (possibly empty) text
    # before the first timestamp and the list length is always odd.
    parts = TIMESTAMP_RE.split(text)

    out_lines = []

    # Keep stray text that precedes the first timestamp as its own block so
    # that no content is lost and every timestamp still starts its line.
    intro = _collapse_whitespace(parts[0])
    if intro:
        out_lines.append(intro)

    for ts, body in zip(parts[1::2], parts[2::2]):
        # strip() leaves just the timestamp when its body is empty.
        out_lines.append(f"{ts} {_collapse_whitespace(body)}".strip())

    # One blank line between segments for readability.
    return "\n\n".join(out_lines)
54
+
55
def download_filename():
    """Return a timestamped file name for the formatted transcript.

    The name has the form ``formatted_transcript_YYYYMMDDTHHMMSSZ.txt``,
    where the stamp is the current time in UTC.
    """
    # Local import: the file's top-level import only brings in ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
    # renders to exactly the same string with this format.
    now = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    return f"formatted_transcript_{now}.txt"
58
+
59
def format_and_download(text):
    """Format the transcript and write the result to a downloadable file.

    Args:
        text: Raw transcript as entered in the input textbox.

    Returns:
        A ``(formatted, path)`` tuple: the formatted transcript and the path
        of a UTF-8 encoded ``.txt`` file holding the same text.
    """
    import os
    import tempfile

    formatted = format_transcript(text)
    # Gradio file outputs take a path on disk rather than raw bytes, so the
    # text is written into a fresh temporary directory under the timestamped
    # download name.
    path = os.path.join(tempfile.mkdtemp(), download_filename())
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(formatted)
    return formatted, path


with gr.Blocks(title="Transkript-formatter") as demo:
    gr.Markdown("### Lim inn transkriptet ditt med tidsstempel (f.eks. `[00:00:23.850]`) og trykk Format. Appen bevarer alt innhold og slår sammen linjer under hvert tidsstempel til én linje.")
    with gr.Row():
        inp = gr.Textbox(lines=20, label="Råtekst (lim inn her)", placeholder="Lim inn hele transkriptet her...")
        out = gr.Textbox(lines=20, label="Formatert tekst")
    with gr.Row():
        btn = gr.Button("Formatér")
        # gr.DownloadButton serves the file path stored as its value when the
        # user clicks it (there is no ``gr.Download`` component).
        download = gr.DownloadButton("Last ned .txt (formatert)")
        copy_btn = gr.Button("Kopier til utklippstavlen")
    # Formatting fills the output box and, in the same step, points the
    # download button at a file containing the freshly formatted text.
    btn.click(format_and_download, inputs=inp, outputs=[out, download])
    # Clipboard access only exists in the browser, so the copy runs entirely
    # client-side and needs no Python handler.
    copy_btn.click(
        None,
        inputs=out,
        outputs=None,
        js="(text) => { navigator.clipboard.writeText(text); }",
    )
79
+
80
if __name__ == "__main__":
    # Listen on all interfaces, on the port the README tells users to open.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio>=3.34