flozi00 committed on
Commit
61c5634
·
1 Parent(s): 003af5a
Files changed (2) hide show
  1. app.py +7 -19
  2. requirements.txt +1 -2
app.py CHANGED
@@ -13,7 +13,6 @@ import gradio.themes as gr_themes
13
  import numpy as np
14
  import spaces
15
  import torch
16
- from deepmultilingualpunctuation import PunctuationModel
17
  from huggingface_hub import CommitScheduler, hf_hub_download
18
  from nemo.collections.asr.models import ASRModel
19
  from pydub import AudioSegment
@@ -75,13 +74,6 @@ whisper_tiny_model = AutoModelForSpeechSeq2Seq.from_pretrained(
75
  )
76
  whisper_tiny_processor = AutoProcessor.from_pretrained(whisper_tiny_model_id)
77
 
78
- # Load punctuation restoration model
79
- print("Loading punctuation restoration model...")
80
- punctuation_model = PunctuationModel(
81
- model="oliverguhr/fullstop-punctuation-multilingual-sonar-base"
82
- )
83
- print("Punctuation model loaded successfully.")
84
-
85
 
86
  def start_session(request: gr.Request):
87
  session_hash = request.session_hash
@@ -172,20 +164,16 @@ def generate_srt_content(
172
 
173
  def restore_punctuation(text: str) -> str:
174
  """
175
- Restore punctuation in a text using the punctuation model.
176
- Also fixes casing after sentence-ending punctuation (. ? !).
177
- Returns the text with punctuation and proper casing.
178
  """
179
  if not text or text.strip() == "":
180
  return text
181
- try:
182
- punctuated_text = punctuation_model.restore_punctuation(text)
183
- # Fix casing after sentence-ending punctuation
184
- punctuated_text = fix_sentence_casing(punctuated_text)
185
- return punctuated_text
186
- except Exception as e:
187
- print(f"Warning: Punctuation restoration failed: {e}")
188
- return text
189
 
190
 
191
  def fix_sentence_casing(text: str) -> str:
 
13
  import numpy as np
14
  import spaces
15
  import torch
 
16
  from huggingface_hub import CommitScheduler, hf_hub_download
17
  from nemo.collections.asr.models import ASRModel
18
  from pydub import AudioSegment
 
74
  )
75
  whisper_tiny_processor = AutoProcessor.from_pretrained(whisper_tiny_model_id)
76
 
 
 
 
 
 
 
 
77
 
78
  def start_session(request: gr.Request):
79
  session_hash = request.session_hash
 
164
 
165
  def restore_punctuation(text: str) -> str:
166
  """
167
+ Apply basic text formatting (capitalize first letter).
168
+ Returns the text with basic formatting.
 
169
  """
170
  if not text or text.strip() == "":
171
  return text
172
+ # Just capitalize the first letter if it's lowercase
173
+ text = text.strip()
174
+ if text and text[0].islower():
175
+ text = text[0].upper() + text[1:]
176
+ return text
 
 
 
177
 
178
 
179
  def fix_sentence_casing(text: str) -> str:
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  Cython
2
  cuda-python
3
  nemo_toolkit[asr]==2.5.0
4
- transformers
5
- deepmultilingualpunctuation
 
1
  Cython
2
  cuda-python
3
  nemo_toolkit[asr]==2.5.0
4
+ transformers