mazesmazes committed
Commit e1f4524 · verified · 1 Parent(s): cbe00af

Update custom model files, README, and requirements

Files changed (3):
  1. asr_modeling.py +0 -8
  2. asr_pipeline.py +0 -4
  3. requirements.txt +5 -9
asr_modeling.py CHANGED
@@ -629,8 +629,6 @@ class ASRModel(PreTrainedModel):
         if audio_inputs is None:
             raise ValueError("input_values or input_features must be provided for generation")
 
-        # Debug: Check audio inputs
-
         audio_embeds = self._encode_audio(audio_inputs)
         batch_size = audio_embeds.shape[0]
         device = audio_embeds.device
@@ -673,12 +671,6 @@ class ASRModel(PreTrainedModel):
 
         num_audio_tokens = audio_embeds.shape[1]
         expanded_prompt_ids = self._expand_audio_tokens(prompt_ids, num_audio_tokens)
-
-        # Debug: Show what prompt we built
-        import sys
-        prompt_text = self.tokenizer.decode(expanded_prompt_ids[0], skip_special_tokens=False)
-        print(f"DEBUG generate: Built prompt: {prompt_text[:200]}", file=sys.stderr)
-
         inputs_embeds = self._prepare_audio_inputs_embeds(expanded_prompt_ids, audio_embeds)
         total_seq_len = inputs_embeds.shape[1]
         attention_mask = torch.ones(batch_size, total_seq_len, dtype=torch.long, device=device)
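For readers skimming the diff: the retained calls to _expand_audio_tokens and _prepare_audio_inputs_embeds are what splice the audio embeddings into the text prompt. A minimal sketch of the expansion step, assuming a single placeholder audio token that is repeated once per audio frame (illustration only, not the repository's actual implementation):

import torch

def expand_audio_tokens(prompt_ids: torch.Tensor, num_audio_tokens: int, audio_token_id: int) -> torch.Tensor:
    # Repeat each audio placeholder so the prompt has one position per audio
    # embedding frame; all other tokens pass through unchanged.
    # Assumes every prompt in the batch contains the same number of placeholders.
    rows = []
    for row in prompt_ids:  # prompt_ids: (batch, seq_len)
        pieces = []
        for tok in row.tolist():
            pieces.extend([audio_token_id] * num_audio_tokens if tok == audio_token_id else [tok])
        rows.append(torch.tensor(pieces, dtype=prompt_ids.dtype))
    return torch.stack(rows)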
asr_pipeline.py CHANGED
@@ -219,10 +219,6 @@ class ASRPipeline(transformers.AutomaticSpeechRecognitionPipeline):
         generate_kwargs.setdefault("eos_token_id", im_end_id)
         generate_kwargs.setdefault("max_new_tokens", self.model.config.max_new_tokens)
 
-        # Debug: Log what we're passing to generate
-        import sys
-        print(f"DEBUG _forward: task={task}, system_prompt={self.model.config.system_prompt}", file=sys.stderr)
-
         # Pass the appropriate input type to generate
         if is_whisper:
             # Whisper model - use input_features
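Because the remaining generate_kwargs lines use setdefault, eos_token_id and max_new_tokens are only fallbacks and can still be overridden by the caller. A hedged usage sketch (the repo id and audio file name are assumptions):

from transformers import pipeline

# trust_remote_code is required because asr_modeling.py / asr_pipeline.py
# live in the model repository itself.
asr = pipeline(
    "automatic-speech-recognition",
    model="mazesmazes/tiny-audio",  # assumed repo id
    trust_remote_code=True,
)

# setdefault in _forward means explicit values passed here take precedence.
result = asr("sample.wav", generate_kwargs={"max_new_tokens": 128})
print(result["text"])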
requirements.txt CHANGED
@@ -1,9 +1,5 @@
-# Use latest compatible versions
-gradio
-transformers>=4.57.1
-torch
-torchaudio
-torchcodec
-peft
-truecase
-nltk
+# Core dependencies for tiny-audio model inference
+# This file is pushed to HuggingFace for model repository
+
+# Transformers - main library for model loading and inference
+transformers>=4.57.0
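Since the trimmed requirements only pin transformers, a quick sanity check of the runtime before loading the model might look like this (purely illustrative; torch and torchaudio are assumed to come from the hosting environment rather than this file):

from importlib.metadata import version
from packaging.version import Version

# requirements.txt now only guarantees a transformers floor of 4.57.0.
if Version(version("transformers")) < Version("4.57.0"):
    raise RuntimeError("tiny-audio expects transformers>=4.57.0 per requirements.txt")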