Vasudevakrishna committed on
Commit
b89d6d1
·
verified ·
1 Parent(s): 75dfe2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -4
app.py CHANGED
@@ -32,9 +32,7 @@ processor = AutoProcessor.from_pretrained(config.get("clip_model_name"), trust_
32
  audio_model = whisperx.load_model('tiny', 'cpu', compute_type="float32")
33
 
34
 
35
- def generate_answers(img=None, aud = None, q = None, max_tokens = 30):
36
- print(img, aud, q)
37
-
38
  batch_size = 1
39
  start_iq = tokenizer.encode("<iQ>")
40
  end_iq = tokenizer.encode("</iQ>")
@@ -78,7 +76,6 @@ def generate_answers(img=None, aud = None, q = None, max_tokens = 30):
78
 
79
  for pos in range(max_tokens - 1):
80
  model_output_logits = phi2_model.forward(inputs_embeds = combined_embeds)['logits']
81
- print(model_output_logits.shape)
82
  predicted_word_token_logits = model_output_logits[:, -1, :].unsqueeze(1)
83
  predicted_word_token = torch.argmax(predicted_word_token_logits, dim = -1)
84
  predicted_caption[:, pos] = predicted_word_token.view(1,-1).to('cpu')
 
32
  audio_model = whisperx.load_model('tiny', 'cpu', compute_type="float32")
33
 
34
 
35
+ def generate_answers(img=None, aud = None, q = None, max_tokens = 30):
 
 
36
  batch_size = 1
37
  start_iq = tokenizer.encode("<iQ>")
38
  end_iq = tokenizer.encode("</iQ>")
 
76
 
77
  for pos in range(max_tokens - 1):
78
  model_output_logits = phi2_model.forward(inputs_embeds = combined_embeds)['logits']
 
79
  predicted_word_token_logits = model_output_logits[:, -1, :].unsqueeze(1)
80
  predicted_word_token = torch.argmax(predicted_word_token_logits, dim = -1)
81
  predicted_caption[:, pos] = predicted_word_token.view(1,-1).to('cpu')