Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,9 +32,7 @@ processor = AutoProcessor.from_pretrained(config.get("clip_model_name"), trust_
|
|
| 32 |
audio_model = whisperx.load_model('tiny', 'cpu', compute_type="float32")
|
| 33 |
|
| 34 |
|
| 35 |
-
def generate_answers(img=None, aud = None, q = None, max_tokens = 30):
|
| 36 |
-
print(img, aud, q)
|
| 37 |
-
|
| 38 |
batch_size = 1
|
| 39 |
start_iq = tokenizer.encode("<iQ>")
|
| 40 |
end_iq = tokenizer.encode("</iQ>")
|
|
@@ -78,7 +76,6 @@ def generate_answers(img=None, aud = None, q = None, max_tokens = 30):
|
|
| 78 |
|
| 79 |
for pos in range(max_tokens - 1):
|
| 80 |
model_output_logits = phi2_model.forward(inputs_embeds = combined_embeds)['logits']
|
| 81 |
-
print(model_output_logits.shape)
|
| 82 |
predicted_word_token_logits = model_output_logits[:, -1, :].unsqueeze(1)
|
| 83 |
predicted_word_token = torch.argmax(predicted_word_token_logits, dim = -1)
|
| 84 |
predicted_caption[:, pos] = predicted_word_token.view(1,-1).to('cpu')
|
|
|
|
| 32 |
audio_model = whisperx.load_model('tiny', 'cpu', compute_type="float32")
|
| 33 |
|
| 34 |
|
| 35 |
+
def generate_answers(img=None, aud = None, q = None, max_tokens = 30):
|
|
|
|
|
|
|
| 36 |
batch_size = 1
|
| 37 |
start_iq = tokenizer.encode("<iQ>")
|
| 38 |
end_iq = tokenizer.encode("</iQ>")
|
|
|
|
| 76 |
|
| 77 |
for pos in range(max_tokens - 1):
|
| 78 |
model_output_logits = phi2_model.forward(inputs_embeds = combined_embeds)['logits']
|
|
|
|
| 79 |
predicted_word_token_logits = model_output_logits[:, -1, :].unsqueeze(1)
|
| 80 |
predicted_word_token = torch.argmax(predicted_word_token_logits, dim = -1)
|
| 81 |
predicted_caption[:, pos] = predicted_word_token.view(1,-1).to('cpu')
|