Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -13,7 +13,7 @@ import time
 
 
 
-
+
 gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
@@ -61,22 +61,25 @@ def inference(audio, state=""):
 
     # length penalty for gpt2.generate???
     #Prompt
-    generated_outputs = gpt2.generate(input_ids, do_sample=True, num_return_sequences=3, output_scores=True, max_length=4)
-
+    #generated_outputs = gpt2.generate(input_ids, do_sample=True, num_return_sequences=3, output_scores=True, max_length=4)
+    output = model.generate(input_ids, max_length=5, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=5)
+    print("output ", output)
+    #outputs = [output[-4:] for output in output.tolist()]
     # print("outputs generated ", generated_outputs[0])
     # only use id's that were generated
     # gen_sequences has shape [3, 15]
-
-
+
+    #gen_sequences = outputs.sequences[:, input_ids.shape[-1]:]
+    #print("gen sequences: ", gen_sequences)
 
     # let's stack the logits generated at each step to a tensor and transform
     # logits to probs
-    probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1) # -> shape [3, 15, vocab_size]
+    #probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1) # -> shape [3, 15, vocab_size]
 
     # now we need to collect the probability of the generated token
     # we need to add a dummy dim in the end to make gather work
-    gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
-    print("gen probs result: ", gen_probs)
+    #gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
+    #print("gen probs result: ", gen_probs)
     # now we can do all kinds of things with the probs
 
     # 1) the probs that exactly those sequences are generated again
@@ -99,11 +102,11 @@ def inference(audio, state=""):
     print(state)
     gt = [gt['generated_text'] for gt in state]
     print(type(gt))
-
+    gtTrim = [gt.lstrip(result) for val in gt]
 
     # result.text
     #return getText, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
-    return result.text, state,
+    return result.text, state, gtTrim
 
 
 
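Two notes on the replacement generate call. First, it switches to sampling filtered by top-k (keep only the 50 highest-probability tokens at each step) and top-p/nucleus sampling (keep the smallest set of tokens whose probabilities sum to 0.95), drawing 5 sequences. Second, it calls model.generate(...) while this file loads the model under the name gpt2; unless model is defined elsewhere in app.py, that call would raise a NameError at inference time, which would be consistent with the Space's "Runtime error" status shown above. Also, because the model is loaded with return_dict_in_generate=True, generate returns an output object rather than a plain id tensor, so decoding would go through output.sequences. A small sketch under those assumptions, reusing the commit's names:

# The generated ids live in output.sequences when return_dict_in_generate=True;
# skip_special_tokens drops pad/eos markers from the decoded text.
texts = [tokenizer.decode(ids, skip_special_tokens=True) for ids in output.sequences]
print(texts)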
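For reference, the code this commit comments out follows the usual Transformers recipe for recovering the probability of each sampled token: keep the per-step logits with output_scores=True, stack them into one tensor, softmax over the vocabulary, then gather at the generated token ids. Below is a minimal, self-contained sketch of that pattern; the text prompt is a made-up stand-in for the app's audio-derived input_ids, and max_length is raised to 15 so more than one step gets sampled (the commit's max_length=4 counts prompt tokens too, leaving room for very few sampled steps).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

input_ids = tokenizer("Today is", return_tensors="pt").input_ids

# Sample 3 continuations, keeping the logits emitted at each generation step.
generated_outputs = gpt2.generate(
    input_ids,
    do_sample=True,
    num_return_sequences=3,
    output_scores=True,
    max_length=15,
    pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
)

# Drop the prompt ids so only generated tokens remain: shape [3, gen_len].
gen_sequences = generated_outputs.sequences[:, input_ids.shape[-1]:]

# Stack the step-wise scores into [3, gen_len, vocab_size] and turn logits into probs.
probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1)

# Pick out the probability of each generated token; the dummy trailing dim
# makes gather work along the vocab axis, and squeeze drops it again: [3, gen_len].
gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
print("gen probs result: ", gen_probs)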
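The added gtTrim line reads as an attempt to strip the transcribed prompt (result.text) off the front of each generated string, but as written it would fail: gt is a list (which has no lstrip method), the comprehension binds val without using it, and str.lstrip removes any leading characters found in its argument rather than a literal prefix. A hedged sketch of the presumed intent, reusing the commit's names (str.removeprefix needs Python 3.9+):

# Strip the prompt text from the start of each generated string.
gtTrim = [val.removeprefix(result.text) for val in gt]

# Pre-3.9 equivalent; str.lstrip(prefix) would strip a character set instead.
gtTrim = [val[len(result.text):] if val.startswith(result.text) else val
          for val in gt]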