# Hugging Face Spaces app — captured while the Space's status banner read
# "Runtime error" (see the Gradio 3.x API-removal fix near the bottom).
import glob
import os
import random
import subprocess
import uuid

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
def yt2mp3(url, outputMp3F):
    """Download the audio track of a YouTube video and convert it to MP3.

    Args:
        url: YouTube video URL. Treated as untrusted input.
        outputMp3F: path of the MP3 file to write (44.1 kHz stereo, 192 kbps).

    Raises:
        subprocess.CalledProcessError: if youtube-dl or ffmpeg exits non-zero
            (the original silently ignored failures via os.system).
    """
    # Collision-proof scratch name; the original used random.random(), whose
    # float repr can collide across concurrent requests.
    tmpVideoF = f"yt_{uuid.uuid4().hex}"
    # List-form argv with shell=False: `url` can no longer inject shell
    # metacharacters, unlike the original f-string passed to os.system().
    subprocess.run(
        ["./bin/youtube-dl", "-o", f"/tmp/{tmpVideoF}", "--verbose", url],
        check=True,
    )
    # youtube-dl appends a container extension (.mp4/.webm/...); resolve it
    # with glob instead of relying on shell wildcard expansion.
    matches = glob.glob(f"/tmp/{tmpVideoF}.*") or [f"/tmp/{tmpVideoF}"]
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", matches[0],
            "-vn",
            "-ar", "44100",
            "-ac", "2",
            "-b:a", "192k",
            outputMp3F,
        ],
        check=True,
    )
def speech2text(mp3_file):
    """Transcribe an audio file to text with Distil-Whisper.

    Args:
        mp3_file: path of the audio file to transcribe.

    Returns:
        The transcribed text (str).
    """
    # Derive device AND dtype from a single availability check. The original
    # hard-coded device = 'cuda:0' while only the dtype checked
    # torch.cuda.is_available(), so it crashed outright on CPU-only hosts.
    has_cuda = torch.cuda.is_available()
    device = "cuda:0" if has_cuda else "cpu"
    torch_dtype = torch.float16 if has_cuda else torch.float32
    model_id = "distil-whisper/distil-large-v2"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
        # Flash-attention kernels only exist for CUDA; requesting them on CPU
        # fails at load time.
        use_flash_attention_2=has_cuda,
    )
    model.to(device)
    processor = AutoProcessor.from_pretrained(model_id)
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        # Chunked long-form decoding: 15 s windows, 16 at a time.
        chunk_length_s=15,
        batch_size=16,
        torch_dtype=torch_dtype,
        device=device,
    )
    result = pipe(mp3_file)
    return result["text"]
def chat(system_prompt, text):
    """Generate a Llama-2-chat completion for `text` under `system_prompt`.

    Args:
        system_prompt: system instruction placed in the <<SYS>> section.
        text: user message.

    Returns:
        The generated assistant reply with the prompt prefix stripped (str).

    Raises:
        KeyError: if the HUGGINGFACE_TOKEN environment variable is unset
            (the gated Llama-2 checkpoint requires an access token).
    """
    model_name = "meta-llama/Llama-2-7b-chat-hf"
    token = os.environ['HUGGINGFACE_TOKEN']
    # 8-bit weights via bitsandbytes so the 7B model fits on a single GPU.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True
    )
    device_map = {"": 0}  # place the whole model on GPU 0
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
        use_auth_token=token
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    llama_pipeline = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
    # Official Llama-2 chat prompt template.
    text = f"""
<s>[INST] <<SYS>>
{system_prompt}
<</SYS>>
{text}[/INST]
"""
    sequences = llama_pipeline(
        text,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # Llama-2's context window is 4096 tokens; the original asked for
        # max_length=32000, which overruns the model's positional range.
        max_length=4096,
    )
    generated_text = sequences[0]["generated_text"]
    # The pipeline echoes the prompt; keep only what follows the [/INST] tag.
    generated_text = generated_text[generated_text.find('[/INST]')+len('[/INST]'):]
    return generated_text
def summarize(text):
    """Summarize arbitrarily long text by iteratively folding chunk summaries.

    Each round summarizes the first `input_len` characters; the summary is
    prepended to the unread remainder until everything has been consumed.

    Args:
        text: the text to summarize (any length).

    Returns:
        The final summary produced by `chat` (str).
    """
    input_len = 10000  # max characters sent to the model per round
    while True:
        summary = chat("", "Summarize the following: " + text[:input_len])
        # `<=` rather than the original `<`: when len(text) == input_len the
        # chunk already covered the whole text, so looping again would waste
        # a full model call re-summarizing the summary.
        if len(text) <= input_len:
            return summary
        # Carry the head's summary forward in front of the unread tail.
        text = summary + " " + text[input_len:]
import gradio as gr

# `gradio.inputs` was removed in Gradio 3.x, and the `inputs` alias is never
# used below — tolerate its absence instead of crashing the whole app at
# import time on modern Gradio versions.
try:
    import gradio.inputs as inputs  # noqa: F401  (legacy, pre-3.x only)
except ImportError:
    inputs = None
def summarize_from_youtube(url):
    """Download a YouTube video's audio, transcribe it, and summarize it.

    Args:
        url: YouTube video URL.

    Returns:
        A text summary of the video's spoken content (str).
    """
    audio_path = "./files/audio.mp3"
    yt2mp3(url=url, outputMp3F=audio_path)
    transcript = speech2text(mp3_file=audio_path)
    return summarize(transcript)
# Build and launch the web UI.
# `gr.inputs.Textbox` / `gr.outputs.Textbox` were removed in Gradio 3.x —
# this is the API breakage behind the Space's "Runtime error". The modern
# component classes accept the same `lines`/`label` arguments directly.
youtube_url = gr.Textbox(lines=1, label="Masukkan URL YouTube")
output_text = gr.Textbox(label="Summary")
gr.Interface(
    fn=summarize_from_youtube,
    inputs=youtube_url,
    outputs=output_text,
    title="YouTube Summarizer",
    description="Masukkan URL YouTube untuk merangkum kontennya."
).launch()