# Meeting Summary Web App (Streamlit)
# Downloads a YouTube video, transcribes it with Whisper, summarizes each
# chunk, and displays keywords plus a word cloud of the transcript.
import subprocess
import sys

# Runtime dependencies installed at startup (Streamlit-space style).
# NOTE(review): installing at import time is fragile; a requirements.txt
# would be preferable, but is kept here to preserve the deployment model.
_REQUIREMENTS = [
    'wordcloud',
    'git+https://github.com/openai/whisper.git',
    'transformers',
    'imageio==2.4.1',
    'moviepy',
    'keybert',
    'pytube',
]
for _pkg in _REQUIREMENTS:
    # Use sys.executable -m pip so packages land in the interpreter that is
    # actually running this script; a bare 'pip' may belong to another Python.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', _pkg])
| import streamlit as st | |
| import os | |
| from wordcloud import WordCloud | |
| from keybert import KeyBERT | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| # ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
| from moviepy.editor import * | |
| from tqdm import tqdm | |
| import os | |
| import math | |
| import nltk | |
| nltk.download('punkt') | |
| import whisper | |
| from transformers import pipeline | |
| from pytube import YouTube | |
def process_video(path):
    """Download a YouTube video, transcribe it in 60-second chunks with
    Whisper, summarize and sentiment-tag each chunk, and extract keywords.

    Parameters:
        path: URL of the YouTube video to process.

    Returns:
        (tot_text, tot_summary, tot_keywords): the full transcript, an
        overall summary, and a list of top keyword strings.

    Side effects: downloads 'meeting.mp4', writes temporary
    'vid_to_aud<i>.mp3' files (deleted afterwards) and 'output2.csv'.
    """
    whisper_model = whisper.load_model("base")

    time_range = 60  # chunk length, in seconds

    def speech_to_text_eng(aud_path):
        # Transcribe one audio chunk to text.
        return whisper_model.transcribe(aud_path)["text"]

    def run_range(duration):
        # Number of time_range-second chunks needed to cover `duration` seconds.
        return math.ceil(duration / time_range)

    def audio_generator(media_path, aud=0, vid=0):
        """Split the media file into mp3 chunks named 'vid_to_aud<i>.mp3'.

        Returns the number of chunks, so the caller can iterate over them.
        BUG FIX: the original assigned `clip_run_range` inside this function,
        which never updated the caller's variable -- the outer value stayed 0
        and the main transcription loop ran zero times.
        """
        n_chunks = 0
        if vid == 1:
            clip = VideoFileClip(media_path)
            n_chunks = run_range(clip.duration)
            for i in range(n_chunks):
                left = i * time_range
                # Clamp the final chunk to the clip's real end instead of
                # letting subclip run past the duration.
                right = min(left + time_range, clip.duration)
                crop_clip = clip.subclip(left, right)
                try:
                    crop_clip.audio.write_audiofile("vid_to_aud" + str(i) + ".mp3")
                except Exception:
                    pass  # best-effort: skip chunks whose audio cannot be written
        if aud == 1:
            audio_clip = AudioFileClip(media_path)
            n_chunks = run_range(audio_clip.duration)
            for i in range(n_chunks):
                left = i * time_range
                right = min(left + time_range, audio_clip.duration)
                crop_clip = audio_clip.subclip(left, right)
                try:
                    crop_clip.write_audiofile("vid_to_aud" + str(i) + ".mp3")
                except Exception:
                    pass
        return n_chunks

    # Download the lowest-resolution stream (only the audio track matters).
    yt = YouTube(path)
    yt.streams.get_lowest_resolution().download(filename='meeting.mp4')

    # Capture the chunk count via the return value (see audio_generator note).
    clip_run_range = audio_generator("./meeting.mp4", vid=1)

    transcribed_lit = []
    for i in tqdm(range(clip_run_range)):
        chunk_path = "./vid_to_aud" + str(i) + ".mp3"
        if not os.path.exists(chunk_path):
            continue  # a chunk whose audio write failed above; don't crash os.remove
        transcribed_lit.append(speech_to_text_eng(chunk_path))
        os.remove(chunk_path)

    data = pd.DataFrame({'transcriptions': transcribed_lit})

    summarizer = pipeline("summarization")
    sentiment_analyzer = pipeline("sentiment-analysis")

    sumarized_lit = []
    sentiment_lit = []
    for i in tqdm(range(len(data))):
        text = data.iloc[i, 0]
        sumarized_lit.append(
            summarizer(text, min_length=75, max_length=300)[0]['summary_text'])
        sentiment_lit.append(sentiment_analyzer(text)[0]['label'])
    data['summary'] = sumarized_lit
    data['sentiment'] = sentiment_lit
    data.to_csv('output2.csv', index=False)

    # Full transcript: concatenation of every chunk transcription.
    tot_text = "".join(data['transcriptions'].tolist())

    key_model = KeyBERT('distilbert-base-nli-mean-tokens')

    def extract_keywords(text, top_n=50):
        # KeyBERT returns (keyword, score) pairs; keep only the strings.
        return [kw for kw, _score in key_model.extract_keywords(text, top_n=top_n)]

    tot_keywords = extract_keywords(tot_text)

    def summarize_text(text):
        """Summarize long text by summarizing 500-word chunks and joining."""
        chunk_size = 500  # words per summarizer call
        words = text.split()
        num_chunks = len(words) // chunk_size + 1
        parts = []
        for i in tqdm(range(num_chunks)):
            chunk = " ".join(words[i * chunk_size:(i + 1) * chunk_size])
            if not chunk:
                continue  # avoid feeding an empty trailing chunk to the model
            parts.append(
                summarizer(chunk, min_length=75, max_length=200)[0]['summary_text'])
        return "".join(parts)

    tot_summary = summarize_text(tot_text)
    return tot_text, tot_summary, tot_keywords
| # ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | |
def generate_word_cloud(text):
    """Render a word cloud of `text` onto the Streamlit page."""
    cloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    # Draw onto an explicit figure so Streamlit can take ownership of it.
    figure, axis = plt.subplots(figsize=(10, 5))
    axis.imshow(cloud, interpolation='bilinear')
    axis.axis('off')
    st.pyplot(figure)
def main():
    """Streamlit entry point: accept a YouTube URL, process it, show results."""
    st.title("Meeting Summary Web App")

    # YouTube link input
    youtube_url = st.text_input("Enter the YouTube video link")

    if st.button("Process Video"):
        if not youtube_url:
            # Give explicit feedback instead of silently doing nothing.
            st.warning("Please enter a YouTube video link first.")
            return

        # Process the YouTube video
        tot_text, tot_summary, tot_keywords = process_video(youtube_url)

        # Display the output
        if os.path.exists("output2.csv"):
            output_df = pd.read_csv("output2.csv")
            st.subheader("Transcriptions:")
            st.write(output_df["transcriptions"])
            # BUG FIX: process_video writes 'summary' and 'sentiment'
            # columns; the original read a non-existent 'labels' column,
            # which raised KeyError before anything else was shown.
            st.subheader("Sentiment:")
            st.write(output_df["sentiment"])
            st.subheader("Word Cloud:")
            generate_word_cloud(output_df["transcriptions"].str.cat(sep=' '))
            st.subheader("tot_text:")
            st.write(tot_text)
            st.subheader("tot_summary:")
            st.write(tot_summary)
            st.subheader("tot_keywords:")
            st.write(tot_keywords)
        else:
            st.write("No output file found.")
# Launch the Streamlit app when executed as a script.
if __name__ == "__main__":
    main()