"""Automatic Video Assistant.

Streamlit app that transcribes a local video/audio upload (Whisper) or loads a
YouTube transcript, summarizes it with a local LlamaCpp model using a prompt
tailored to the video type, and answers follow-up questions through a
FAISS-backed conversational retrieval chain.
"""

from langchain.llms import LlamaCpp
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import YoutubeLoader, TextLoader
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

import os
import sys
import torch
import tempfile
import transformers
from PIL import Image
from datetime import datetime
from torch import cuda, bfloat16
from transformers import pipeline

import streamlit as st
from streamlit_chat import message
from streamlit_js_eval import streamlit_js_eval


def save_feedback(feedback):
    """Persist user feedback to a timestamped text file under ./feedback/."""
    feedback_dir = "feedback"
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(feedback_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filepath = os.path.join(feedback_dir, f"feedback_{timestamp}.txt")
    with open(filepath, "w") as file:
        file.write(feedback)


def transcribe(tmp_audio_path):
    """Transcribe the media file at *tmp_audio_path* with Whisper.

    Returns the stripped transcript text.
    """
    # NOTE(review): "cuda:1" assumes a second GPU exists — confirm on the
    # deployment machine; falls back to CPU when CUDA is unavailable.
    device = "cuda:1" if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small.en",
        chunk_length_s=30,
        device=device,
    )
    prediction = pipe(tmp_audio_path, batch_size=8)["text"]
    return prediction.strip()


def summarize(transcript, option):
    """Produce a refined summary of *transcript* with a local LlamaCpp model.

    *option* selects a prompt tailored to the video type ('Default',
    'Lecture', 'Speech', 'Tutorial', 'Documentary'); unknown values fall back
    to the general prompt instead of raising NameError as the original
    if/elif chain did. Returns the final summary string.
    """
    # Load model (other quantized .gguf models — openorca, starling, zephyr,
    # dolphin, llama-2 — can be swapped in via model_path).
    llm = LlamaCpp(
        streaming=True,
        model_path="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        temperature=0,
        top_p=1,
        n_ctx=4096,
        max_tokens=-1,  # -1 = no cap on generated tokens
        verbose=True,
        # NOTE(review): context_length is not a documented LlamaCpp field
        # (n_ctx above is the usual context knob) — confirm it is honored.
        context_length=6000,
    )

    # Text preprocessing: split the transcript into overlapping chunks that
    # fit comfortably inside the model context.
    target_len = 600
    text_splitter = CharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(transcript)
    docs = [Document(page_content=t) for t in texts]

    general_prompt_template = """
Do not explain what you are doing. Do not self reference.
You are a professional summary writer.
Write a concise summary of the text that cover the key points of the text.
and present the results as follows:
- Several paragraphs with the following content: Topic, Outline, Description
- A key point list in the format of one key point in one paragraph
- A markdown list with the definition of the important key terms mentioned
```{text}```
SUMMARY:
"""

    lecture_prompt_template = """
Do not explain what you are doing. Do not self reference.
You are tasked with summarizing a lecture.
Write a concise summary covering the lecture's key points and organize the results as follows:
- Lecture Topic: Provide a brief overview of the main subject discussed in the lecture.
- Lecture Description: Summarize the content and purpose of the lecture in a few sentences.
- Outline: Present an outline of the lecture's structure, including main sections and subtopics.
- Key Points: List the most important points discussed in the lecture, each presented in a separate paragraph.
- Formulas and Equations: Include any significant formulas or equations introduced in the lecture.
- Markdown Table: Create a markdown table to define and explain important terms and concepts mentioned in the lecture.
```{text}```
SUMMARY:
"""

    tutorial_prompt_template = """
Do not explain what you are doing. Do not self reference.
You have been assigned to summarize a tutorial video.
Your task is to provide a concise summary covering the tutorial's main points and organize the results as follows:
- Tutorial Topic: Briefly introduce the main subject matter covered in the tutorial.
- Tutorial Description: Summarize the purpose and objectives of the tutorial in a few sentences.
- Tutorial Structure: Outline the tutorial's structure, including main sections, steps, or modules.
- Key Points: List the essential concepts or techniques explained in the tutorial, with each concept presented in its paragraph.
- Practical Examples: Include any practical examples or demonstrations provided in the tutorial.
- Tips and Tricks: Highlight any useful tips or tricks shared by the tutorial presenter.
- Markdown Table: Create a markdown table to define and explain important terms and concepts introduced in the tutorial.
```{text}```
SUMMARY:
"""

    speech_prompt_template = """
Do not explain what you are doing. Do not self reference.
Your task is to summarize a speech.
Write a concise summary covering the key points of the speech and organize the results as follows:
- Speech Topic: Provide a brief introduction to the main subject matter addressed in the speech.
- Speaker Introduction: Briefly introduce the speaker, including their background and credentials.
- Speech Overview: Summarize the main themes or objectives of the speech in a few sentences.
- Key Messages: List the key messages or arguments conveyed in the speech, with each message presented in its paragraph.
- Examples and Illustrations: Include any relevant examples or illustrations provided by the speaker to support their points.
- Closing Remarks: Summarize any concluding remarks or calls to action made by the speaker.
- Markdown Table: Create a markdown table to define and explain important terms or concepts mentioned in the speech.
```{text}```
SUMMARY:
"""

    documentary_prompt_template = """
Do not explain what you are doing. Do not self reference.
Your task is to summarize a documentary.
Write a concise summary covering the main points of the documentary and organize the results as follows:
- Documentary Title: Provide the title of the documentary.
- Documentary Overview: Briefly introduce the subject matter and purpose of the documentary in a few sentences.
- Director's Background: Provide background information about the director or creators of the documentary.
- Key Themes: List the key themes or topics explored in the documentary, with each theme presented in its paragraph.
- Interviews and Testimonials: Highlight any interviews or testimonials featured in the documentary.
- Footage and Visuals: Describe any significant footage or visual elements used to convey the documentary's message.
- Conclusion: Summarize the main takeaways or conclusions drawn from the documentary.
- Markdown Table: Create a markdown table to define and explain important terms or concepts mentioned in the documentary.
```{text}```
SUMMARY:
"""

    # Dispatch table replaces the original else-less if/elif chain, which
    # left prompt_template unbound (NameError) for any unexpected option.
    prompt_templates = {
        'Default': general_prompt_template,
        'Lecture': lecture_prompt_template,
        'Speech': speech_prompt_template,
        'Tutorial': tutorial_prompt_template,
        'Documentary': documentary_prompt_template,
    }
    prompt_template = prompt_templates.get(option, general_prompt_template)
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        # Original adjacent literals concatenated to "summarywith"; a space
        # has been restored between "summary" and "with".
        "We have the opportunity to refine the existing summary "
        "with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        f"Given the new context, refine the original summary in English within {target_len} words and do not mention the summary is refined."
    )
    refine_prompt = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_template,
    )

    # "refine" chain: summarize the first chunk, then iteratively fold each
    # subsequent chunk into the running summary.
    chain = load_summarize_chain(
        llm,
        chain_type="refine",
        return_intermediate_steps=True,
        question_prompt=PROMPT,
        refine_prompt=refine_prompt,
    )
    resp = chain(docs)
    return resp["output_text"]


def initialize_session_state():
    """Seed the Streamlit session keys used by the chat UI (idempotent)."""
    if 'history' not in st.session_state:
        st.session_state['history'] = []
    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello! Ask me anything about your video!"]
    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey!"]


def conversation_chat(query, chain, history):
    """Run one chat turn through *chain*, append it to *history* (mutated
    in place) and return the answer text."""
    result = chain({"question": query, "chat_history": history})
    history.append((query, result["answer"]))
    return result["answer"]


def display_chat_history(chain):
    """Render the chat input form and the accumulated message history."""
    reply_container = st.container()
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_input("Question:", placeholder="Ask about your video transcript", key='input')
            submit_button = st.form_submit_button(label='Send')

        if submit_button and user_input:
            with st.spinner('Generating response...'):
                output = conversation_chat(user_input, chain, st.session_state['history'])
            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with reply_container:
            # 'past' and 'generated' are kept in lockstep, one user/bot pair
            # per turn.
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
                message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")


def create_conversational_chain(vector_store, option):
    """Build a ConversationalRetrievalChain over *vector_store*.

    *option* selects a chatbot persona prompt for the video type; unknown
    values fall back to the general prompt instead of raising NameError as
    the original if/elif chain did.
    """
    chatbot_general_prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template="""
You are a knowledgeable chatbot, here to help with questions of the user. Your tone should be professional and informative.

Context: {context}

History: {history}

User: {question}
Chatbot:"" """)

    chatbot_lecture_prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template="""
You are a knowledgeable chatbot, you already have the knowledge of a lecture video transcript. Help with questions of the user with use of this lecture video transcript. Your tone should be professional and informative.

Context: {context}

History: {history}

User: {question}
Chatbot:"" """)

    chatbot_speech_prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template="""
You are a knowledgeable chatbot, you already have the knowledge of a speech video transcript. Help with questions of the user with use of this speech video transcript. Your tone should be professional and informative.

Context: {context}

History: {history}

User: {question}
Chatbot:"" """)

    chatbot_tutorial_prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template="""
You are a knowledgeable chatbot, you already have the knowledge of a tutorial video transcript. Help with questions of the user with use of this tutorial video transcript. Your tone should be professional and informative.

Context: {context}

History: {history}

User: {question}
Chatbot:"" """)

    chatbot_documentary_prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template="""
You are a knowledgeable chatbot, you already have the knowledge of a documentary video transcript. Help with questions of the user with use of this documentary video transcript. Your tone should be professional and informative.

Context: {context}

History: {history}

User: {question}
Chatbot:"" """)

    chatbot_prompts = {
        'Default': chatbot_general_prompt,
        'Lecture': chatbot_lecture_prompt,
        'Speech': chatbot_speech_prompt,
        'Tutorial': chatbot_tutorial_prompt,
        'Documentary': chatbot_documentary_prompt,
    }
    chatbot_prompt = chatbot_prompts.get(option, chatbot_general_prompt)

    # Create llm (alternative quantized models can be swapped in here).
    llm = LlamaCpp(
        streaming=True,
        model_path="zephyr-7b-beta.Q4_K_M.gguf",
        temperature=0,
        top_p=1,
        n_ctx=4096,
        max_tokens=-1,  # -1 = no cap on generated tokens
        verbose=True,
    )

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # NOTE(review): condense_question_prompt normally takes a
    # {chat_history}/{question} question-rewriting prompt; this persona
    # prompt declares {history}/{context}/{question} instead — it likely
    # belongs in combine_docs_chain_kwargs. Kept as-is to preserve the
    # original behavior; confirm against the LangChain docs.
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type='stuff',
        retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
        memory=memory,
        condense_question_prompt=chatbot_prompt,
    )
    return chain


def main():
    """Streamlit entry point: input selection, transcription, summary, chat."""
    initialize_session_state()

    im = Image.open('sricon.png')
    st.set_page_config(page_title=' 🤖Automatic Video Assistant🔗', layout='wide', page_icon=im)

    # Page header.
    st.title("🤖 Automatic Video Assistant 🔗")
    st.subheader(" Powered by LangChain + Streamlit")
    hide_default_format = """ """
    st.markdown(hide_default_format, unsafe_allow_html=True)

    # Sidebar: introduction and feedback form.
    with st.sidebar:
        st.markdown("# Introduction")
        st.markdown("Automatic Video Assistant is able to summarize videos and answer related questions.")
        st.markdown("You can select specific video types to enhance the assistant's performance.")
        st.markdown("You can input local video or YouTube video link.")
        st.markdown("# Input your video to start!")
        st.markdown("---")
        st.markdown("# Feedback")
        txt = st.text_area(
            "We will continue to improve💪?",
            "Please share your feedback... ",
        )
        if st.button('Submit'):
            save_feedback(txt)
            st.write('Your feedback is submitted!')

    option = st.selectbox(
        'Please indicate your video type for better interaction😀',
        ('Default', 'Lecture', 'Speech', 'Tutorial', 'Documentary'))
    st.write('Selected video type:', option)

    # User input: local upload or YouTube URL.
    audio_file = st.file_uploader("Upload Video", type=["mp4", "wav", "mp3", "mov", "avi", "wmv"])
    with st.form('myform', clear_on_submit=True):
        youtube_url = st.text_input("Or enter a YouTube URL")
        submitted = st.form_submit_button('Submit')

    # Proceed once either a YouTube URL was submitted or a file was uploaded.
    if (submitted and youtube_url) or audio_file:
        if youtube_url:
            loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
        else:
            # Whisper needs a real filesystem path, so spill the upload to a
            # temp file; remove it afterwards (the original leaked it).
            with tempfile.NamedTemporaryFile(delete=False) as tmp_audio_file:
                tmp_audio_file.write(audio_file.read())
                tmp_audio_path = tmp_audio_file.name
            try:
                transcript = transcribe(tmp_audio_path)
            finally:
                os.remove(tmp_audio_path)

        with st.expander("See Transcript"):
            if youtube_url:
                transcript = loader.load()
                # Save the transcript to a text file, then reload it as plain
                # text so both input paths end with `transcript` as a string.
                with open("transcript.txt", "w", encoding="utf-8") as file:
                    transcript_text = '\n'.join(document.page_content for document in transcript)
                    file.write(transcript_text)
                with open("transcript.txt", "r", encoding="utf-8") as file:
                    transcript = file.read()
            else:
                # utf-8 for consistency with the YouTube path (the original
                # used the locale default here, risking UnicodeEncodeError).
                with open("transcript.txt", "w", encoding="utf-8") as f:
                    f.write(transcript)
            # Display the transcript and offer it for download.
            st.write(transcript)
            st.download_button("Download Transcript", transcript, key='transcript_download_button')

        st.subheader("Do you want a summary for this video?")
        if 'clicked' not in st.session_state:
            st.session_state.clicked = False

        def click_button():
            # Latch the request in session state so the summary survives
            # Streamlit reruns.
            st.session_state.clicked = True

        st.button('Generate summary', on_click=click_button)

        # Summarize on demand.
        if st.session_state.clicked:
            with st.expander("See Summary", expanded=True):
                st.header("Summary")
                summary = summarize(transcript, option)
                with open("summary.txt", "w", encoding="utf-8") as f:
                    f.write(summary)
                with open("summary.txt", "r", encoding="utf-8") as f:
                    for line in f:
                        st.write(line)

        # Chatbot over the transcript: embed chunks into FAISS and chat.
        loader = TextLoader("transcript.txt")
        documents = loader.load()
        st.header("Chatbot🤖")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        text_chunks = text_splitter.split_documents(documents)
        # NOTE(review): hard-coded second GPU; per the original comment CPU
        # is almost the same speed for this embedding model.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cuda:1'})
        vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
        chain = create_conversational_chain(vector_store, option)
        display_chat_history(chain)

    if st.button("Click to start with a new video"):
        # Full browser reload clears all session state for a fresh video.
        streamlit_js_eval(js_expressions="parent.window.location.reload()")


if __name__ == "__main__":
    main()