# Uploaded by dadadar — "Upload 3 files" (commit 46b3b77, verified)
from langchain.llms import LlamaCpp
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import YoutubeLoader, TextLoader
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
import os
import sys
import torch
import tempfile
import transformers
from PIL import Image
from datetime import datetime
from torch import cuda, bfloat16
from transformers import pipeline
import streamlit as st
from streamlit_chat import message
from streamlit_js_eval import streamlit_js_eval
def save_feedback(feedback):
    """Persist user feedback to a timestamped text file under ./feedback.

    Args:
        feedback: Free-form feedback text entered by the user.
    """
    feedback_dir = "feedback"
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(feedback_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"feedback_{timestamp}.txt"
    filepath = os.path.join(feedback_dir, filename)
    # Explicit UTF-8 so non-ASCII feedback does not depend on the locale.
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(feedback)
def transcribe(tmp_audio_path):
    """Transcribe a media file to English text with a Whisper ASR pipeline.

    Args:
        tmp_audio_path: Path to the audio/video file on disk.

    Returns:
        The transcription string with surrounding whitespace stripped.
    """
    # Use the first GPU when any is available; the original "cuda:1"
    # assumed a second GPU and crashes on single-GPU machines.
    device = 0 if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small.en",
        chunk_length_s=30,  # long-form audio is processed in 30 s windows
        device=device,
    )
    prediction = pipe(tmp_audio_path, batch_size=8)["text"]
    return prediction.strip()
def summarize(transcript, option):
    """Summarize a transcript with a local LlamaCpp model via a refine chain.

    Args:
        transcript: Full transcript text to summarize.
        option: Video type ('Default', 'Lecture', 'Speech', 'Tutorial',
            'Documentary'); selects a type-specific summary prompt.
            Unknown values fall back to the general prompt.

    Returns:
        The final refined summary text produced by the chain.
    """
    # Load model
    llm = LlamaCpp(
        streaming=True,
        model_path="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
        #model_path="mistral-7b-openorca.Q4_K_M.gguf",
        #model_path="starling-lm-7b-alpha.Q4_K_M.gguf",
        #model_path="zephyr-7b-beta.Q4_K_M.gguf",
        #model_path="dolphin-2.1-mistral-7b.Q4_K_M.gguf",
        #model_path="llama-2-7b.Q4_K_M.gguf",
        temperature=0,  # deterministic output for reproducible summaries
        top_p=1,
        n_ctx=4096,
        max_tokens=-1,  # -1 = no output length cap
        verbose=True,
        #n_gpu_layers=35,
        #n_gpu_layers=-1,#all move to gpu
        #n_batch=256,
        # NOTE(review): context_length is not a documented LlamaCpp parameter
        # (n_ctx above already sets the context window) — verify it is accepted.
        context_length=6000,
    )

    ### Text preprocessing
    target_len = 600        # word budget passed to the refine prompt
    chunk_size = 3000       # characters per chunk; fits inside n_ctx with prompt
    chunk_overlap = 200     # overlap preserves continuity across chunk edges
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    texts = text_splitter.split_text(transcript)
    docs = [Document(page_content=t) for t in texts]

    general_prompt_template = """
 Do not explain what you are doing. Do not self reference. You are a professional summary writer.
 Write a concise summary of the text that cover the key points of the text. and present the results as follows:
 - Serveral paragraphs with the following content: Topic, Outline, Description
 - A key point list in the format of one key point in one paragraph
 - A markdown list with the definition of the important key terms mentioned
```{text}```
SUMMARY:
"""
    lecture_prompt_template = """
 Do not explain what you are doing. Do not self reference. You are tasked with summarizing a lecture. Write a concise summary covering the lecture's key points and organize the results as follows:
 - Lecture Topic: Provide a brief overview of the main subject discussed in the lecture.
 - Lecture Description: Summarize the content and purpose of the lecture in a few sentences.
 - Outline: Present an outline of the lecture's structure, including main sections and subtopics.
 - Key Points: List the most important points discussed in the lecture, each presented in a separate paragraph.
 - Formulas and Equations: Include any significant formulas or equations introduced in the lecture.
 - Markdown Table: Create a markdown table to define and explain important terms and concepts mentioned in the lecture.
```{text}```
SUMMARY:
"""
    tutorial_prompt_template = """
 Do not explain what you are doing. Do not self reference. You have been assigned to summarize a tutorial video. Your task is to provide a concise summary covering the tutorial's main points and organize the results as follows:
 - Tutorial Topic: Briefly introduce the main subject matter covered in the tutorial.
 - Tutorial Description: Summarize the purpose and objectives of the tutorial in a few sentences.
 - Tutorial Structure: Outline the tutorial's structure, including main sections, steps, or modules.
 - Key Points: List the essential concepts or techniques explained in the tutorial, with each concept presented in its paragraph.
 - Practical Examples: Include any practical examples or demonstrations provided in the tutorial.
 - Tips and Tricks: Highlight any useful tips or tricks shared by the tutorial presenter.
 - Markdown Table: Create a markdown table to define and explain important terms and concepts introduced in the tutorial.
```{text}```
SUMMARY:
"""
    speech_prompt_template = """
 Do not explain what you are doing. Do not self reference. Your task is to summarize a speech. Write a concise summary covering the key points of the speech and organize the results as follows:
 - Speech Topic: Provide a brief introduction to the main subject matter addressed in the speech.
 - Speaker Introduction: Briefly introduce the speaker, including their background and credentials.
 - Speech Overview: Summarize the main themes or objectives of the speech in a few sentences.
 - Key Messages: List the key messages or arguments conveyed in the speech, with each message presented in its paragraph.
 - Examples and Illustrations: Include any relevant examples or illustrations provided by the speaker to support their points.
 - Closing Remarks: Summarize any concluding remarks or calls to action made by the speaker.
 - Markdown Table: Create a markdown table to define and explain important terms or concepts mentioned in the speech.
```{text}```
SUMMARY:
"""
    documentary_prompt_template = """
 Do not explain what you are doing. Do not self reference. Your task is to summarize a documentary. Write a concise summary covering the main points of the documentary and organize the results as follows:
 - Documentary Title: Provide the title of the documentary.
 - Documentary Overview: Briefly introduce the subject matter and purpose of the documentary in a few sentences.
 - Director's Background: Provide background information about the director or creators of the documentary.
 - Key Themes: List the key themes or topics explored in the documentary, with each theme presented in its paragraph.
 - Interviews and Testimonials: Highlight any interviews or testimonials featured in the documentary.
 - Footage and Visuals: Describe any significant footage or visual elements used to convey the documentary's message.
 - Conclusion: Summarize the main takeaways or conclusions drawn from the documentary.
 - Markdown Table: Create a markdown table to define and explain important terms or concepts mentioned in the documentary.
```{text}```
SUMMARY:
"""

    # Dict dispatch with a default: the original if/elif chain had no else
    # branch, so an unexpected option raised NameError further down.
    prompt_templates = {
        'Default': general_prompt_template,
        'Lecture': lecture_prompt_template,
        'Speech': speech_prompt_template,
        'Tutorial': tutorial_prompt_template,
        'Documentary': documentary_prompt_template,
    }
    prompt_template = prompt_templates.get(option, general_prompt_template)
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        f"Given the new context, refine the original summary in English within {target_len} words and do not mention the summary is refined."
    )
    refine_prompt = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_template,
    )

    # Refine chain: summarize the first chunk, then iteratively fold each
    # subsequent chunk into the running summary.
    chain = load_summarize_chain(
        llm,
        chain_type="refine",
        return_intermediate_steps=True,
        question_prompt=PROMPT,
        refine_prompt=refine_prompt,
    )
    resp = chain(docs)
    return resp["output_text"]
def initialize_session_state():
    """Seed the Streamlit session-state keys used by the chat UI, if absent."""
    defaults = {
        'history': [],
        'generated': ["Hello! Ask me anything about your video!"],
        'past': ["Hey!"],
    }
    for key, initial in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = initial
def conversation_chat(query, chain, history):
    """Run one chat turn through the chain and record it in the history.

    Args:
        query: The user's question.
        chain: Callable chain accepting {"question", "chat_history"} and
            returning a mapping with an "answer" entry.
        history: Mutable list of (question, answer) tuples; appended in place.

    Returns:
        The chain's answer string.
    """
    response = chain({"question": query, "chat_history": history})
    answer = response["answer"]
    history.append((query, answer))
    return answer
def display_chat_history(chain):
    """Render the question form and replay the running conversation."""
    reply_area = st.container()
    input_area = st.container()

    with input_area:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_input(
                "Question:",
                placeholder="Ask about your video transcript",
                key='input',
            )
            submitted = st.form_submit_button(label='Send')

        if submitted and user_input:
            with st.spinner('Generating response...'):
                answer = conversation_chat(
                    user_input, chain, st.session_state['history'])
            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(answer)

    if st.session_state['generated']:
        with reply_area:
            for idx, reply in enumerate(st.session_state['generated']):
                # User bubble first, then the assistant's reply.
                message(st.session_state["past"][idx], is_user=True,
                        key=f"{idx}_user", avatar_style="thumbs")
                message(reply, key=str(idx), avatar_style="fun-emoji")
def create_conversational_chain(vector_store, option):
    """Build a ConversationalRetrievalChain over the transcript vector store.

    Args:
        vector_store: FAISS store of transcript chunks to retrieve from.
        option: Video type ('Default', 'Lecture', 'Speech', 'Tutorial',
            'Documentary'); selects a type-specific chatbot prompt.
            Unknown values fall back to the general prompt.

    Returns:
        A ConversationalRetrievalChain wired to a local LlamaCpp model.
    """
    # The stray `""` that trailed "Chatbot:" in every original template was a
    # typo injected into the prompt text and has been removed.
    chatbot_general_prompt = PromptTemplate(input_variables=["history", "context", "question"], template="""
 You are a knowledgeable chatbot, here to help with questions of the user. Your tone should be professional and informative.
 Context: {context}
 History: {history}
 User: {question}
 Chatbot:
""")
    chatbot_lecture_prompt = PromptTemplate(input_variables=["history", "context", "question"], template="""
 You are a knowledgeable chatbot, you already have the knowledge of a lecture video transcript. Help with questions of the user
 with use of this lecture video transcript. Your tone should be professional and informative.
 Context: {context}
 History: {history}
 User: {question}
 Chatbot:
""")
    chatbot_speech_prompt = PromptTemplate(input_variables=["history", "context", "question"], template="""
 You are a knowledgeable chatbot, you already have the knowledge of a speech video transcript. Help with questions of the user
 with use of this speech video transcript. Your tone should be professional and informative.
 Context: {context}
 History: {history}
 User: {question}
 Chatbot:
""")
    chatbot_tutorial_prompt = PromptTemplate(input_variables=["history", "context", "question"], template="""
 You are a knowledgeable chatbot, you already have the knowledge of a tutorial video transcript. Help with questions of the user
 with use of this tutorial video transcript. Your tone should be professional and informative.
 Context: {context}
 History: {history}
 User: {question}
 Chatbot:
""")
    chatbot_documentary_prompt = PromptTemplate(input_variables=["history", "context", "question"], template="""
 You are a knowledgeable chatbot, you already have the knowledge of a documentary video transcript. Help with questions of the user
 with use of this documentary video transcript. Your tone should be professional and informative.
 Context: {context}
 History: {history}
 User: {question}
 Chatbot:
""")

    # Dict dispatch with a default: the original if/elif chain had no else
    # branch, so an unexpected option raised NameError when building the chain.
    chatbot_prompts = {
        'Default': chatbot_general_prompt,
        'Lecture': chatbot_lecture_prompt,
        'Speech': chatbot_speech_prompt,
        'Tutorial': chatbot_tutorial_prompt,
        'Documentary': chatbot_documentary_prompt,
    }
    chatbot_prompt = chatbot_prompts.get(option, chatbot_general_prompt)

    # Create llm
    llm = LlamaCpp(
        streaming=True,
        #model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        #model_path="mistral-7b-openorca.Q4_K_M.gguf",
        #model_path="starling-lm-7b-alpha.Q4_K_M.gguf",
        model_path="zephyr-7b-beta.Q4_K_M.gguf",
        #model_path="dolphin-2.1-mistral-7b.Q4_K_M.gguf",
        #model_path="llama-2-7b.Q4_K_M.gguf",
        temperature=0,  # deterministic answers
        top_p=1,
        n_ctx=4096,
        max_tokens=-1,  # -1 = no output length cap
        verbose=True,
        #n_gpu_layers=35,
        #n_gpu_layers=-1,#all move to gpu
        #n_batch=256,
    )
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    # NOTE(review): condense_question_prompt is normally formatted with
    # {chat_history} and {question}; these templates declare {history} and
    # {context} instead — confirm the chain supplies those variables.
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type='stuff',
        retriever=vector_store.as_retriever(search_kwargs={"k": 2}),  # top-2 chunks
        memory=memory,
        condense_question_prompt=chatbot_prompt,
    )
    return chain
def main():
    """Streamlit entry point: upload/link a video, transcribe, summarize, chat."""
    # Initialize session state
    initialize_session_state()

    im = Image.open('sricon.png')
    st.set_page_config(page_title=' 🤖Automatic Video Assistant🔗', layout='wide', page_icon = im)
    # Set up the Streamlit app layout
    st.title("🤖 Automatic Video Assistant 🔗")
    st.subheader(" Powered by LangChain + Streamlit")
    # Hide Streamlit's default hamburger menu and footer.
    hide_default_format = """
       <style>
       #MainMenu {visibility: hidden; }
       footer {visibility: hidden;}
       </style>
       """
    st.markdown(hide_default_format, unsafe_allow_html=True)

    #sidebar: app introduction plus a feedback form that writes via save_feedback().
    with st.sidebar:
        st.markdown("# Introduction")
        st.markdown(
            "Automatic Video Assistant is able to summarize videos and answer related questions.")
        st.markdown("You can select specific video types to enhance the assistant's performance.")
        st.markdown("You can input local video or YouTube video link.")
        st.markdown("# Input your video to start!")
        st.markdown("---")
        st.markdown("# Feedback")
        txt = st.text_area(
            "We will continue to improve💪?",
            "Please share your feedback... ",
        )
        if st.button('Submit'):
            save_feedback(txt)
            st.write('Your feedback is submitted!')

    # Video-type selector; the choice picks the prompt used by summarize()
    # and create_conversational_chain().
    option = st.selectbox(
        'Please indicate you video type for better interaction😀',
        ('Default', 'Lecture', 'Speech', 'Tutorial', 'Documentary'))
    st.write('Selected video type:', option)

    #User Input File
    audio_file = st.file_uploader("Upload Video", type=["mp4", "wav","mp3","mov","avi","wmv"])
    with st.form('myform', clear_on_submit=True):
        youtube_url = st.text_input("Or enter a YouTube URL")
        submitted = st.form_submit_button('Submit')

    # Check if either YouTube URL or file uploaded
    if (submitted and youtube_url) or audio_file:
        if youtube_url:
            # Transcript comes from YouTube's own captions; loaded lazily below.
            loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)
        else:
            #transcript = transcribe(audio_file)
            # Spill the upload to a temp file so Whisper can read it from disk.
            # delete=False keeps the file alive after the with-block.
            with tempfile.NamedTemporaryFile(delete=False) as tmp_audio_file:
                tmp_audio_file.write(audio_file.read())
                tmp_audio_path = tmp_audio_file.name
            transcript = transcribe(tmp_audio_path)

        with st.expander("See Transcript"):
            if youtube_url:
                transcript = loader.load()
                # Save the transcript to a text file (transcript.txt is also
                # re-read later by TextLoader for the chatbot).
                with open("transcript.txt", "w", encoding="utf-8") as file:
                    transcript_text = '\n'.join([document.page_content for document in transcript])
                    file.write(transcript_text)
                # Re-read so `transcript` becomes a plain string in both branches.
                with open("transcript.txt", "r", encoding="utf-8") as file:
                    transcript = file.read()
            else:
                with open("transcript.txt", "w") as f:
                    f.write(transcript)
            # Display the transcript
            st.write(transcript)
            # Provide a download button for the transcript
            st.download_button("Download Transcript", transcript, key='transcript_download_button')

        st.subheader("Do you want a summary for this video?")
        # 'clicked' survives reruns so the summary/chatbot stay visible after
        # the button press triggers a rerun.
        if 'clicked' not in st.session_state:
            st.session_state.clicked = False
        def click_button():
            # Button callback: mark that the user requested a summary.
            st.session_state.clicked = True
        st.button('Generate summary', on_click=click_button)

        #Summarize
        if st.session_state.clicked:
            with st.expander("See Summary", expanded=True):
                st.header("Summary")
                summary = summarize(transcript, option)
                with open("summary.txt", "w") as f:
                    f.write(summary)
                # Re-read and emit line by line so each line renders separately.
                with open("summary.txt", "r") as f:
                    for line in f:
                        st.write(line)
                #st.write_stream(summary)
                #st.download_button("Download Summary", summary, key='summary_download_button')

            # Chatbot over the saved transcript: chunk, embed, index, chat.
            loader = TextLoader("transcript.txt")
            documents = loader.load()
            st.header("Chatbot🤖")
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
            text_chunks = text_splitter.split_documents(documents)
            # Create embeddings
            # NOTE(review): 'cuda:1' assumes a second GPU exists — verify.
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                               model_kwargs={'device': 'cuda:1'})
                                               #model_kwargs={'device': 'cpu'}) #Almost the same speed
            # Create vector store
            vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
            # Create the chain object
            chain = create_conversational_chain(vector_store, option)
            display_chat_history(chain)

    # Full page reload to reset the app for a new video.
    if st.button("Click to start with a new video"):
        streamlit_js_eval(js_expressions="parent.window.location.reload()")

if __name__ == "__main__":
    main()