Spaces:

VaishakhRaveendran
/

Audio_2_chat

Sleeping

App Files Files Community

Audio_2_chat / app.py

VaishakhRaveendran

Update app.py

ad75a19 over 2 years ago

raw

history blame contribute delete

8.09 kB

	import streamlit as st
	import tempfile
	import re
	from scripts.utils import save_uploaded_file, break_chunks
	from scripts.whisper_transcribe import wtranscribe
	from scripts import text2markdown, summarize
	# from scripts import credentials # REMOVE CREDENTIALS

	# Scratch directory that holds the uploaded media and any outputs.
	# `temp_dir` has to stay referenced for as long as `temp_path` is in use:
	# the directory is deleted once the TemporaryDirectory object goes away.
	temp_dir = tempfile.TemporaryDirectory()
	temp_path = temp_dir.name

	# Centre the logo by drawing it in the wide middle column of a 1:3:1 row;
	# the two outer columns are only spacers and are not kept.
	logo_col = st.columns([1, 3, 1])[1]
	logo_col.image("./images/notebook_logo_transparent.png")

	# Step-by-step usage notes, rendered inside a help expander at the top of
	# the page. The text is built first so the expander body is a single call.
	usage_steps = """
	1. Upload a lecture/meeting audio or video file
	2. Select whether you want the output as a lecture or meeting note
	3. (Optional): Adjust advanced settings for better fine-tuning and click "Submit" for each adjusted tab
	4. (Optional): Use your own OpenAI api key under advanced settings as the default has a limit
	5. Click "Start YOListenO" and let the magic begin!

	Note: Sometimes OpenAI's server might be busy, which might cause an error. Simply try again after a while!
	"""
	with st.expander("How to Use YOListenO"):
	    st.write(usage_steps)


	# Section heading, followed by the single-file upload widget.
	st.write("# Upload Audio/Video:\n")

	# Extensions the uploader accepts (audio and video containers).
	accepted_types = ["mp3", "wav", "mp4", "mov", "mpeg"]
	# `file` stays None until the user has picked something; the rest of the
	# script branches on that.
	file = st.file_uploader(
	    label="Choose a File",
	    type=accepted_types,
	    accept_multiple_files=False,
	)

	# Main flow: once a file is uploaded, collect the settings, then on button
	# press transcribe the media, optionally chunk + summarise a long transcript,
	# convert the text to markdown notes and show the results in tabs.
	# NOTE(review): nesting below was reconstructed from a paste that had lost
	# its indentation — confirm widget placement against the deployed app.
	if file is not None:
	    # Save the upload under the temp directory; the returned path is used
	    # both as the transcriber input and to re-open the media for playback.
	    file_details = {"FileName": file.name, "FileType": file.type}
	    file_path = save_uploaded_file(file, temp_path)

	    # Basic settings: the note type selects the default prompt pair.
	    with st.expander("Settings", expanded=True):
	        note_type = st.selectbox(
	            'Desired Note Type:',
	            ('Lecture', 'Meeting')
	        )
	        if note_type == 'Lecture':
	            sys_content = text2markdown.SYSTEM_CONTENT_LECTURES
	            use_content = text2markdown.USER_CONTENT_LECTURES
	        elif note_type == 'Meeting':
	            sys_content = text2markdown.SYSTEM_CONTENT_MEETINGS
	            use_content = text2markdown.USER_CONTENT_MEETINGS

	    # Advanced settings: one tab (and one form) per group of options.
	    with st.expander("Advanced Settings"):
	        system, user, additional = st.tabs(["System Content", "User Content", "Additional Settings"])

	        with system:
	            with st.form("system_content_form"):
	                system_content = st.text_area(
	                    label='System Content (leave as default if unsure)',
	                    value=sys_content,
	                    help='Provide system role to set behavior of the assistant and provide high level instructions for the conversation',
	                    placeholder=sys_content
	                )
	                # Every form must have a submit button.
	                submitted = st.form_submit_button("Submit System Content")

	            # Echo the prompt that will actually be sent.
	            st.subheader('Preview')
	            st.info(system_content)

	        with user:
	            with st.form("user_content_form"):
	                user_content = st.text_area(
	                    label='User Content (leave as default if unsure)',
	                    value=use_content,
	                    help='Specific instructions from the user for the assistant',
	                    placeholder=use_content
	                )
	                # Every form must have a submit button.
	                submitted = st.form_submit_button("Submit User Content")

	            # Echo the prompt that will actually be sent.
	            st.subheader('Preview')
	            st.info(user_content)

	        with additional:
	            with st.form("add_settings_form"):
	                temp = st.number_input(
	                    "Temperature",
	                    min_value=0.0,
	                    max_value=1.0,
	                    value=0.2,
	                    step=0.1,
	                    help="How creative and random the markdown output is. 0 will give straightforward, deterministic responses, while 1 will give wildly varied responses.",
	                )
	                transcript_max_token_len = st.number_input(
	                    "Transcript Max Token Length",
	                    min_value=0,
	                    max_value=4000,
	                    value=2500,
	                    step=100,
	                    help="The number of tokens in transcript before chunking is used. This is a workaround for very long transcripts as GPT3.5 cannot handle >4k tokens."
	                )
	                summarized_chunk_max_len = st.number_input(
	                    "Chunking Summary Max Length",
	                    min_value=50,
	                    max_value=500,
	                    value=200,
	                    step=50,
	                    help="The maximum length of each summarized chunk if chunking is performed for very long transcripts."
	                )
	                apikey = st.text_input(
	                    label='API Key (Optional)',
	                    type="password",
	                    help="Use your own OpenAI key if possible. Else, it will use mine but there is a limit every month."
	                )

	                # Every form must have a submit button.
	                submitted = st.form_submit_button("Submit Additional Settings")

	    convert_bt = st.button("Start YOListenO!")
	    if convert_bt:

	        # Fall back to the app-level key from Streamlit secrets when the
	        # user did not supply their own.
	        if not apikey:
	            apikey = st.secrets["API_KEY"]

	        # transcribe
	        with st.spinner(text="YOListenO working its magic: Transcribing..."):
	            transcript = wtranscribe(
	                model='base',
	                audio=file_path,
	                temperature=0.2,
	            )
	        # Keep the untouched transcript for display; `transcript` may be
	        # replaced by a summarised version below.
	        transcript_orig = transcript

	        # Tokenize into words and punctuation marks and check the length.
	        # The `|` must be an unescaped alternation: `\|` would only match a
	        # literal pipe character, leaving `tokens` empty so that long
	        # transcripts would never be chunked.
	        tokens = re.findall(r"[\w']+|[.,!?;]", transcript)
	        if len(tokens) > transcript_max_token_len:
	            # break into smaller chunks, summarize each chunk, and merge back together
	            with st.spinner(text="YOListenO working its magic: Chunking & Summarizing..."):
	                # presumably splits at the listed punctuation into chunks of
	                # roughly 700-1100 tokens — verify against scripts.utils
	                chunks = break_chunks(tokens, [".", ",", "!", "?", ";"], 700, 1100)
	                # summarise each chunk
	                summarized_chunks = [
	                    summarize.summarize(
	                        text=chunk,
	                        api_key=apikey,
	                        summarized_max_len=summarized_chunk_max_len,
	                    )
	                    for chunk in chunks
	                ]
	                # join summarised chunks into transcript
	                transcript = ' '.join(summarized_chunks)

	        # convert to markdown
	        with st.spinner(
	            text="YOListenO working its magic: CONVERSION IN PROGRESS ..."
	        ):
	            result = text2markdown.text2markdown(
	                transcript=transcript,
	                api_key=apikey,
	                system_content=system_content,
	                user_content=user_content,
	                temperature=temp,
	            )

	        tab_transcript, tab_markdown, tab_raw = st.tabs(
	            [
	                "Transcript and Audio",
	                "Markdown Preview",
	                "Raw Markdown"
	            ]
	        )

	        # display transcript and audio
	        with tab_transcript:
	            # Read the media through a context manager so the file handle
	            # is closed as soon as the bytes are in memory.
	            with open(file_path, 'rb') as audio_file:
	                audio_bytes = audio_file.read()
	            st.audio(audio_bytes)
	            st.info(transcript_orig)

	        # rendered markdown
	        with tab_markdown:
	            st.header('Preview')
	            st.divider()
	            st.markdown(result)

	        # unrendered markdown source, for copy/paste
	        with tab_raw:
	            st.header('Raw Output')
	            st.divider()
	            st.text(result)


	# "About" panel: centred logo followed by a short project description.
	about_text = """
	[YOListenO (You Only Listen Once)](https://github.com/teyang-lau/YOListenO) is an AI tool making use of OpenAI's
	[Whisper](https://github.com/openai/whisper) and [GPT3.5](https://platform.openai.com/docs/guides/chat)
	for turning audio/video lectures/meetings into markdown notes.

	Created by:
	* LAU TeYang
	"""
	with st.expander("About YOListenO"):
	    # Middle column of three equal columns keeps the logo centred.
	    about_logo_col = st.columns([1, 1, 1])[1]
	    about_logo_col.image("./images/notebook_logo_transparent.png")

	    st.write(about_text)
	    # Trailing empty write, kept from the original layout.
	    st.write("")