Spaces:

qdqd
/

Youtube2mcq

Sleeping

App Files Files Community

Youtube2mcq / app.py

qdqd

Create app.py

389d3a7 verified over 1 year ago

raw

history blame contribute delete

7.88 kB

	import streamlit as st
	from youtube_transcript_api import YouTubeTranscriptApi
	from youtube_transcript_api.formatters import TextFormatter
	import re
	from gradio_client import Client
	from deep_translator import GoogleTranslator
	import os

	def extract_video_id(url):
	    """Return the 11-character YouTube video ID contained in *url*, or None.

	    The scheme and the ``www.`` prefix are optional. Recognised shapes:

	    * ``youtube.com/watch?v=<id>`` (also with other query parameters
	      before ``v=`` and on the ``m.`` / ``music.`` hosts)
	    * ``youtu.be/<id>``
	    * ``youtube.com/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>``

	    Surrounding whitespace is ignored. Anything that is not a string, or
	    that matches none of the shapes above, yields None.
	    """
	    if not isinstance(url, str):
	        return None

	    candidate = url.strip()
	    id_group = r'([_\-a-zA-Z0-9]{11})'
	    # The two original patterns come first so every URL that was accepted
	    # before still resolves to exactly the same ID.
	    patterns = (
	        r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=' + id_group,
	        r'(?:https?:\/\/)?(?:www\.)?youtu\.be\/' + id_group,
	        # "v" is not necessarily the first query parameter, and links copied
	        # from phones use the m./music. hosts.
	        r'(?:https?://)?(?:(?:www|m|music)\.)?youtube\.com/watch\?'
	        r'(?:[^#\s]*?&)?v=' + id_group,
	        # Path-style links: shorts, embeds and live streams.
	        r'(?:https?://)?(?:(?:www|m)\.)?youtube\.com/(?:shorts|embed|live)/'
	        + id_group,
	    )
	    for pattern in patterns:
	        match = re.match(pattern, candidate)
	        if match:
	            return match.group(1)
	    return None

	def download_youtube_transcript(video_url):
	    """Fetch a transcript for *video_url* and return it as plain text.

	    The video ID is taken from the URL with ``extract_video_id``. The
	    transcripts listed for that video are tried one after another and the
	    first one whose ``fetch()`` succeeds is formatted with ``TextFormatter``.

	    Returns the formatted transcript, or None after reporting the problem
	    through ``st.error`` (invalid URL, no fetchable transcript, or any
	    exception raised while listing or formatting).
	    """
	    video_id = extract_video_id(video_url)
	    if not video_id:
	        st.error("Invalid YouTube URL.")
	        return None

	    try:
	        fetched = None
	        for candidate in YouTubeTranscriptApi.list_transcripts(video_id):
	            try:
	                fetched = candidate.fetch()
	            except Exception:
	                # Best effort: this transcript is unusable, try the next one.
	                continue
	            break

	        if not fetched:
	            st.error("Transcript not available for this video.")
	            return None

	        return TextFormatter().format_transcript(fetched)
	    except Exception as err:
	        st.error(f"An error occurred: {err}")
	        return None

	def translate_text(text, dest_language):
	    """Translate *text* into *dest_language* and return the result.

	    The text is sent to ``GoogleTranslator`` in pieces of at most 500
	    characters. Pieces are cut at the last space or newline inside the
	    500-character window so that words are not split in half (the
	    translated pieces are re-joined with a single space, which would
	    otherwise insert a space in the middle of a word). A run of more than
	    500 characters without whitespace is still cut hard at the limit.

	    Args:
	        text: Source text; the source language is auto-detected.
	        dest_language: Target language code handed to ``GoogleTranslator``.

	    Returns:
	        The translated pieces joined by single spaces; ``''`` when *text*
	        is empty, None or whitespace only (no translator is created then).
	    """
	    if not text or not text.strip():
	        return ''

	    translator = GoogleTranslator(source='auto', target=dest_language)
	    chunk_size = 500

	    chunks = []
	    remaining = text.strip()
	    while remaining:
	        if len(remaining) <= chunk_size:
	            chunks.append(remaining)
	            break
	        # Look one character past the window so a separator sitting exactly
	        # at the limit still yields a full-size chunk.
	        cut = max(
	            remaining.rfind(' ', 0, chunk_size + 1),
	            remaining.rfind('\n', 0, chunk_size + 1),
	        )
	        if cut <= 0:
	            cut = chunk_size  # no whitespace in the window: hard split
	        chunks.append(remaining[:cut].rstrip())
	        remaining = remaining[cut:].lstrip()

	    translated_chunks = [translator.translate(chunk) for chunk in chunks]
	    # Defensive: skip empty/None results so the join cannot raise TypeError.
	    return ' '.join(piece for piece in translated_chunks if piece)

	def generate_mcqs(transcript, num_questions=10, language='en'):
	    """Ask the remote model for multiple-choice questions about *transcript*.

	    The Gradio app to call is named by the ``CLIENT`` environment variable
	    and is invoked through ``gradio_client.Client.predict`` on the
	    ``/predict`` endpoint with a single ``prompt`` argument.

	    Args:
	        transcript: Text the questions must be based on.
	        num_questions: Number of questions requested in the prompt.
	        language: Language the questions should be written in. Either a
	            code (``'en'``, ``'hi'``) or a language name such as
	            ``'Hindi'``; it is now stated in the prompt (it was previously
	            accepted but ignored).

	    Returns:
	        Whatever ``client.predict`` returns — presumably the generated
	        text in the ``Q:`` / ``A)`` … ``Correct Answer:`` layout requested
	        by the prompt (the exact return type depends on the remote app).

	    Raises:
	        RuntimeError: If the ``CLIENT`` environment variable is not set.
	    """
	    client_src = os.getenv("CLIENT")
	    if not client_src:
	        # Fail with an actionable message instead of handing None to Client.
	        raise RuntimeError(
	            "The CLIENT environment variable must name the Gradio app used "
	            "to generate questions."
	        )

	    language_names = {'en': 'English', 'hi': 'Hindi'}
	    requested = language or 'en'
	    language_name = language_names.get(requested, requested)

	    prompt = "\n".join([
	        "",
	        f"Based on the following transcript, generate exactly {num_questions} "
	        "multiple-choice questions (MCQs) with 4 options each.",
	        "Ensure the questions cover various aspects of the video content to "
	        "thoroughly check user understanding.",
	        # The labels must stay untranslated because the parser keys on them.
	        f"Write every question and every option in {language_name}, but keep "
	        "the labels 'Q:', 'A)', 'B)', 'C)', 'D)' and 'Correct Answer:' "
	        "exactly as shown.",
	        "Format each question as follows:",
	        "Q: [Question]",
	        "A) [Option A]",
	        "B) [Option B]",
	        "C) [Option C]",
	        "D) [Option D]",
	        "Correct Answer: [A/B/C/D]",
	        "",
	        "Transcript:",
	        f"{transcript}",
	        "",
	    ])

	    client = Client(client_src)
	    result = client.predict(prompt=prompt, api_name="/predict")
	    return result

	def parse_mcqs(mcqs_text):
	    """Parse model output into a list of question dictionaries.

	    Expected layout, one item per line::

	        Q: <question>
	        A) <option>
	        B) <option>
	        C) <option>
	        D) <option>
	        Correct Answer: <letter>

	    Each returned dict has the keys ``'question'`` (str), ``'options'``
	    (the option lines including their ``A)``-style label) and
	    ``'correct_answer'``.

	    Handling of imperfect output:

	    * Lines before the first ``Q:`` are ignored (an option or answer line
	      there used to raise ``KeyError``).
	    * The answer is reduced to its upper-case letter when it starts with a
	      single A-D letter (``"B"``, ``"b"``, ``"B) Paris"``, ``"[B]"`` all
	      become ``"B"``); otherwise the full text after the first colon is
	      kept unchanged.
	    * Questions without any option or without a non-empty answer are
	      dropped, since they can be neither shown nor graded.

	    Returns an empty list for empty or None input.
	    """
	    if not mcqs_text:
	        return []

	    questions = []
	    current = None

	    def keep_if_complete(entry):
	        # Only questions that can be displayed and graded are kept.
	        if entry and entry['options'] and 'correct_answer' in entry:
	            questions.append(entry)

	    for raw_line in mcqs_text.split('\n'):
	        line = raw_line.strip()
	        if line.startswith('Q:'):
	            keep_if_complete(current)
	            current = {'question': line[2:].strip(), 'options': []}
	        elif current is None:
	            # Preamble before the first question: nothing to attach it to.
	            continue
	        elif line.startswith(('A)', 'B)', 'C)', 'D)')):
	            current['options'].append(line)
	        elif line.startswith('Correct Answer:'):
	            # Split once so an answer containing ':' is not truncated.
	            answer = line.split(':', 1)[1].strip()
	            if answer:
	                letter = re.match(r'\W*([A-Da-d])\b', answer)
	                current['correct_answer'] = (
	                    letter.group(1).upper() if letter else answer
	                )

	    keep_if_complete(current)
	    return questions

	def reset_state():
	    """Clear the per-quiz entries of ``st.session_state`` and restore defaults.

	    Removes the transcript, its translation, the questions, the answers and
	    the submitted flag when present, then resets the URL field to an empty
	    string and the question count to 10.
	    """
	    quiz_keys = ('transcript', 'translated_transcript', 'questions', 'answers', 'submitted')
	    # Collect first, then delete, so only keys that actually exist are touched.
	    present = [name for name in quiz_keys if name in st.session_state]
	    for name in present:
	        del st.session_state[name]

	    st.session_state['video_url'] = ""
	    st.session_state['num_questions'] = 10

	def display_mcqs():
	    """Render the quiz stored in ``st.session_state`` and grade it on submit.

	    Reads ``st.session_state['questions']`` (dicts with ``'question'``,
	    ``'options'`` and ``'correct_answer'``), shows one radio group per
	    question and stores each selection in ``st.session_state['answers']``.
	    After "Submit Answers" every question is marked correct, incorrect or
	    unanswered and a percentage score with a short verdict is shown.
	    "Start Again" calls ``reset_state`` and reruns the script.

	    Does nothing when there are no questions.
	    """
	    questions = st.session_state['questions']
	    if not questions:
	        return

	    def option_text(option):
	        # Options are stored with their two-character label ("A) text").
	        # Slicing the label off keeps option text that itself contains ") "
	        # intact and also works when no space follows the label.
	        return option[2:].strip()

	    for i, q in enumerate(questions):
	        st.subheader(f"Q{i+1}: {q['question']}")
	        options = [option_text(opt) for opt in q['options']]
	        answer = st.radio("Select an answer:", options, key=f"q{i}", index=None)
	        st.session_state['answers'][i] = answer

	    if st.button("Submit Answers"):
	        st.session_state['submitted'] = True

	    if st.session_state['submitted']:
	        correct_count = 0
	        for i, q in enumerate(questions):
	            st.subheader(f"Q{i+1}: {q['question']}")
	            correct_option = q.get('correct_answer')
	            selected_option = st.session_state['answers'][i]

	            # An empty answer key would "match" every option via
	            # startswith(''), so it is treated like a missing one.
	            matching = None
	            if correct_option:
	                matching = next(
	                    (opt for opt in q['options'] if opt.startswith(correct_option)),
	                    None,
	                )
	            if matching is None:
	                st.error("There was an error with the correct answer. Please try again.")
	                continue

	            correct_answer = option_text(matching)
	            if not selected_option:
	                st.warning("You didn't answer this question.")
	            elif selected_option == correct_answer:
	                st.success("Correct!")
	                correct_count += 1
	            else:
	                st.error(f"Incorrect. The correct answer is {correct_answer}")

	        total_questions = len(questions)
	        score_percentage = (correct_count / total_questions) * 100
	        st.write(f"Your score: {correct_count}/{total_questions} ({score_percentage:.2f}%)")
	        if score_percentage >= 80:
	            st.success("Great job! You have a good understanding of the video content.")
	        elif score_percentage >= 60:
	            st.info("Good effort! You might want to review some parts of the video.")
	        else:
	            st.warning("You might need to watch the video again to improve your understanding.")

	    if st.button("Start Again"):
	        reset_state()
	        # st.experimental_rerun is deprecated in favour of st.rerun; use the
	        # new name when this Streamlit version has it.
	        rerun = getattr(st, 'rerun', None) or st.experimental_rerun
	        rerun()

	# Streamlit App
	# Page flow: pick a language, enter a YouTube URL, download and translate the
	# transcript, generate questions, then show the quiz.
	st.set_page_config(page_title="YouTube Video MCQ Generator", layout="wide")
	st.title("YouTube Video MCQ Generator")

	# Initialize session state variables (only on the first run of a session)
	for _key, _default in (
	    ('language', "English"),
	    ('video_url', ""),
	    ('num_questions', 10),
	    ('submitted', False),
	    ('questions', None),
	):
	    if _key not in st.session_state:
	        st.session_state[_key] = _default

	# Language selection
	st.session_state['language'] = st.radio("Select language for questions:", ["English", "Hindi"])
	language_code = 'en' if st.session_state['language'] == "English" else 'hi'

	# Input for YouTube URL
	st.session_state['video_url'] = st.text_input("Enter YouTube Video URL:", value=st.session_state['video_url'])

	if st.session_state['video_url']:
	    video_url = st.session_state['video_url']

	    # The cached transcript belongs to one specific URL. When the user enters
	    # a different one, drop everything derived from the previous video so the
	    # quiz is never built from a stale transcript.
	    if 'transcript_url' in st.session_state and st.session_state['transcript_url'] != video_url:
	        for _key in ('transcript', 'translated_transcript', 'answers', 'transcript_url'):
	            if _key in st.session_state:
	                del st.session_state[_key]
	        st.session_state['questions'] = None
	        st.session_state['submitted'] = False

	    if 'transcript' not in st.session_state:
	        transcript = download_youtube_transcript(video_url)
	        if transcript:
	            st.session_state['transcript'] = transcript
	            st.session_state['transcript_url'] = video_url

	    # Likewise the translation belongs to one target language; redo it when
	    # the language selection changes.
	    if (
	        'translated_transcript' in st.session_state
	        and 'translated_language' in st.session_state
	        and st.session_state['translated_language'] != language_code
	    ):
	        del st.session_state['translated_transcript']

	    if 'translated_transcript' not in st.session_state and 'transcript' in st.session_state:
	        with st.spinner("Translating transcript..."):
	            try:
	                translated_transcript = translate_text(st.session_state['transcript'], language_code)
	            except Exception as e:
	                # Translation is a network call; report instead of crashing.
	                st.error(f"An error occurred: {e}")
	            else:
	                st.session_state['translated_transcript'] = translated_transcript
	                st.session_state['translated_language'] = language_code

	    if 'translated_transcript' in st.session_state:
	        st.success("Transcript downloaded and translated successfully!")

	        st.session_state['num_questions'] = st.slider("Number of questions to generate:", min_value=5, max_value=20, value=st.session_state['num_questions'])

	        if st.button("Generate MCQs"):
	            with st.spinner(f"Generating {st.session_state['num_questions']} questions in {st.session_state['language']}..."):
	                try:
	                    mcqs_text = generate_mcqs(st.session_state['translated_transcript'], st.session_state['num_questions'], st.session_state['language'])
	                    questions = parse_mcqs(mcqs_text)
	                except Exception as e:
	                    # Generation is a remote call; report instead of crashing.
	                    st.error(f"An error occurred: {e}")
	                else:
	                    st.session_state['questions'] = questions
	                    st.session_state['answers'] = [''] * len(questions)
	                    st.session_state['submitted'] = False
	                    if not questions:
	                        st.warning("No questions could be read from the model's response. Please try again.")

	if st.session_state['questions']:
	    display_mcqs()