Youtube2mcq / app.py
qdqd's picture
Create app.py
389d3a7 verified
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import re
from gradio_client import Client
from deep_translator import GoogleTranslator
import os
def extract_video_id(url):
    """Return the 11-character YouTube video ID embedded in ``url``.

    Recognised forms (the scheme and a ``www.`` prefix are optional):

    * ``youtube.com/watch?v=<id>`` -- ``v`` may follow other query
      parameters, and ``m.`` / ``music.`` hosts are accepted
    * ``youtube.com/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>``
    * ``youtu.be/<id>``

    Leading and trailing whitespace (common in pasted links) is ignored.
    Anything after the ID is ignored as well.

    Args:
        url: The text entered by the user.

    Returns:
        The video ID, or ``None`` when ``url`` is not a string or does not
        match any recognised form.
    """
    if not isinstance(url, str):
        return None
    url = url.strip()

    video_id = r'([_\-a-zA-Z0-9]{11})'
    patterns = (
        # The lazy optional group lets ``v=`` appear after other parameters
        # while still preferring a ``v=`` that comes first.
        r'(?:https?://)?(?:www\.|m\.|music\.)?youtube\.com/watch\?(?:[^#\s]*?&)??v=' + video_id,
        r'(?:https?://)?(?:www\.|m\.)?youtube\.com/(?:embed|shorts|live)/' + video_id,
        r'(?:https?://)?(?:www\.)?youtu\.be/' + video_id,
    )
    for pattern in patterns:
        match = re.match(pattern, url)
        if match:
            return match.group(1)
    return None
def download_youtube_transcript(video_url):
    """Fetch a transcript for the video at ``video_url`` as plain text.

    The available transcripts are tried in the order the API lists them and
    the first one that can be fetched is used.  Every failure is reported to
    the user through ``st.error``.

    Args:
        video_url: A YouTube URL understood by ``extract_video_id``.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when the
        URL is invalid, no transcript could be fetched, or an error occurred.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        st.error("Invalid YouTube URL.")
        return None

    try:
        fetched = None
        for candidate in YouTubeTranscriptApi.list_transcripts(video_id):
            try:
                fetched = candidate.fetch()
            except Exception:
                # Best effort: this transcript is unusable, try the next one.
                continue
            break

        if not fetched:
            st.error("Transcript not available for this video.")
            return None

        return TextFormatter().format_transcript(fetched)
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None
def _split_on_whitespace(text, chunk_size):
    """Split ``text`` into pieces of at most ``chunk_size`` characters.

    Cuts are moved back to the nearest whitespace so words stay whole; a
    single run longer than ``chunk_size`` is hard-split.  Joining the pieces
    with ``''`` reproduces ``text`` exactly.
    """
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        end = min(start + chunk_size, total)
        if end < total and not text[end].isspace():
            # The cut would land inside a word: back up to just after the
            # last whitespace character inside this window, if there is one.
            for cut in range(end - 1, start, -1):
                if text[cut].isspace():
                    end = cut + 1
                    break
        pieces.append(text[start:end])
        start = end
    return pieces


def translate_text(text, dest_language, chunk_size=500):
    """Translate ``text`` into ``dest_language`` chunk by chunk.

    The text is sent to the translator in pieces of at most ``chunk_size``
    characters.  Pieces are cut at whitespace so that no word is split in two
    and translated as unrelated fragments.

    Args:
        text: The text to translate.  Empty or whitespace-only input yields
            an empty string without contacting the translator.
        dest_language: Target language code passed to ``GoogleTranslator``.
        chunk_size: Maximum number of characters per request (must be >= 1).

    Returns:
        The translated chunks joined with single spaces.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    if not text or not text.strip():
        return ''

    translator = GoogleTranslator(source='auto', target=dest_language)
    translated_chunks = []
    for chunk in _split_on_whitespace(text, chunk_size):
        if not chunk.strip():
            # Nothing to translate in a whitespace-only piece.
            continue
        translated = translator.translate(chunk)
        if translated:
            # Skip empty/None results so the join below cannot fail.
            translated_chunks.append(translated)
    return ' '.join(translated_chunks)
def generate_mcqs(transcript, num_questions=10, language='en'):
    """Ask the remote Gradio endpoint to write MCQs about ``transcript``.

    The endpoint is the one named by the ``CLIENT`` environment variable and
    is called through its ``/predict`` API with a single ``prompt`` argument.

    Args:
        transcript: Transcript text embedded at the end of the prompt.
        num_questions: Number of questions requested in the prompt.
        language: Accepted for the caller's convenience; it is not used when
            building the prompt.

    Returns:
        Whatever ``Client.predict`` returns for the prompt.
    """
    request_text = f"""
Based on the following transcript, generate exactly {num_questions} multiple-choice questions (MCQs) with 4 options each.
Ensure the questions cover various aspects of the video content to thoroughly check user understanding.
Format each question as follows:
Q: [Question]
A) [Option A]
B) [Option B]
C) [Option C]
D) [Option D]
Correct Answer: [A/B/C/D]
Transcript:
{transcript}
"""
    endpoint = os.getenv("CLIENT")
    gradio_client = Client(endpoint)
    return gradio_client.predict(prompt=request_text, api_name="/predict")
def parse_mcqs(mcqs_text):
    """Parse model output into a list of question dictionaries.

    Expected line shapes (leading/trailing whitespace is ignored):

    * ``Q: <question>`` starts a new question
    * ``A) ...`` / ``B) ...`` / ``C) ...`` / ``D) ...`` add an option
    * ``Correct Answer: <letter>`` records the answer

    Option or answer lines that appear before the first ``Q:`` line are
    ignored, and all other lines are skipped.  The answer is normalised to a
    single upper-case letter when it starts with one (``b``, ``[C]`` and
    ``B) Paris`` become ``B``, ``C`` and ``B``); otherwise the raw text after
    the colon is kept.

    Args:
        mcqs_text: Text returned by the model; ``None`` or ``''`` yields ``[]``.

    Returns:
        A list of dicts with keys ``'question'`` (str), ``'options'`` (list
        of the raw option lines) and, when an answer line was present,
        ``'correct_answer'``.
    """
    questions = []
    current_question = None
    for line in (mcqs_text or '').split('\n'):
        line = line.strip()
        if line.startswith('Q:'):
            if current_question is not None:
                questions.append(current_question)
            current_question = {'question': line[2:].strip(), 'options': []}
        elif current_question is None:
            # Nothing to attach this line to yet (e.g. preamble text).
            continue
        elif line.startswith(('A)', 'B)', 'C)', 'D)')):
            current_question['options'].append(line)
        elif line.startswith('Correct Answer:'):
            answer = line.split(':', 1)[1].strip()
            # Accept decorations such as "[B]", "**B**" or "b) text", but not
            # words that merely start with a-d (e.g. "Both").
            letter = re.match(r'\W*([A-Da-d])(?![A-Za-z])', answer)
            current_question['correct_answer'] = letter.group(1).upper() if letter else answer
    if current_question is not None:
        questions.append(current_question)
    return questions
def reset_state():
    """Forget the current video and quiz so the user can start over.

    Removes the transcript, translation, questions, answers and submission
    flag from the session state (when present), then restores the URL field
    to an empty string and the question count to 10.
    """
    per_video_keys = ('transcript', 'translated_transcript', 'questions', 'answers', 'submitted')
    for name in per_video_keys:
        if name not in st.session_state:
            continue
        del st.session_state[name]

    st.session_state['video_url'] = ""
    st.session_state['num_questions'] = 10
def _option_text(option_line):
    """Return the answer text of an option line such as ``'B) Paris'``."""
    # Only the two-character 'X)' label is removed, so option text that
    # itself contains ') ' is kept whole and a missing space cannot fail.
    return option_line[2:].strip()


def display_mcqs():
    """Render the quiz stored in the session state and, once submitted, grade it.

    Reads ``questions``, ``answers`` and ``submitted`` from
    ``st.session_state``.  Each question is shown with a radio group whose
    selection is written back into ``answers``.  After "Submit Answers" is
    pressed, every question is marked correct / incorrect / unanswered, the
    score is shown with a short verdict, and a "Start Again" button resets
    the app.  Questions whose correct answer is missing or matches no option
    are reported as errors instead of raising.
    """
    questions = st.session_state['questions']
    if not questions:
        return

    for i, q in enumerate(questions):
        st.subheader(f"Q{i+1}: {q['question']}")
        options = [_option_text(opt) for opt in q['options']]
        answer = st.radio("Select an answer:", options, key=f"q{i}", index=None)
        st.session_state['answers'][i] = answer

    if st.button("Submit Answers"):
        st.session_state['submitted'] = True

    if not st.session_state['submitted']:
        return

    correct_count = 0
    for i, q in enumerate(questions):
        st.subheader(f"Q{i+1}: {q['question']}")
        correct_option = q.get('correct_answer')
        selected_option = st.session_state['answers'][i]

        # Find the option line that carries the correct letter; an absent or
        # empty answer must not match anything.
        correct_line = None
        if correct_option:
            correct_line = next(
                (opt for opt in q['options'] if opt.startswith(correct_option)),
                None,
            )
        if correct_line is None:
            st.error("There was an error with the correct answer. Please try again.")
            continue

        correct_answer = _option_text(correct_line)
        if not selected_option:
            st.warning("You didn't answer this question.")
        elif selected_option == correct_answer:
            st.success("Correct!")
            correct_count += 1
        else:
            st.error(f"Incorrect. The correct answer is {correct_answer}")

    total_questions = len(questions)
    score_percentage = (correct_count / total_questions) * 100
    st.write(f"Your score: {correct_count}/{total_questions} ({score_percentage:.2f}%)")
    if score_percentage >= 80:
        st.success("Great job! You have a good understanding of the video content.")
    elif score_percentage >= 60:
        st.info("Good effort! You might want to review some parts of the video.")
    else:
        st.warning("You might need to watch the video again to improve your understanding.")

    if st.button("Start Again"):
        reset_state()
        # st.experimental_rerun is gone from newer Streamlit releases; use
        # st.rerun when it exists and fall back to the old name otherwise.
        rerun = getattr(st, 'rerun', None) or st.experimental_rerun
        rerun()
# Streamlit App
st.set_page_config(page_title="YouTube Video MCQ Generator", layout="wide")
st.title("YouTube Video MCQ Generator")

# Initialize session state variables so later reads never hit a missing key.
for state_key, state_default in (
    ('language', "English"),
    ('video_url', ""),
    ('num_questions', 10),
    ('submitted', False),
    ('questions', None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = state_default

# Language selection
st.session_state['language'] = st.radio("Select language for questions:", ["English", "Hindi"])
language_code = 'en' if st.session_state['language'] == "English" else 'hi'

# Input for YouTube URL
st.session_state['video_url'] = st.text_input("Enter YouTube Video URL:", value=st.session_state['video_url'])
video_url = st.session_state['video_url']

# A cached transcript only applies to the URL it was downloaded for.  When
# the URL changes, drop it together with the quiz generated from it;
# otherwise the previous video would silently be reused.
if st.session_state.get('transcript_url') != video_url:
    for stale_key in ('transcript', 'translated_transcript'):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    st.session_state['questions'] = None
    st.session_state['submitted'] = False

# Likewise, a cached translation only applies to the language it was made for.
if (
    'translated_transcript' in st.session_state
    and st.session_state.get('translated_language') != language_code
):
    del st.session_state['translated_transcript']

if video_url:
    if 'transcript' not in st.session_state:
        transcript = download_youtube_transcript(video_url)
        if transcript:
            st.session_state['transcript'] = transcript
            st.session_state['transcript_url'] = video_url

    if 'translated_transcript' not in st.session_state and 'transcript' in st.session_state:
        with st.spinner("Translating transcript..."):
            try:
                translated_transcript = translate_text(st.session_state['transcript'], language_code)
            except Exception as e:
                # The translation service is remote; report instead of crashing.
                st.error(f"An error occurred: {e}")
            else:
                st.session_state['translated_transcript'] = translated_transcript
                st.session_state['translated_language'] = language_code

    if 'translated_transcript' in st.session_state:
        st.success("Transcript downloaded and translated successfully!")
        st.session_state['num_questions'] = st.slider("Number of questions to generate:", min_value=5, max_value=20, value=st.session_state['num_questions'])
        if st.button("Generate MCQs"):
            with st.spinner(f"Generating {st.session_state['num_questions']} questions in {st.session_state['language']}..."):
                try:
                    mcqs_text = generate_mcqs(st.session_state['translated_transcript'], st.session_state['num_questions'], st.session_state['language'])
                    questions = parse_mcqs(mcqs_text)
                except Exception as e:
                    # The model endpoint is remote; report instead of crashing.
                    st.error(f"An error occurred: {e}")
                else:
                    st.session_state['questions'] = questions
                    st.session_state['answers'] = [''] * len(questions)
                    st.session_state['submitted'] = False
                    if not questions:
                        st.warning("No questions could be read from the model's reply. Please try again.")

if st.session_state['questions']:
    display_mcqs()