# Spaces: qdqd / Youtube2mcq (status: Sleeping)
# File size: 7,875 Bytes
# Revision: 389d3a7

import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import re
from gradio_client import Client
from deep_translator import GoogleTranslator
import os

# An 11-character video ID that is NOT immediately followed by another ID
# character, so an over-long token is rejected instead of silently truncated.
_VIDEO_ID_RE = r'([_\-a-zA-Z0-9]{11})(?![_\-a-zA-Z0-9])'
_URL_SCHEME_RE = r'(?:https?://)?'

# Compiled once at import time; each pattern captures the video ID in group 1.
_VIDEO_URL_PATTERNS = tuple(
    re.compile(_URL_SCHEME_RE + host_and_path + _VIDEO_ID_RE)
    for host_and_path in (
        # watch URLs; ``v`` may be preceded by other query parameters
        r'(?:(?:www|m|music)\.)?youtube\.com/watch\?(?:[^#\s]*&)?v=',
        # embed / shorts / live URLs carry the ID as a path segment
        r'(?:(?:www|m)\.)?youtube\.com/(?:embed|shorts|live)/',
        # short links
        r'(?:www\.)?youtu\.be/',
    )
)


def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or ``None``.

    Recognised forms (scheme optional): ``youtube.com/watch?v=ID`` (also when
    ``v`` is not the first query parameter, and on the ``www``/``m``/``music``
    hosts), ``youtube.com/embed|shorts|live/ID`` and ``youtu.be/ID``.
    Leading/trailing whitespace is ignored.

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID string, or ``None`` when *url* is not a string or does
        not match any recognised form.
    """
    if not isinstance(url, str):
        return None

    # Pasted URLs frequently carry stray whitespace; the patterns are anchored
    # at the start, so strip before matching.
    candidate = url.strip()
    for pattern in _VIDEO_URL_PATTERNS:
        match = pattern.match(candidate)
        if match:
            return match.group(1)
    return None

def download_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return it as plain text.

    The video ID is taken from *video_url* via ``extract_video_id``. The
    transcripts listed for the video are tried in the order the API yields
    them, and the first one that can be fetched is used. Every failure is
    reported to the user through ``st.error``.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when the
        URL is invalid, no transcript could be fetched, or an error occurred.
    """
    vid = extract_video_id(video_url)
    if not vid:
        st.error("Invalid YouTube URL.")
        return None

    try:
        fetched = None
        for candidate in YouTubeTranscriptApi.list_transcripts(vid):
            try:
                fetched = candidate.fetch()
            except Exception:
                # Best effort: this transcript is unusable, try the next one.
                continue
            else:
                break

        if fetched:
            return TextFormatter().format_transcript(fetched)

        st.error("Transcript not available for this video.")
        return None
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None

def _split_on_whitespace(text, chunk_size):
    """Cut *text* into consecutive pieces of at most *chunk_size* characters, preferring to cut after whitespace."""
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        end = min(start + chunk_size, total)
        if end < total and not text[end].isspace():
            # The cut would land inside a word: back up to just after the
            # last whitespace in this window. If the window holds no
            # whitespace at all, keep the hard cut so progress is guaranteed.
            for cut in range(end - 1, start, -1):
                if text[cut].isspace():
                    end = cut + 1
                    break
        pieces.append(text[start:end])
        start = end
    return pieces


def translate_text(text, dest_language, chunk_size=500):
    """Translate *text* into *dest_language* chunk by chunk.

    The text is sent to ``GoogleTranslator`` in pieces of at most
    *chunk_size* characters. Pieces are cut at whitespace where possible so
    that a word is not split in two and then re-joined with a space in the
    middle of it.

    Args:
        text: Text to translate; the source language is auto-detected.
        dest_language: Target language code passed to the translator
            (the caller in this file uses ``'en'`` or ``'hi'``).
        chunk_size: Maximum number of characters per request. The default of
            500 is the value this function has always used (presumably chosen
            to stay well below the provider's request limit - confirm).

    Returns:
        The translated chunks joined by single spaces; ``''`` for empty input.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    translator = GoogleTranslator(source='auto', target=dest_language)
    if not text:
        return ''

    translated_chunks = []
    for chunk in _split_on_whitespace(text, chunk_size):
        if not chunk.strip():
            # Nothing to translate in a whitespace-only piece.
            continue
        translated = translator.translate(chunk)
        # Defensive: skip a chunk for which the translator returned nothing
        # rather than letting ``join`` fail on a non-string.
        if translated:
            translated_chunks.append(translated)
    return ' '.join(translated_chunks)

def generate_mcqs(transcript, num_questions=10, language='en'):
    """Ask the remote model to write multiple-choice questions for a transcript.

    A ``gradio_client.Client`` is created for the endpoint named by the
    ``CLIENT`` environment variable and its ``/predict`` API is called with a
    prompt that embeds *transcript*.

    Args:
        transcript: Transcript text to base the questions on.
        num_questions: Number of questions requested in the prompt.
        language: Language the question and option text should be written in.
            The value is interpolated into the prompt as given (the caller in
            this file passes ``"English"`` or ``"Hindi"``).

    Returns:
        Whatever the ``/predict`` endpoint returns. The caller feeds it to
        ``parse_mcqs``, so it is presumably the generated text - confirm
        against the endpoint.

    Raises:
        RuntimeError: If the ``CLIENT`` environment variable is not set.
    """
    endpoint = os.getenv("CLIENT")
    if not endpoint:
        # Fail with an actionable message instead of an obscure error from
        # inside the client constructor.
        raise RuntimeError("The CLIENT environment variable is not set.")

    client = Client(endpoint)
    # The labels must stay in English because parse_mcqs matches them
    # literally; only the question/option text follows *language*.
    prompt = f"""
    Based on the following transcript, generate exactly {num_questions} multiple-choice questions (MCQs) with 4 options each.
    Ensure the questions cover various aspects of the video content to thoroughly check user understanding.
    Write the question text and the option text in this language: {language}.
    Keep the labels 'Q:', 'A)', 'B)', 'C)', 'D)' and 'Correct Answer:' exactly as shown below.
    Format each question as follows:
    Q: [Question]
    A) [Option A]
    B) [Option B]
    C) [Option C]
    D) [Option D]
    Correct Answer: [A/B/C/D]

    Transcript:
    {transcript}
    """

    result = client.predict(prompt=prompt, api_name="/predict")
    return result

def parse_mcqs(mcqs_text):
    """Parse model output into a list of question dictionaries.

    Expected layout per question (surrounding whitespace is ignored)::

        Q: <question>
        A) <option>
        B) <option>
        C) <option>
        D) <option>
        Correct Answer: <letter>

    Lines that appear before the first ``Q:`` line, and lines that match none
    of the markers, are ignored. Questions that end up without any option or
    without a correct answer are dropped, because they can be neither shown
    nor graded.

    Args:
        mcqs_text: Raw text returned by the model; ``None`` or ``''`` yields
            an empty list.

    Returns:
        A list of dicts with keys ``'question'`` (str), ``'options'`` (list of
        ``"X) text"`` strings) and ``'correct_answer'`` (the upper-case option
        letter when one can be recognised, otherwise the raw answer text).
    """
    option_labels = ('A)', 'B)', 'C)', 'D)')
    questions = []
    current = None

    def _finish(question):
        # Keep only questions that can actually be displayed and graded.
        if question and question['options'] and question.get('correct_answer'):
            questions.append(question)

    for raw_line in (mcqs_text or '').splitlines():
        line = raw_line.strip()
        if line.startswith('Q:'):
            _finish(current)
            current = {'question': line[2:].strip(), 'options': []}
        elif current is None:
            # Preamble before the first question: nothing to attach it to.
            continue
        elif line.startswith(option_labels):
            # Normalise to "X) text" so "A)text" and "A)  text" read the same.
            current['options'].append(f"{line[0]}) {line[2:].strip()}")
        elif line.startswith('Correct Answer:'):
            answer = line.split(':', 1)[1].strip()
            # Accept "B", "b", "[B]", "B) text" ... and reduce them to "B";
            # anything unrecognised is kept verbatim.
            letter = re.match(r'\W*([A-Da-d])\b', answer)
            current['correct_answer'] = letter.group(1).upper() if letter else answer

    _finish(current)
    return questions

def reset_state():
    """Clear the per-video session data and restore the input defaults.

    Removes the transcript, translated transcript, questions, answers and
    submitted flag from ``st.session_state`` when present, then resets the
    video URL to an empty string and the question count to 10.
    """
    state = st.session_state
    per_video_keys = ('transcript', 'translated_transcript', 'questions', 'answers', 'submitted')
    for name in per_video_keys:
        if name not in state:
            continue
        del state[name]

    state['video_url'] = ""
    state['num_questions'] = 10

def _option_text(option):
    """Return the text of an option line such as ``"A) Paris"`` without its label."""
    # Split on the FIRST ")" only, so option text that itself contains
    # parentheses is kept whole and "A)text" (no space) does not raise.
    _label, separator, remainder = option.partition(')')
    return remainder.strip() if separator else option


def display_mcqs():
    """Render the quiz, grade it once submitted, and offer a restart.

    Reads ``questions``, ``answers`` and ``submitted`` from
    ``st.session_state``. Each question is shown as a radio group whose
    selection is stored in ``answers``. After "Submit Answers" is pressed,
    every question is marked correct/incorrect/unanswered and a percentage
    score with a short verdict is shown. "Start Again" clears the session via
    ``reset_state`` and reruns the script.
    """
    questions = st.session_state['questions']
    if questions:
        for i, q in enumerate(questions):
            st.subheader(f"Q{i+1}: {q['question']}")
            options = [_option_text(opt) for opt in q['options']]
            answer = st.radio("Select an answer:", options, key=f"q{i}", index=None)
            st.session_state['answers'][i] = answer

        if st.button("Submit Answers"):
            st.session_state['submitted'] = True

        if st.session_state['submitted']:
            correct_count = 0
            for i, q in enumerate(questions):
                st.subheader(f"Q{i+1}: {q['question']}")
                # A missing or empty answer key must not crash, nor match
                # every option (``str.startswith('')`` is always true).
                correct_option = q.get('correct_answer')
                matching_option = None
                if correct_option:
                    matching_option = next(
                        (opt for opt in q['options'] if opt.startswith(correct_option)),
                        None,
                    )
                if matching_option is None:
                    st.error("There was an error with the correct answer. Please try again.")
                    continue

                correct_answer = _option_text(matching_option)
                selected_option = st.session_state['answers'][i]
                if not selected_option:
                    st.warning("You didn't answer this question.")
                elif selected_option == correct_answer:
                    st.success("Correct!")
                    correct_count += 1
                else:
                    st.error(f"Incorrect. The correct answer is {correct_answer}")

            total_questions = len(questions)
            score_percentage = (correct_count / total_questions) * 100
            st.write(f"Your score: {correct_count}/{total_questions} ({score_percentage:.2f}%)")
            if score_percentage >= 80:
                st.success("Great job! You have a good understanding of the video content.")
            elif score_percentage >= 60:
                st.info("Good effort! You might want to review some parts of the video.")
            else:
                st.warning("You might need to watch the video again to improve your understanding.")

    if st.button("Start Again"):
        reset_state()
        # ``st.experimental_rerun`` is deprecated in favour of ``st.rerun``;
        # use the new name when this Streamlit version provides it.
        rerun = getattr(st, 'rerun', None) or st.experimental_rerun
        rerun()

# Streamlit App
# Flow: pick a language -> enter a video URL -> download and translate the
# transcript -> generate MCQs -> answer and grade them.
st.set_page_config(page_title="YouTube Video MCQ Generator", layout="wide")
st.title("YouTube Video MCQ Generator")

# Initialize session state variables
if 'language' not in st.session_state:
    st.session_state['language'] = "English"
if 'video_url' not in st.session_state:
    st.session_state['video_url'] = ""
if 'num_questions' not in st.session_state:
    st.session_state['num_questions'] = 10
if 'submitted' not in st.session_state:
    st.session_state['submitted'] = False
if 'questions' not in st.session_state:
    st.session_state['questions'] = None

# Language selection
st.session_state['language'] = st.radio("Select language for questions:", ["English", "Hindi"])
language_code = 'en' if st.session_state['language'] == "English" else 'hi'

# Input for YouTube URL
st.session_state['video_url'] = st.text_input("Enter YouTube Video URL:", value=st.session_state['video_url'])
video_url = st.session_state['video_url']

if video_url:
    # The cached transcript is only valid for the URL it was downloaded from.
    # When the URL changes, drop everything derived from the previous video
    # so a stale transcript or quiz is never shown for the new one.
    if st.session_state.get('transcript_source') != video_url:
        for stale_key in ('transcript', 'translated_transcript', 'transcript_source', 'translation_language'):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state['questions'] = None
        st.session_state['submitted'] = False

    if 'transcript' not in st.session_state:
        transcript = download_youtube_transcript(video_url)
        if transcript:
            st.session_state['transcript'] = transcript
            # Recorded only on success, so a failed download is retried.
            st.session_state['transcript_source'] = video_url

    # Likewise, a cached translation is only valid for the language it was
    # produced in; switching language forces a re-translation and discards
    # questions written for the old language.
    if 'translated_transcript' in st.session_state and st.session_state.get('translation_language') != language_code:
        del st.session_state['translated_transcript']
        st.session_state['questions'] = None
        st.session_state['submitted'] = False

    if 'translated_transcript' not in st.session_state and 'transcript' in st.session_state:
        with st.spinner("Translating transcript..."):
            translated_transcript = translate_text(st.session_state['transcript'], language_code)
            st.session_state['translated_transcript'] = translated_transcript
            st.session_state['translation_language'] = language_code

    if 'translated_transcript' in st.session_state:
        st.success("Transcript downloaded and translated successfully!")

        st.session_state['num_questions'] = st.slider("Number of questions to generate:", min_value=5, max_value=20, value=st.session_state['num_questions'])

        if st.button("Generate MCQs"):
            with st.spinner(f"Generating {st.session_state['num_questions']} questions in {st.session_state['language']}..."):
                mcqs_text = generate_mcqs(st.session_state['translated_transcript'], st.session_state['num_questions'], st.session_state['language'])
                questions = parse_mcqs(mcqs_text)
                st.session_state['questions'] = questions
                st.session_state['answers'] = [''] * len(questions)
                st.session_state['submitted'] = False
            if not questions:
                # Without this the page would simply show nothing.
                st.error("No questions could be parsed from the model response. Please try again.")

if st.session_state['questions']:
    display_mcqs()