File size: 7,875 Bytes
389d3a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import re
from gradio_client import Client
from deep_translator import GoogleTranslator
import os

def extract_video_id(url):
    """Return the 11-character YouTube video ID contained in *url*.

    Recognised URL shapes (scheme optional, matched from the start of the
    stripped string):

    * ``youtube.com/watch?...v=ID`` - ``v`` may be any query parameter,
      not only the first one
    * ``youtube.com/embed/ID``, ``youtube.com/shorts/ID``,
      ``youtube.com/live/ID``
    * ``youtu.be/ID``

    The ``youtube.com`` forms also accept the ``www.``, ``m.`` and
    ``music.`` host prefixes.

    Args:
        url: The URL text entered by the user.

    Returns:
        The video ID as a string, or ``None`` when *url* is not a string or
        does not match any recognised shape.
    """
    if not isinstance(url, str):
        return None

    video_id = r'([_\-a-zA-Z0-9]{11})'
    host = r'(?:https?://)?(?:(?:www|m|music)\.)?youtube\.com/'
    patterns = [
        # Lazily skip whole "name=value&" segments so that the first real
        # "v=" parameter wins and names merely ending in "v" do not match.
        host + r'watch\?(?:[^&#\s]*&)*?v=' + video_id,
        host + r'(?:embed|shorts|live)/' + video_id,
        r'(?:https?://)?(?:www\.)?youtu\.be/' + video_id,
    ]

    # Pasted URLs frequently carry leading/trailing whitespace.
    candidate = url.strip()
    for pattern in patterns:
        match = re.match(pattern, candidate)
        if match:
            return match.group(1)
    return None

def download_youtube_transcript(video_url):
    """Download a video's transcript and return it as plain text.

    The video ID is extracted from *video_url*; the listed transcripts are
    then tried in order and the first one that can be fetched is formatted
    with ``TextFormatter``. Every failure is reported to the user through
    ``st.error`` instead of being raised.

    Args:
        video_url: YouTube URL entered by the user.

    Returns:
        The formatted transcript text, or ``None`` if the URL is invalid, no
        transcript could be fetched, or any other error occurred.
    """
    vid = extract_video_id(video_url)
    if not vid:
        st.error("Invalid YouTube URL.")
        return None

    try:
        fetched = None
        for candidate in YouTubeTranscriptApi.list_transcripts(vid):
            try:
                fetched = candidate.fetch()
            except Exception:
                # Best effort: this transcript is unavailable, try the next.
                continue
            else:
                break

        if fetched:
            return TextFormatter().format_transcript(fetched)

        st.error("Transcript not available for this video.")
        return None
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None

def translate_text(text, dest_language, chunk_size=500):
    """Translate *text* into *dest_language* in whitespace-aligned chunks.

    The text is split into pieces of at most *chunk_size* characters, each
    piece is translated separately with ``GoogleTranslator`` (source language
    auto-detected), and the results are joined with single spaces.

    Chunks are cut at whitespace rather than at a fixed character offset, so
    a word is never split in two and then re-joined with a space inside it.
    Only a single word longer than *chunk_size* is broken.

    Args:
        text: The text to translate. Runs of whitespace, including newlines,
            are collapsed to single spaces while chunking.
        dest_language: Target language code passed to ``GoogleTranslator``.
        chunk_size: Maximum number of characters per translation request;
            must be positive.

    Returns:
        The translated text, or an empty string when *text* contains no
        non-whitespace characters.
    """
    import textwrap  # local import: keeps this block self-contained

    translator = GoogleTranslator(source='auto', target=dest_language)
    chunks = textwrap.wrap(
        text,
        width=chunk_size,
        break_long_words=True,
        break_on_hyphens=False,
    )

    translated_chunks = []
    for chunk in chunks:
        translated = translator.translate(chunk)
        # Skip empty/None results so the join below cannot fail on a
        # non-string value.
        if translated:
            translated_chunks.append(translated)
    return ' '.join(translated_chunks)

def generate_mcqs(transcript, num_questions=10, language='en'):
    """Ask the remote model for multiple-choice questions about a transcript.

    A ``gradio_client.Client`` is created for the target named by the
    ``CLIENT`` environment variable and its ``/predict`` endpoint is called
    with a prompt built from the arguments.

    Args:
        transcript: Transcript text the questions must be based on.
        num_questions: Exact number of questions requested in the prompt.
        language: Language the questions and options should be written in
            (a code such as ``'en'`` or a name such as ``"Hindi"``). It is
            stated in the prompt; the structural markers are requested
            untranslated so the response stays parseable.

    Returns:
        Whatever ``client.predict`` returns - presumably the generated text
        in the ``Q:`` / ``A)``..``D)`` / ``Correct Answer:`` layout
        (NOTE(review): confirm the endpoint's return type).
    """
    client = Client(os.getenv("CLIENT"))
    prompt = f"""
    Based on the following transcript, generate exactly {num_questions} multiple-choice questions (MCQs) with 4 options each.
    Ensure the questions cover various aspects of the video content to thoroughly check user understanding.
    Write every question and every option in this language: {language}.
    Keep the markers "Q:", "A)", "B)", "C)", "D)" and "Correct Answer:" exactly as shown below; do not translate them.
    Format each question as follows:
    Q: [Question]
    A) [Option A]
    B) [Option B]
    C) [Option C]
    D) [Option D]
    Correct Answer: [A/B/C/D]

    Transcript:
    {transcript}
    """

    result = client.predict(prompt=prompt, api_name="/predict")
    return result

def parse_mcqs(mcqs_text):
    """Parse model output in the ``Q:`` / ``A)``..``D)`` layout into dicts.

    Lines are stripped and read in order:

    * ``Q: ...`` starts a new question.
    * ``A) ...`` to ``D) ...`` are appended verbatim to the current
      question's ``'options'``.
    * ``Correct Answer: ...`` sets the current question's
      ``'correct_answer'``. If the answer starts with a letter A-D
      (optionally wrapped, e.g. ``[c]`` or ``B) text``) it is normalised to
      that upper-case letter; otherwise the text after the first colon is
      stored unchanged.

    Option and answer lines seen before the first ``Q:`` line are ignored,
    as are all other lines.

    Args:
        mcqs_text: Raw text returned by the question generator.

    Returns:
        A list of dicts with keys ``'question'`` (str), ``'options'`` (list
        of raw option lines) and, when an answer line was found,
        ``'correct_answer'`` (str).
    """
    # A leading A-D that is not the first letter of a longer word
    # ("All of the above" must not be read as answer "A").
    answer_letter = re.compile(r'^[\s\[\(\*]*([A-Da-d])(?![A-Za-z])')

    questions = []
    current_question = None
    for line in mcqs_text.split('\n'):
        line = line.strip()
        if line.startswith('Q:'):
            if current_question is not None:
                questions.append(current_question)
            current_question = {'question': line[2:].strip(), 'options': []}
        elif current_question is None:
            # Preamble before the first question: nothing to attach it to.
            continue
        elif line.startswith(('A)', 'B)', 'C)', 'D)')):
            current_question['options'].append(line)
        elif line.startswith('Correct Answer:'):
            raw_answer = line.partition(':')[2].strip()
            letter = answer_letter.match(raw_answer)
            current_question['correct_answer'] = (
                letter.group(1).upper() if letter else raw_answer
            )
    if current_question is not None:
        questions.append(current_question)
    return questions

def reset_state():
    """Clear the per-video quiz data and restore the input defaults.

    Removes the ``transcript``, ``translated_transcript``, ``questions``,
    ``answers`` and ``submitted`` entries from ``st.session_state`` when
    present, then sets ``video_url`` to an empty string and
    ``num_questions`` to 10.
    """
    state = st.session_state
    transient_keys = ('transcript', 'translated_transcript', 'questions', 'answers', 'submitted')
    present = [name for name in transient_keys if name in state]
    for name in present:
        del state[name]

    state['video_url'] = ""
    state['num_questions'] = 10

def _option_text(option_line):
    """Return the answer text of an option line such as ``"A) Paris"``."""
    # Option lines always start with a two-character "X)" marker. Slicing it
    # off keeps text that itself contains ") " and also handles a missing
    # space after the marker.
    return option_line[2:].strip()


def display_mcqs():
    """Render the quiz, collect answers and show the graded result.

    Reads ``questions``, ``answers`` and ``submitted`` from
    ``st.session_state``. For every question a radio group with the option
    texts is shown and the current selection is stored in ``answers``. Once
    "Submit Answers" has been pressed, each question is graded against its
    ``correct_answer`` and a score summary is displayed. "Start Again" is
    always offered; it resets the state and reruns the script.
    """
    if st.session_state['questions']:
        for i, q in enumerate(st.session_state['questions']):
            st.subheader(f"Q{i+1}: {q['question']}")
            options = [_option_text(opt) for opt in q['options']]
            answer = st.radio("Select an answer:", options, key=f"q{i}", index=None)
            st.session_state['answers'][i] = answer

        if st.button("Submit Answers"):
            st.session_state['submitted'] = True

        if st.session_state['submitted']:
            correct_count = 0
            for i, q in enumerate(st.session_state['questions']):
                st.subheader(f"Q{i+1}: {q['question']}")
                # A question may lack an answer line; an empty prefix would
                # match every option, so treat both cases as "no answer".
                correct_option = q.get('correct_answer')
                selected_option = st.session_state['answers'][i]

                correct_line = next(
                    (
                        opt for opt in q['options']
                        if correct_option and opt.startswith(correct_option)
                    ),
                    None,
                )
                if correct_line is None:
                    st.error("There was an error with the correct answer. Please try again.")
                    continue

                correct_answer = _option_text(correct_line)
                if not selected_option:
                    st.warning("You didn't answer this question.")
                elif selected_option == correct_answer:
                    st.success("Correct!")
                    correct_count += 1
                else:
                    st.error(f"Incorrect. The correct answer is {correct_answer}")

            # Non-zero: this branch only runs when the question list is non-empty.
            total_questions = len(st.session_state['questions'])
            score_percentage = (correct_count / total_questions) * 100
            st.write(f"Your score: {correct_count}/{total_questions} ({score_percentage:.2f}%)")
            if score_percentage >= 80:
                st.success("Great job! You have a good understanding of the video content.")
            elif score_percentage >= 60:
                st.info("Good effort! You might want to review some parts of the video.")
            else:
                st.warning("You might need to watch the video again to improve your understanding.")

    if st.button("Start Again"):
        reset_state()
        # st.experimental_rerun was replaced by st.rerun; prefer the new name
        # and fall back only on Streamlit versions that lack it.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()

# Streamlit App
#
# Script flow on every run: pick a language, read the video URL, download
# and translate the transcript (cached in session state), let the user
# generate questions, then render the quiz.
st.set_page_config(page_title="YouTube Video MCQ Generator", layout="wide")
st.title("YouTube Video MCQ Generator")

# Initialize session state variables (only those not set yet in this session)
for _key, _default in (
    ('language', "English"),
    ('video_url', ""),
    ('num_questions', 10),
    ('submitted', False),
    ('questions', None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Language selection
st.session_state['language'] = st.radio("Select language for questions:", ["English", "Hindi"])
language_code = 'en' if st.session_state['language'] == "English" else 'hi'

# Input for YouTube URL
st.session_state['video_url'] = st.text_input("Enter YouTube Video URL:", value=st.session_state['video_url'])

# The cached transcript belongs to exactly one URL. When the URL changes,
# drop everything derived from the previous video so it is fetched again
# instead of silently reusing the old transcript and questions.
if st.session_state.get('transcript_url') != st.session_state['video_url']:
    for _key in ('transcript', 'translated_transcript'):
        if _key in st.session_state:
            del st.session_state[_key]
    st.session_state['questions'] = None
    st.session_state['submitted'] = False
    st.session_state['transcript_url'] = st.session_state['video_url']

# Likewise, a cached translation is only valid for the language it was made in.
if (
    'translated_transcript' in st.session_state
    and st.session_state.get('translated_language') != language_code
):
    del st.session_state['translated_transcript']
    st.session_state['questions'] = None
    st.session_state['submitted'] = False

if st.session_state['video_url']:
    if 'transcript' not in st.session_state:
        transcript = download_youtube_transcript(st.session_state['video_url'])
        if transcript:
            st.session_state['transcript'] = transcript

    if 'translated_transcript' not in st.session_state and 'transcript' in st.session_state:
        with st.spinner("Translating transcript..."):
            translated_transcript = translate_text(st.session_state['transcript'], language_code)
            st.session_state['translated_transcript'] = translated_transcript
            # Remember which language this translation is for (see above).
            st.session_state['translated_language'] = language_code

    if 'translated_transcript' in st.session_state:
        st.success("Transcript downloaded and translated successfully!")

        st.session_state['num_questions'] = st.slider("Number of questions to generate:", min_value=5, max_value=20, value=st.session_state['num_questions'])

        if st.button("Generate MCQs"):
            with st.spinner(f"Generating {st.session_state['num_questions']} questions in {st.session_state['language']}..."):
                mcqs_text = generate_mcqs(st.session_state['translated_transcript'], st.session_state['num_questions'], st.session_state['language'])
                questions = parse_mcqs(mcqs_text)
                st.session_state['questions'] = questions
                st.session_state['answers'] = [''] * len(questions)
                st.session_state['submitted'] = False
            if not questions:
                # Without this the page would simply show nothing.
                st.error("No questions could be parsed from the model response. Please try again.")

if st.session_state['questions']:
    display_mcqs()