# Page-capture header (not part of the program): Spaces - Sleeping; File size: 7,875 Bytes; revision 389d3a7.
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import re
from gradio_client import Client
from deep_translator import GoogleTranslator
import os
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Recognised forms (the scheme is optional in all of them):
      * ``youtube.com/watch?v=<id>``, with ``v`` anywhere in the query string
        and an optional ``www.``, ``m.`` or ``music.`` host prefix
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``
      * ``youtu.be/<id>``

    Surrounding whitespace is ignored. Anything that is not a string, or that
    matches none of the forms above, yields None.
    """
    if not isinstance(url, str):
        return None
    url = url.strip()

    video_id = r'([_\-a-zA-Z0-9]{11})'
    patterns = (
        # The lazy optional group lets ``v=`` follow other parameters while
        # still preferring the first ``v=`` when it comes right after ``?``.
        r'(?:https?://)?(?:www\.|m\.|music\.)?youtube\.com/watch\?(?:[^#\s]*?&)??v=' + video_id,
        r'(?:https?://)?(?:www\.|m\.)?youtube\.com/(?:embed|shorts|live|v)/' + video_id,
        r'(?:https?://)?(?:www\.)?youtu\.be/' + video_id,
    )
    for pattern in patterns:
        match = re.match(pattern, url)
        if match:
            return match.group(1)
    return None
def download_youtube_transcript(video_url):
    """Return the plain-text transcript of the video at *video_url*, or None.

    The video ID is taken from the URL with ``extract_video_id``. The
    transcripts listed for the video are tried in order, and the first one
    whose ``fetch()`` call succeeds is formatted with ``TextFormatter``.
    Every failure is reported through ``st.error`` and results in None.
    """
    vid = extract_video_id(video_url)
    if not vid:
        st.error("Invalid YouTube URL.")
        return None

    try:
        fetched = None
        for candidate in YouTubeTranscriptApi.list_transcripts(vid):
            try:
                fetched = candidate.fetch()
            except Exception:
                # Best effort: this transcript could not be fetched, try the next.
                continue
            else:
                break

        if fetched:
            return TextFormatter().format_transcript(fetched)

        st.error("Transcript not available for this video.")
        return None
    except Exception as err:
        st.error(f"An error occurred: {err}")
        return None
def _split_on_whitespace(text, chunk_size):
    """Cut *text* into stripped pieces of at most *chunk_size* characters, breaking at whitespace when possible."""
    pieces = []
    start, total = 0, len(text)
    while start < total:
        end = min(start + chunk_size, total)
        if end < total and not text[end].isspace():
            # The cut would land inside a word: back up to just after the
            # last whitespace in the window. A single word longer than the
            # window has no such position and is hard-cut instead.
            window = text[start:end]
            cut = max(window.rfind(' '), window.rfind('\n'), window.rfind('\t'))
            if cut > 0:
                end = start + cut + 1
        piece = text[start:end].strip()
        if piece:
            pieces.append(piece)
        start = end
    return pieces


def translate_text(text, dest_language, chunk_size=500):
    """Translate *text* into *dest_language* and return the result as one string.

    The text is sent to ``GoogleTranslator`` (source language auto-detected)
    in pieces of at most *chunk_size* characters. Pieces are cut at
    whitespace so that words are not split in half and then rejoined with a
    space in the middle. The translated pieces are joined with single spaces.

    Args:
        text: The text to translate. Empty or whitespace-only text yields ''.
        dest_language: Target language code passed to ``GoogleTranslator``.
        chunk_size: Maximum number of characters per request (default 500).

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not text or not text.strip():
        return ''

    translator = GoogleTranslator(source='auto', target=dest_language)
    translated = (
        translator.translate(piece)
        for piece in _split_on_whitespace(text, chunk_size)
    )
    # NOTE(review): translate() has been seen to return None for some inputs;
    # such results are skipped so the join cannot fail - confirm against the
    # installed deep_translator version.
    return ' '.join(part for part in translated if part)
def generate_mcqs(transcript, num_questions=10, language='en'):
    """Ask the configured Gradio app to write multiple-choice questions.

    The app is identified by the ``CLIENT`` environment variable, which is
    passed to ``gradio_client.Client``. The prompt requests exactly
    *num_questions* questions in a fixed ``Q:`` / ``A)``..``D)`` /
    ``Correct Answer:`` layout, which is the layout ``parse_mcqs`` reads.

    Args:
        transcript: Transcript text the questions are based on.
        num_questions: Number of questions to request.
        language: Language the questions should be written in; it is
            inserted into the prompt verbatim.

    Returns:
        Whatever the app's ``/predict`` endpoint returns for the prompt.

    Raises:
        RuntimeError: If the ``CLIENT`` environment variable is not set.
    """
    source = os.getenv("CLIENT")
    if not source:
        raise RuntimeError(
            "The CLIENT environment variable is not set; it must identify "
            "the Gradio app used to generate questions."
        )
    client = Client(source)
    # The prompt lines are flush-left so no indentation leaks into the prompt.
    prompt = f"""
Based on the following transcript, generate exactly {num_questions} multiple-choice questions (MCQs) with 4 options each.
Ensure the questions cover various aspects of the video content to thoroughly check user understanding.
Format each question as follows:
Q: [Question]
A) [Option A]
B) [Option B]
C) [Option C]
D) [Option D]
Correct Answer: [A/B/C/D]
Write the questions and options in this language: {language}. Keep the "Q:", "A)", "B)", "C)", "D)" and "Correct Answer:" labels exactly as shown.
Transcript:
{transcript}
"""
    return client.predict(prompt=prompt, api_name="/predict")
def parse_mcqs(mcqs_text):
    """Parse generated question text into a list of question dicts.

    Expected layout per question (lines are stripped before matching)::

        Q: <question>
        A) <option>
        B) <option>
        C) <option>
        D) <option>
        Correct Answer: <letter>

    Each returned dict has the keys ``'question'`` (text after ``Q:``),
    ``'options'`` (the full option lines, label included) and
    ``'correct_answer'``. The answer is reduced to its upper-case letter when
    it starts with one (``"B"``, ``"[b]"`` and ``"B) Paris"`` all give
    ``"B"``); otherwise the raw text is kept.

    Lines that appear before the first ``Q:`` are ignored, and questions that
    end up without options or without a correct answer are dropped because
    they can be neither displayed nor graded. Empty input yields ``[]``.
    """
    if not mcqs_text:
        return []

    def is_usable(entry):
        return (
            entry is not None
            and bool(entry['options'])
            and bool(entry.get('correct_answer'))
        )

    questions = []
    current = None
    for raw_line in mcqs_text.split('\n'):
        line = raw_line.strip()
        if line.startswith('Q:'):
            if is_usable(current):
                questions.append(current)
            current = {'question': line[2:].strip(), 'options': []}
        elif current is None:
            # Preamble, or a stray option/answer line with no question yet.
            continue
        elif line.startswith(('A)', 'B)', 'C)', 'D)')):
            current['options'].append(line)
        elif line.startswith('Correct Answer:'):
            answer = line.split(':', 1)[1].strip()
            # Skip leading decoration such as "[" or "(" and pick the letter.
            letter = re.match(r'\W*([A-Da-d])\b', answer)
            current['correct_answer'] = letter.group(1).upper() if letter else answer

    if is_usable(current):
        questions.append(current)
    return questions
def reset_state():
    """Forget the current quiz and restore the input defaults.

    Removes the transcript, translated transcript, questions, answers and
    submitted flag from ``st.session_state`` when present, then sets the
    video URL back to an empty string and the question count back to 10.
    """
    session = st.session_state
    stale_keys = ('transcript', 'translated_transcript', 'questions', 'answers', 'submitted')
    for name in stale_keys:
        if name not in session:
            continue
        del session[name]

    session['video_url'] = ""
    session['num_questions'] = 10
def _option_text(option):
    """Return the answer text of an option line such as ``"A) Paris"`` (here ``"Paris"``)."""
    _label, separator, text = option.partition(')')
    # Everything after the first ")" is the answer, so text that itself
    # contains ") " is kept whole and a missing space cannot raise.
    return text.strip() if separator else option.strip()


def display_mcqs():
    """Render the quiz stored in ``st.session_state`` and grade it on submit.

    Each question in ``st.session_state['questions']`` is shown with a radio
    group, and the current selection is written to
    ``st.session_state['answers']``. Once "Submit Answers" has been pressed,
    every question is marked correct, incorrect or unanswered, the score is
    shown with a short verdict, and a "Start Again" button calls
    ``reset_state`` and reruns the script. Does nothing when there are no
    questions.
    """
    questions = st.session_state['questions']
    if not questions:
        return

    for i, q in enumerate(questions):
        st.subheader(f"Q{i+1}: {q['question']}")
        options = [_option_text(opt) for opt in q['options']]
        answer = st.radio("Select an answer:", options, key=f"q{i}", index=None)
        st.session_state['answers'][i] = answer

    if st.button("Submit Answers"):
        st.session_state['submitted'] = True

    if not st.session_state['submitted']:
        return

    correct_count = 0
    for i, q in enumerate(questions):
        st.subheader(f"Q{i+1}: {q['question']}")
        correct_option = q.get('correct_answer')
        selected_option = st.session_state['answers'][i]

        # A missing or empty answer key must not match: str.startswith('')
        # is true for every option and would silently mark the first one
        # as correct.
        matching_line = next(
            (opt for opt in q['options'] if correct_option and opt.startswith(correct_option)),
            None,
        )
        if matching_line is None:
            st.error("There was an error with the correct answer. Please try again.")
            continue

        correct_answer = _option_text(matching_line)
        if not selected_option:
            st.warning("You didn't answer this question.")
        elif selected_option == correct_answer:
            st.success("Correct!")
            correct_count += 1
        else:
            st.error(f"Incorrect. The correct answer is {correct_answer}")

    total_questions = len(questions)
    score_percentage = (correct_count / total_questions) * 100
    st.write(f"Your score: {correct_count}/{total_questions} ({score_percentage:.2f}%)")
    if score_percentage >= 80:
        st.success("Great job! You have a good understanding of the video content.")
    elif score_percentage >= 60:
        st.info("Good effort! You might want to review some parts of the video.")
    else:
        st.warning("You might need to watch the video again to improve your understanding.")

    if st.button("Start Again"):
        reset_state()
        # Newer Streamlit releases provide st.rerun in place of the
        # deprecated st.experimental_rerun; use whichever is available.
        rerun = getattr(st, 'rerun', None) or st.experimental_rerun
        rerun()
# Streamlit App
st.set_page_config(page_title="YouTube Video MCQ Generator", layout="wide")
st.title("YouTube Video MCQ Generator")

# Initialize session state variables
if 'language' not in st.session_state:
    st.session_state['language'] = "English"
if 'video_url' not in st.session_state:
    st.session_state['video_url'] = ""
if 'num_questions' not in st.session_state:
    st.session_state['num_questions'] = 10
if 'submitted' not in st.session_state:
    st.session_state['submitted'] = False
if 'questions' not in st.session_state:
    st.session_state['questions'] = None

# Language selection
st.session_state['language'] = st.radio("Select language for questions:", ["English", "Hindi"])
language_code = 'en' if st.session_state['language'] == "English" else 'hi'

# Input for YouTube URL
st.session_state['video_url'] = st.text_input("Enter YouTube Video URL:", value=st.session_state['video_url'])

if st.session_state['video_url']:
    # The transcript and its translation are cached in session state. Drop
    # them when the URL they were fetched for (or the language they were
    # translated into) no longer matches the current inputs; otherwise a new
    # video would keep producing questions about the previous one.
    if st.session_state.get('transcript_url') != st.session_state['video_url']:
        for stale_key in ('transcript', 'translated_transcript'):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state['questions'] = None
        st.session_state['submitted'] = False
    if (
        'translated_transcript' in st.session_state
        and st.session_state.get('translated_language') != language_code
    ):
        del st.session_state['translated_transcript']

    if 'transcript' not in st.session_state:
        transcript = download_youtube_transcript(st.session_state['video_url'])
        if transcript:
            st.session_state['transcript'] = transcript
            st.session_state['transcript_url'] = st.session_state['video_url']

    if 'translated_transcript' not in st.session_state and 'transcript' in st.session_state:
        with st.spinner("Translating transcript..."):
            translated_transcript = translate_text(st.session_state['transcript'], language_code)
            st.session_state['translated_transcript'] = translated_transcript
            st.session_state['translated_language'] = language_code

    if 'translated_transcript' in st.session_state:
        st.success("Transcript downloaded and translated successfully!")
        st.session_state['num_questions'] = st.slider("Number of questions to generate:", min_value=5, max_value=20, value=st.session_state['num_questions'])
        if st.button("Generate MCQs"):
            mcqs_text = None
            with st.spinner(f"Generating {st.session_state['num_questions']} questions in {st.session_state['language']}..."):
                try:
                    mcqs_text = generate_mcqs(st.session_state['translated_transcript'], st.session_state['num_questions'], st.session_state['language'])
                except Exception as e:
                    # UI boundary: report the failure the same way transcript
                    # download errors are reported instead of crashing the page.
                    st.error(f"An error occurred: {e}")
            if mcqs_text is not None:
                questions = parse_mcqs(mcqs_text)
                st.session_state['questions'] = questions
                st.session_state['answers'] = [''] * len(questions)
                st.session_state['submitted'] = False
                if not questions:
                    st.warning("No questions could be read from the generated text. Please try again.")

if st.session_state['questions']:
    display_mcqs()