Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api.formatters import TextFormatter | |
| import re | |
| from gradio_client import Client | |
| from deep_translator import GoogleTranslator | |
| import os | |
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or ``None``.

    Recognised forms (scheme and ``www.`` are optional):

    * ``youtube.com/watch?v=<id>`` - ``v`` may follow other query parameters
    * ``youtube.com/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>``
    * ``youtu.be/<id>``

    Surrounding whitespace is ignored so pasted URLs still match. Anything
    that is not a string, or that matches none of the forms, yields ``None``.
    """
    if not isinstance(url, str):
        return None

    video_id = r'([_\-a-zA-Z0-9]{11})'
    host = r'(?:https?://)?(?:www\.|m\.|music\.)?youtube\.com/'
    patterns = (
        # "(?:[^#\s]*?&)?" lets other query parameters precede "v=".
        host + r'watch\?(?:[^#\s]*?&)?v=' + video_id,
        host + r'(?:embed|shorts|live)/' + video_id,
        r'(?:https?://)?(?:www\.)?youtu\.be/' + video_id,
    )

    candidate = url.strip()
    for pattern in patterns:
        match = re.match(pattern, candidate)
        if match:
            return match.group(1)
    return None
def download_youtube_transcript(video_url):
    """Download a transcript for the video at *video_url* as plain text.

    The available transcripts are tried in the order the API lists them and
    the first one that can be fetched is used. Every failure path reports the
    problem through ``st.error`` and returns ``None``.
    """
    vid = extract_video_id(video_url)
    if not vid:
        st.error("Invalid YouTube URL.")
        return None

    try:
        fetched = None
        for candidate in YouTubeTranscriptApi.list_transcripts(vid):
            try:
                fetched = candidate.fetch()
            except Exception:
                # This transcript could not be fetched; try the next one.
                continue
            break

        if not fetched:
            st.error("Transcript not available for this video.")
            return None

        return TextFormatter().format_transcript(fetched)
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None
def translate_text(text, dest_language):
    """Translate *text* into *dest_language* and return the result.

    The text is sent to the translator in chunks of at most 500 characters.
    Chunks are cut at whitespace so a word is never split across two
    requests (a word is only broken if it alone exceeds the limit). Runs of
    whitespace, including newlines, are collapsed to single spaces, and the
    translated chunks are joined with single spaces.

    Empty or whitespace-only input returns ``""`` without contacting the
    translation service.
    """
    import textwrap

    if not text or not text.strip():
        return ""

    translator = GoogleTranslator(source='auto', target=dest_language)
    chunk_size = 500
    # break_on_hyphens=False keeps "well-known" intact; otherwise the final
    # ' '.join would turn a hyphen break into "well- known".
    chunks = textwrap.wrap(text, width=chunk_size, break_on_hyphens=False)
    translated_chunks = [translator.translate(chunk) for chunk in chunks]
    # Skip empty results so one untranslatable chunk cannot break the join.
    return ' '.join(piece for piece in translated_chunks if piece)
def generate_mcqs(transcript, num_questions=10, language='en'):
    """Ask the remote model for multiple-choice questions about *transcript*.

    Args:
        transcript: Text the questions should be based on.
        num_questions: Number of questions requested in the prompt.
        language: Language the questions should be written in. The codes
            ``'en'`` and ``'hi'`` are expanded to ``English`` / ``Hindi``;
            any other value is inserted into the prompt unchanged.

    Returns:
        Whatever the ``/predict`` endpoint returns for the prompt.

    Raises:
        RuntimeError: If the ``CLIENT`` environment variable, which names the
            Gradio app to call, is not set.
    """
    endpoint = os.getenv("CLIENT")
    if not endpoint:
        raise RuntimeError(
            "The CLIENT environment variable is not set; it must name the "
            "Gradio app used to generate questions."
        )

    language_name = {'en': 'English', 'hi': 'Hindi'}.get(language, language)
    # The structural markers are pinned because the questions are located in
    # the reply by the literal "Q:", "A)".."D)" and "Correct Answer:" prefixes.
    prompt = f"""
Based on the following transcript, generate exactly {num_questions} multiple-choice questions (MCQs) with 4 options each.
Ensure the questions cover various aspects of the video content to thoroughly check user understanding.
Write every question and option in {language_name}, but keep the markers "Q:", "A)", "B)", "C)", "D)" and "Correct Answer:" exactly as shown below.
Format each question as follows:
Q: [Question]
A) [Option A]
B) [Option B]
C) [Option C]
D) [Option D]
Correct Answer: [A/B/C/D]
Transcript:
{transcript}
"""
    client = Client(endpoint)
    return client.predict(prompt=prompt, api_name="/predict")
def parse_mcqs(mcqs_text):
    """Parse generated MCQ text into a list of question dictionaries.

    Recognised lines (after stripping surrounding whitespace):

    * ``Q: ...`` starts a new question
    * ``A) ...`` to ``D) ...`` add an option, stored with its label
    * ``Correct Answer: ...`` records the answer

    Each result has the keys ``'question'``, ``'options'`` and
    ``'correct_answer'``. Answers such as ``"b"``, ``"B)"``, ``"[B]"`` or
    ``"B) some text"`` are normalised to the bare upper-case letter; an
    answer that does not start with a letter A-D is stored as written.

    Lines before the first ``Q:`` are ignored. Questions without any option
    or without a correct answer are dropped, because they cannot be shown or
    graded. Empty or ``None`` input yields an empty list.
    """
    questions = []
    current = None

    def _finish(question):
        # Keep only questions that can actually be displayed and graded.
        if question and question['options'] and 'correct_answer' in question:
            questions.append(question)

    for raw_line in (mcqs_text or "").splitlines():
        line = raw_line.strip()
        if line.startswith('Q:'):
            _finish(current)
            current = {'question': line[2:].strip(), 'options': []}
        elif current is None:
            # Preamble before the first question (e.g. "Here are your MCQs").
            continue
        elif line.startswith(('A)', 'B)', 'C)', 'D)')):
            current['options'].append(line)
        elif line.startswith('Correct Answer:'):
            answer = line.split(':', 1)[1].strip()
            letter = re.match(r'[\[\(\s]*([A-Da-d])(?![A-Za-z])', answer)
            if letter:
                current['correct_answer'] = letter.group(1).upper()
            elif answer:
                current['correct_answer'] = answer
    _finish(current)
    return questions
def reset_state():
    """Clear the per-video quiz data and restore the input defaults.

    Removes the transcript, translation, questions, answers and submission
    flag from the session state when present, then resets the URL field to
    an empty string and the question count to 10.
    """
    per_video_keys = ('transcript', 'translated_transcript', 'questions', 'answers', 'submitted')
    for name in [k for k in per_video_keys if k in st.session_state]:
        del st.session_state[name]

    st.session_state['num_questions'] = 10
    st.session_state['video_url'] = ""
def _option_text(option):
    """Return an option line such as ``"A) Paris"`` without its ``"A)"`` label."""
    # Slice off the two-character label instead of splitting on ") ": the
    # option text may itself contain ") ", or have no space after the label.
    return option[2:].strip()


def display_mcqs():
    """Render the quiz stored in the session state and, once submitted, grade it.

    Reads ``questions``, ``answers`` and ``submitted`` from
    ``st.session_state``. Each question is shown with a radio group; the
    current selections are written back into ``answers``. After "Submit
    Answers" is pressed, every question is marked, a score and a feedback
    message are shown, and a "Start Again" button resets the app.
    Does nothing when there are no questions.
    """
    questions = st.session_state.get('questions')
    if not questions:
        return

    # Make sure there is exactly one answer slot per question.
    answers = st.session_state.get('answers')
    if not isinstance(answers, list) or len(answers) != len(questions):
        answers = [None] * len(questions)
        st.session_state['answers'] = answers

    for i, q in enumerate(questions):
        st.subheader(f"Q{i+1}: {q['question']}")
        options = [_option_text(opt) for opt in q['options']]
        answers[i] = st.radio("Select an answer:", options, key=f"q{i}", index=None)

    if st.button("Submit Answers"):
        st.session_state['submitted'] = True

    if not st.session_state.get('submitted'):
        return

    correct_count = 0
    for i, q in enumerate(questions):
        st.subheader(f"Q{i+1}: {q['question']}")
        correct_option = str(q.get('correct_answer', '')).strip()
        selected_option = answers[i]
        # An empty answer must not match: every string starts with "".
        correct_line = next(
            (opt for opt in q['options'] if correct_option and opt.startswith(correct_option)),
            None,
        )
        if correct_line is None:
            st.error("There was an error with the correct answer. Please try again.")
            continue

        correct_answer = _option_text(correct_line)
        if not selected_option:
            st.warning("You didn't answer this question.")
        elif selected_option == correct_answer:
            st.success("Correct!")
            correct_count += 1
        else:
            st.error(f"Incorrect. The correct answer is {correct_answer}")

    total_questions = len(questions)
    score_percentage = (correct_count / total_questions) * 100
    st.write(f"Your score: {correct_count}/{total_questions} ({score_percentage:.2f}%)")
    if score_percentage >= 80:
        st.success("Great job! You have a good understanding of the video content.")
    elif score_percentage >= 60:
        st.info("Good effort! You might want to review some parts of the video.")
    else:
        st.warning("You might need to watch the video again to improve your understanding.")

    if st.button("Start Again"):
        reset_state()
        # Prefer st.rerun; fall back to the older experimental name on
        # Streamlit versions that do not provide it.
        rerun = getattr(st, "rerun", None) or st.experimental_rerun
        rerun()
# Streamlit App
st.set_page_config(page_title="YouTube Video MCQ Generator", layout="wide")
st.title("YouTube Video MCQ Generator")

# Initialize session state variables
for _key, _default in (
    ('language', "English"),
    ('video_url', ""),
    ('num_questions', 10),
    ('submitted', False),
    ('questions', None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default


def _drop_cached(*keys):
    """Remove the given cached entries and any quiz that was built from them."""
    for key in keys:
        if key in st.session_state:
            del st.session_state[key]
    st.session_state['questions'] = None
    st.session_state['submitted'] = False


# Language selection
st.session_state['language'] = st.radio("Select language for questions:", ["English", "Hindi"])
language_code = 'en' if st.session_state['language'] == "English" else 'hi'

# Input for YouTube URL
st.session_state['video_url'] = st.text_input("Enter YouTube Video URL:", value=st.session_state['video_url'])

# The transcript and its translation are cached in the session state. Remember
# which URL and language they belong to, so that changing either one fetches
# fresh data instead of silently reusing the previous video or language.
if st.session_state.get('transcript_source_url') != st.session_state['video_url']:
    _drop_cached('transcript', 'translated_transcript')
    st.session_state['transcript_source_url'] = st.session_state['video_url']
if st.session_state.get('transcript_language') != language_code:
    _drop_cached('translated_transcript')
    st.session_state['transcript_language'] = language_code

if st.session_state['video_url']:
    if 'transcript' not in st.session_state:
        transcript = download_youtube_transcript(st.session_state['video_url'])
        if transcript:
            st.session_state['transcript'] = transcript

    if 'translated_transcript' not in st.session_state and 'transcript' in st.session_state:
        with st.spinner("Translating transcript..."):
            try:
                translated_transcript = translate_text(st.session_state['transcript'], language_code)
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing the page.
                st.error(f"Translation failed: {e}")
            else:
                st.session_state['translated_transcript'] = translated_transcript

    if 'translated_transcript' in st.session_state:
        st.success("Transcript downloaded and translated successfully!")
        st.session_state['num_questions'] = st.slider("Number of questions to generate:", min_value=5, max_value=20, value=st.session_state['num_questions'])

        if st.button("Generate MCQs"):
            with st.spinner(f"Generating {st.session_state['num_questions']} questions in {st.session_state['language']}..."):
                try:
                    mcqs_text = generate_mcqs(st.session_state['translated_transcript'], st.session_state['num_questions'], st.session_state['language'])
                except Exception as e:
                    # Top-level UI boundary: report the failure instead of crashing the page.
                    st.error(f"Could not generate questions: {e}")
                else:
                    questions = parse_mcqs(mcqs_text)
                    st.session_state['questions'] = questions
                    st.session_state['answers'] = [''] * len(questions)
                    st.session_state['submitted'] = False
                    if not questions:
                        st.warning("No questions could be read from the generated text. Please try again.")

        if st.session_state['questions']:
            display_mcqs()