"""Vernacular Learning Platform.

Gradio application that ingests PDFs, YouTube transcripts and web pages,
translates and summarizes them, and offers quizzes, flashcards, lesson
plans, a forum and video sessions backed by a SQLite database.
"""

import csv
import io
import json
import os
import random
import re
import sqlite3
import uuid
from datetime import datetime, timedelta

import faiss
import gradio as gr
import nltk
import numpy as np
import pandas as pd
import pdf2image
import pdfplumber
import PyPDF2
import pytesseract
import requests
import tabula
import torch
from bs4 import BeautifulSoup
from gtts import gTTS
from PIL import Image
from pytube import YouTube
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    MarianMTModel,
    MarianTokenizer,
    pipeline,
)
from youtube_transcript_api import YouTubeTranscriptApi

# Set NLTK data path to persistent storage
nltk.data.path.append('/data/nltk_data')

# Download NLTK resources if not already present.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK
# releases load the sentence tokenizer from that resource.
for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    try:
        # nltk.download reports most failures by returning False, not raising.
        if not nltk.download(_nltk_resource, download_dir='/data/nltk_data'):
            print(f"Failed to download NLTK data: {_nltk_resource}")
    except Exception as e:
        print(f"Failed to download NLTK data: {e}")

# Check GPU availability
print("Checking GPU availability...")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
else:
    print("Device set to use CPU")

# Create file storage directory
os.makedirs('/data/files', exist_ok=True)
if not os.access('/data/files', os.W_OK):
    raise PermissionError("No write permission for /data/files directory")

# One CREATE TABLE statement per table; all are idempotent.
_SCHEMA_STATEMENTS = (
    '''
    CREATE TABLE IF NOT EXISTS users (
        user_id TEXT PRIMARY KEY,
        username TEXT,
        age INTEGER,
        education_level TEXT,
        language_preference TEXT,
        learning_goal TEXT,
        learning_style TEXT,
        progress TEXT,
        quiz_results TEXT,
        feedback TEXT
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS content (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        source TEXT,
        text TEXT,
        language TEXT,
        category TEXT,
        subcategory TEXT,
        embedding TEXT
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS embeddings (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        source TEXT,
        embedding TEXT
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS feedback (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id TEXT,
        text TEXT,
        sentiment TEXT,
        score REAL,
        FOREIGN KEY (user_id) REFERENCES users (user_id)
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS video_sessions (
        session_id TEXT PRIMARY KEY,
        user_id TEXT,
        session_name TEXT,
        start_time TEXT,
        duration INTEGER,
        participants TEXT,
        content_id TEXT,
        scheduled_time TEXT,
        FOREIGN KEY (user_id) REFERENCES users (user_id)
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS forum (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_id TEXT,
        message TEXT,
        timestamp TEXT,
        FOREIGN KEY (user_id) REFERENCES users (user_id)
    )
    ''',
)


# Initialize SQLite database
def init_sqlite_db():
    """Open the application database, creating any missing tables.

    Returns:
        An open ``sqlite3.Connection`` to ``/data/vernacular_learning.db``.
        The caller is responsible for closing it.

    Raises:
        PermissionError: if ``/data`` is not writable.
        sqlite3.Error: if the database cannot be opened or the schema
            cannot be created.
    """
    os.makedirs('/data', exist_ok=True)
    if not os.access('/data', os.W_OK):
        raise PermissionError("No write permission for /data directory")
    conn = sqlite3.connect('/data/vernacular_learning.db')
    try:
        c = conn.cursor()
        for statement in _SCHEMA_STATEMENTS:
            c.execute(statement)
        conn.commit()
    except Exception:
        # Do not leak the handle when schema creation fails.
        conn.close()
        raise
    return conn


# Define supported languages
SUPPORTED_LANGUAGES = {
    "Hindi": "hi",
    "Tamil": "ta",
    "Bengali": "bn",
    "English": "en",
}

# Define learning paths
LEARNING_PATHS = {
    "Vocational Skills": [
        "Basic Communication",
        "Technical Vocabulary",
        "Practical Applications",
    ],
    "Exam Preparation": [
        "Subject Terminology",
        "Question Formats",
        "Speed Learning Techniques",
    ],
}

# Initialize models
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
translation_model_name = "Helsinki-NLP/opus-mt-en-hi"
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)
summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_model = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
lesson_plan_model = pipeline("text2text-generation", model="t5-small")

# FAISS index for similarity search; `dimension` must match the size of the
# vectors produced by `sentence_model`.
dimension = 384
index = faiss.IndexFlatL2(dimension)

# content.id for every row in `index`, in insertion order. FAISS only hands
# back row positions, so this list is what ties a search hit to a database
# row. Entries are None for embeddings stored without a content row.
_index_content_ids = []

# Loaded (tokenizer, model) pairs keyed by model name, seeded with the pair
# already loaded at startup so translation never reloads a model per call.
_translation_models = {
    translation_model_name: (translation_tokenizer, translation_model),
}

# Initialize database. This module-level connection is kept for
# compatibility; the functions below open their own short-lived connections.
conn = init_sqlite_db()


def _rebuild_index_from_db():
    """Load embeddings persisted in the content table into the FAISS index.

    The index lives only in memory, so without this step every restart
    would leave previously stored content unsearchable.
    """
    db = init_sqlite_db()
    try:
        rows = db.execute("SELECT id, embedding FROM content ORDER BY id").fetchall()
    finally:
        db.close()
    for content_id, raw_embedding in rows:
        try:
            vector = np.asarray(json.loads(raw_embedding), dtype=np.float32)
        except (TypeError, ValueError):
            continue  # missing or malformed embedding JSON
        if vector.shape != (dimension,):
            continue
        index.add(vector.reshape(1, -1))
        _index_content_ids.append(content_id)


_rebuild_index_from_db()


def preprocess_text(text):
    """Collapse whitespace in *text* and split it into a list of sentences."""
    text = re.sub(r'\s+', ' ', text).strip()
    sentences = nltk.sent_tokenize(text)
    return sentences


def _load_translation_model(model_name):
    """Return the cached (tokenizer, model) pair for *model_name*, loading it once."""
    if model_name not in _translation_models:
        _translation_models[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _translation_models[model_name]


def translate_text(text, target_lang):
    """Translate English *text* into *target_lang* (a language code).

    Returns *text* unchanged when it is empty, when the target is English or
    unsupported, or when translation fails.

    NOTE: the tokenizer truncates its input, so only the leading part of a
    long text is translated.
    """
    if not text or target_lang == "en" or target_lang not in SUPPORTED_LANGUAGES.values():
        return text
    model_name = f"Helsinki-NLP/opus-mt-en-{target_lang}"
    try:
        tokenizer, model = _load_translation_model(model_name)
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(**inputs)
        return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
    except Exception as exc:
        print(f"Translation to '{target_lang}' failed: {exc}")
        return text


def summarize_text(text, max_length=130, min_length=30):
    """Summarize *text*; fall back to its first *max_length* characters on failure."""
    try:
        # truncation=True keeps over-long inputs within the model's limit
        # instead of failing and silently taking the fallback path.
        summary = summarization_model(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as exc:
        print(f"Summarization failed: {exc}")
        return text[:max_length]


def generate_tts(text, language, output_file):
    """Save speech for *text* to *output_file*; return the path, or None on failure."""
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        tts.save(output_file)
        return output_file
    except Exception as exc:
        print(f"Text-to-speech failed: {exc}")
        return None


def generate_pdf(content, output_file):
    """Write *content* to a letter-size PDF; return the path, or None on failure.

    Long lines are wrapped rather than cut off, so no text is dropped.
    """
    import textwrap

    try:
        pdf = canvas.Canvas(output_file, pagesize=letter)
        pdf.drawString(100, 750, "Vernacular Learning Content")
        y = 700
        for line in content.split('\n'):
            # `or [""]` preserves blank lines, which wrap() returns as [].
            for segment in textwrap.wrap(line, width=90) or [""]:
                if y < 50:
                    pdf.showPage()
                    y = 750
                pdf.drawString(100, y, segment)
                y -= 15
        pdf.save()
        return output_file
    except Exception as exc:
        print(f"PDF generation failed: {exc}")
        return None


def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF, falling back to OCR when no text layer is found.

    Returns the extracted text, or "Error extracting text from PDF." on failure.
    """
    try:
        with pdfplumber.open(pdf_file) as pdf:
            # Join pages with a newline so words at page edges do not fuse.
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)
        if not text.strip():
            images = pdf2image.convert_from_path(pdf_file)
            text = "\n".join(pytesseract.image_to_string(image) for image in images)
    except Exception as exc:
        print(f"PDF extraction failed: {exc}")
        text = "Error extracting text from PDF."
    return text


def extract_text_from_youtube(youtube_url):
    """Return the transcript text of a YouTube video, or "Error extracting transcript."."""
    try:
        video_id = YouTube(youtube_url).video_id
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry['text'] for entry in transcript)
    except Exception as exc:
        print(f"Transcript extraction failed: {exc}")
        return "Error extracting transcript."


def extract_text_from_url(url):
    """Return the text of all <p> elements at *url*, or "Error extracting text from URL."."""
    try:
        # A timeout prevents an unresponsive host from hanging the request;
        # raise_for_status stops error pages being stored as content.
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return " ".join(p.get_text() for p in soup.find_all('p'))
    except Exception as exc:
        print(f"URL extraction failed: {exc}")
        return "Error extracting text from URL."


def create_embedding(text):
    """Return the mean sentence embedding of *text* as a 1-D array."""
    text = text or ""
    # Encode the raw text when no sentences are found, so the mean is never
    # taken over an empty array (which would yield NaN).
    sentences = preprocess_text(text) or [text.strip()]
    embeddings = sentence_model.encode(sentences, convert_to_tensor=False)
    return np.mean(embeddings, axis=0)


def store_embedding(source, embedding, content_id=None):
    """Add *embedding* to the FAISS index and persist it in the embeddings table.

    Args:
        source: Identifier of where the text came from.
        embedding: 1-D numeric vector.
        content_id: ``content.id`` this vector belongs to, if any. Vectors
            stored without one are skipped by ``search_similar_content``.
    """
    vector = np.asarray(embedding, dtype=np.float32)
    index.add(vector.reshape(1, -1))
    _index_content_ids.append(content_id)
    db = init_sqlite_db()
    try:
        with db:
            db.execute(
                "INSERT INTO embeddings (source, embedding) VALUES (?, ?)",
                (source, json.dumps(vector.tolist())),
            )
    finally:
        db.close()


def store_content(source, text, language, category, subcategory):
    """Embed *text*, store it in the content table and index it.

    Returns:
        The ``content.id`` of the newly inserted row.
    """
    embedding = create_embedding(text)
    db = init_sqlite_db()
    try:
        with db:
            cursor = db.execute(
                "INSERT INTO content (source, text, language, category, subcategory, embedding) VALUES (?, ?, ?, ?, ?, ?)",
                (source, text, language, category, subcategory, json.dumps(embedding.tolist())),
            )
            content_id = cursor.lastrowid
    finally:
        db.close()
    # Index only after the row exists so the position->id mapping stays valid.
    store_embedding(source, embedding, content_id=content_id)
    return content_id


def search_similar_content(query, top_k=3):
    """Return up to *top_k* content rows most similar to *query*.

    Each result is a ``(source, text, language, category, subcategory)`` tuple.
    """
    if index.ntotal == 0:
        return []
    query_vector = create_embedding(query).astype(np.float32).reshape(1, -1)
    _, positions = index.search(query_vector, int(top_k))
    # Positions outside the mapping (FAISS pads with -1 when it has fewer
    # than top_k vectors) and vectors without a content row are dropped.
    content_ids = [
        _index_content_ids[pos]
        for pos in positions[0]
        if 0 <= pos < len(_index_content_ids) and _index_content_ids[pos] is not None
    ]
    results = []
    if not content_ids:
        return results
    db = init_sqlite_db()
    try:
        for content_id in content_ids:
            row = db.execute(
                "SELECT source, text, language, category, subcategory FROM content WHERE id = ?",
                (content_id,),
            ).fetchone()
            if row:
                results.append(row)
    finally:
        db.close()
    return results


def generate_quiz(content, num_questions=3):
    """Build fill-in-the-blank questions from sentences of *content*.

    Returns a list of ``{"question": ..., "answer": ...}`` dicts, at most
    *num_questions* long, each drawn from a distinct sentence of more than
    five words.
    """
    candidates = [s for s in preprocess_text(content) if len(s.split()) > 5]
    count = max(0, min(int(num_questions), len(candidates)))
    questions = []
    for sentence in random.sample(candidates, count):
        words = sentence.split()
        # Strip surrounding punctuation so the expected answer is the bare word.
        cores = [re.sub(r'^\W+|\W+$', '', word) for word in words]
        blankable = [i for i, core in enumerate(cores) if core]
        if not blankable:
            continue
        position = random.choice(blankable)
        answer = cores[position]
        # Blank exactly one word; str.replace on the whole sentence would
        # also blank every other substring match.
        words[position] = words[position].replace(answer, "____", 1)
        questions.append({"question": " ".join(words), "answer": answer})
    return questions


def evaluate_quiz(questions, answers):
    """Return ``(score, total)`` for *answers* compared case-insensitively in order."""
    total = len(questions)
    score = sum(
        1
        for question, given in zip(questions, answers)
        if given.strip().lower() == question['answer'].strip().lower()
    )
    return score, total


def analyze_feedback(feedback_text):
    """Return ``(label, score)`` from the sentiment model for *feedback_text*."""
    # truncation=True keeps very long feedback within the model's input limit.
    sentiment = sentiment_model(feedback_text, truncation=True)[0]
    return sentiment['label'], sentiment['score']


def store_user_feedback(user_id, feedback_text):
    """Analyze *feedback_text*, store it for *user_id*, and return ``(sentiment, score)``."""
    sentiment, score = analyze_feedback(feedback_text)
    db = init_sqlite_db()
    try:
        with db:
            db.execute(
                "INSERT INTO feedback (user_id, text, sentiment, score) VALUES (?, ?, ?, ?)",
                (user_id, feedback_text, sentiment, score),
            )
    finally:
        db.close()
    return sentiment, score


def create_user_profile(username, age, education_level, language_preference, learning_goal, learning_style):
    """Insert a user row with a freshly generated ID and return that ID."""
    user_id = str(uuid.uuid4())
    db = init_sqlite_db()
    try:
        with db:
            db.execute(
                '''
                INSERT OR REPLACE INTO users
                (user_id, username, age, education_level, language_preference,
                 learning_goal, learning_style, progress, quiz_results, feedback)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''',
                (user_id, username, age, education_level, language_preference,
                 learning_goal, learning_style, json.dumps({}), json.dumps({}), json.dumps({})),
            )
    finally:
        db.close()
    return user_id


def _update_user_json_column(user_id, column, key, value):
    """Set ``key -> value`` in a JSON-object column of the users table.

    *column* is always a literal chosen by the callers in this module, never
    user input, so interpolating it into the SQL is safe.

    Returns True when the user exists and was updated, False otherwise.
    """
    db = init_sqlite_db()
    try:
        with db:
            row = db.execute(f"SELECT {column} FROM users WHERE user_id = ?", (user_id,)).fetchone()
            if row is None:
                return False
            data = json.loads(row[0]) if row[0] else {}
            data[key] = value
            db.execute(f"UPDATE users SET {column} = ? WHERE user_id = ?", (json.dumps(data), user_id))
            return True
    finally:
        db.close()


def update_user_progress(user_id, content_id, status):
    """Record *status* for *content_id* in the user's progress.

    Returns True when stored, False when *user_id* does not exist.
    """
    return _update_user_json_column(user_id, "progress", content_id, status)


def store_quiz_results(user_id, quiz_results):
    """Append *quiz_results* to the user's history, keyed by the current time.

    Returns True when stored, False when *user_id* does not exist.
    """
    return _update_user_json_column(user_id, "quiz_results", str(datetime.now()), quiz_results)


def start_video_session(user_id, session_name, content_id, scheduled_time):
    """Create a video session row and return ``(session_id, jitsi_url)``."""
    session_id = str(uuid.uuid4())
    start_time = datetime.now().isoformat()
    jitsi_url = f"https://meet.jit.si/{session_id}"
    db = init_sqlite_db()
    try:
        with db:
            db.execute(
                '''
                INSERT INTO video_sessions
                (session_id, user_id, session_name, start_time, duration,
                 participants, content_id, scheduled_time)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''',
                (session_id, user_id, session_name, start_time, 0,
                 json.dumps([user_id]), content_id or "", scheduled_time or ""),
            )
    finally:
        db.close()
    return session_id, jitsi_url


def end_video_session(session_id):
    """Store the elapsed minutes for *session_id*.

    Returns True when the session exists, False otherwise.
    """
    db = init_sqlite_db()
    try:
        with db:
            row = db.execute(
                "SELECT start_time FROM video_sessions WHERE session_id = ?",
                (session_id,),
            ).fetchone()
            if not row:
                return False
            started = datetime.fromisoformat(row[0])
            duration = int((datetime.now() - started).total_seconds() / 60)
            db.execute(
                "UPDATE video_sessions SET duration = ? WHERE session_id = ?",
                (duration, session_id),
            )
            return True
    finally:
        db.close()


def generate_lesson_plan(topic, grade_level, objectives, language):
    """Generate a lesson plan and translate it into *language*.

    Returns the plan text, or "Error generating lesson plan." on failure.
    """
    try:
        prompt = f"Generate a lesson plan for {topic} for {grade_level} students with objectives: {objectives}. Include an introduction, activities, and assessment. Language: {language}."
        result = lesson_plan_model(prompt, max_length=200, num_return_sequences=1)[0]['generated_text']
        return translate_text(result, SUPPORTED_LANGUAGES.get(language, 'en'))
    except Exception as exc:
        print(f"Lesson plan generation failed: {exc}")
        return "Error generating lesson plan."
def generate_flashcards(content_id, num_cards=5):
    """Build flashcards from the highest-scoring TF-IDF terms of a content row.

    Args:
        content_id: ``content.id`` of the stored text.
        num_cards: Maximum number of cards; floats (e.g. from a slider) are
            accepted and truncated to an int.

    Returns:
        A list of ``{"front": term, "back": description}`` dicts; empty when
        the content does not exist or yields no usable terms.
    """
    conn = init_sqlite_db()
    try:
        row = conn.execute("SELECT text FROM content WHERE id = ?", (content_id,)).fetchone()
    finally:
        conn.close()
    if not row or not row[0]:
        return []
    text = row[0]
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError:
        # Raised when the text contains only stop words / no tokens.
        return []
    feature_names = vectorizer.get_feature_names_out()
    scores = tfidf_matrix.toarray()[0]
    limit = max(0, int(num_cards))
    keywords = sorted(zip(feature_names, scores), key=lambda pair: pair[1], reverse=True)[:limit]
    return [{"front": term, "back": f"Key term in {text[:50]}..."} for term, _ in keywords]


def get_student_progress(user_id):
    """Return ``(scores, totals)`` lists from a user's stored quiz results.

    Both lists are empty when the user is unknown or has no readable results.
    Entries lacking a score or total are skipped.
    """
    conn = init_sqlite_db()
    try:
        row = conn.execute("SELECT quiz_results FROM users WHERE user_id = ?", (user_id,)).fetchone()
    finally:
        conn.close()
    if not row or not row[0]:
        return [], []
    try:
        quiz_results = json.loads(row[0])
    except ValueError:
        return [], []
    if not isinstance(quiz_results, dict):
        return [], []
    entries = [
        entry for entry in quiz_results.values()
        if isinstance(entry, dict) and 'score' in entry and 'total' in entry
    ]
    scores = [entry['score'] for entry in entries]
    totals = [entry['total'] for entry in entries]
    return scores, totals


def store_forum_message(user_id, message):
    """Post *message* to the forum on behalf of *user_id*.

    Returns a status string: "Message posted." on success, otherwise a
    description of what is missing.
    """
    if not message or not message.strip():
        return "Please enter a message."
    conn = init_sqlite_db()
    try:
        with conn:
            # Forum listings join on the users table, so a message from an
            # unknown user would be stored but never shown.
            known = conn.execute("SELECT 1 FROM users WHERE user_id = ?", (user_id,)).fetchone()
            if not known:
                return "Unknown User ID. Please create a profile first."
            timestamp = datetime.now().isoformat()
            conn.execute(
                "INSERT INTO forum (user_id, message, timestamp) VALUES (?, ?, ?)",
                (user_id, message, timestamp),
            )
    finally:
        conn.close()
    return "Message posted."
def get_forum_messages():
    """Return the 20 newest forum posts as ``(username, message, timestamp)`` rows."""
    conn = init_sqlite_db()
    try:
        return conn.execute(
            "SELECT u.username, f.message, f.timestamp FROM forum f JOIN users u ON f.user_id = u.user_id ORDER BY f.timestamp DESC LIMIT 20"
        ).fetchall()
    finally:
        conn.close()


# Prefix of the status text produced after a successful content upload.
_PROCESSED_PREFIX = "Content processed:"

# Exact failure strings returned by the extract_text_from_* helpers.
_EXTRACTION_ERRORS = (
    "Error extracting text from PDF.",
    "Error extracting transcript.",
    "Error extracting text from URL.",
)


def generate_content_tts(content_output, language):
    """Create an MP3 of the summary in *content_output*.

    Returns the audio file path, or None when there is no processed content
    or speech generation fails.
    """
    # Test for the success prefix instead of searching for the word "Error",
    # which may legitimately occur inside a summary.
    if not content_output or not content_output.startswith(_PROCESSED_PREFIX):
        return None
    summary = content_output.partition("Summary: ")[2].split("\n")[0]
    if not summary.strip():
        return None
    output_file = f"/data/files/tts_{uuid.uuid4()}.mp3"
    lang_code = SUPPORTED_LANGUAGES.get(language, 'en')
    return generate_tts(summary, lang_code, output_file)


def download_content(content_output):
    """Write *content_output* to a PDF; return its path, or None if nothing was processed."""
    if not content_output or not content_output.startswith(_PROCESSED_PREFIX):
        return None
    output_file = f"/data/files/content_{uuid.uuid4()}.pdf"
    return generate_pdf(content_output, output_file)


def _latest_content_id(source):
    """Return the newest ``content.id`` stored for *source*, or None."""
    conn = init_sqlite_db()
    try:
        row = conn.execute("SELECT MAX(id) FROM content WHERE source = ?", (source,)).fetchone()
    finally:
        conn.close()
    return row[0] if row else None


# Kept at column 0 so Markdown does not treat indented lines as a code block.
USER_GUIDE_MARKDOWN = """
### 📘 How to Use This Platform

Follow these steps to learn and make decisions:

1. **User Profile**: Create your profile to personalize learning.
2. **Content Upload**: Upload content, listen via TTS, or download as PDF for offline use.
3. **Lesson Planning**: Teachers can generate lesson plans.
4. **Student Progress**: Teachers can view quiz score visualizations.
5. **Flashcards**: Students can learn key terms interactively.
6. **Community Forum**: Discuss ideas and share insights.
7. **Search Content**: Find similar content.
8. **Take Quiz**: Test knowledge with quizzes.
9. **Video Chat**: Schedule and join video sessions linked to content.
10. **Feedback**: Provide feedback to improve the platform.

**Tips for Decision-Making**:

- Teachers: Use progress dashboards and lesson plans to tailor teaching, schedule video sessions for discussions.
- Students: Use flashcards, TTS, and forum to enhance learning, join video chats to clarify doubts.
"""


def create_gradio_interface():
    """Build and return the Gradio Blocks UI with one tab per platform feature."""
    from html import escape

    with gr.Blocks(theme='default') as interface:
        gr.Markdown("# 🌍 Vernacular Learning Platform")
        gr.Markdown("Interactive learning platform for teachers and rural students with enhanced video chat, offline content, and community forum")

        with gr.Tabs():
            with gr.Tab("User Guide"):
                gr.Markdown(USER_GUIDE_MARKDOWN)

            with gr.Tab("User Profile"):
                username = gr.Textbox(label="Username")
                age = gr.Slider(minimum=10, maximum=100, step=1, label="Age")
                education_level = gr.Dropdown(choices=["High School", "Undergraduate", "Graduate", "Other"], label="Education Level")
                language_preference = gr.Dropdown(choices=list(SUPPORTED_LANGUAGES.keys()), label="Language Preference")
                learning_goal = gr.Dropdown(choices=list(LEARNING_PATHS.keys()), label="Learning Goal")
                learning_style = gr.Dropdown(choices=["Visual", "Auditory", "Kinesthetic", "Reading/Writing"], label="Learning Style")
                create_profile_button = gr.Button("Create/Update Profile")
                profile_output = gr.Textbox(label="Profile Status")

                create_profile_button.click(
                    fn=create_user_profile,
                    inputs=[username, age, education_level, language_preference, learning_goal, learning_style],
                    outputs=profile_output,
                )

            with gr.Tab("Content Upload"):
                content_type = gr.Radio(choices=["PDF", "YouTube", "URL"], label="Content Type", value="PDF")
                pdf_input = gr.File(label="Upload PDF", visible=True)
                url_input = gr.Textbox(label="Enter YouTube/URL", visible=False)
                language = gr.Dropdown(choices=list(SUPPORTED_LANGUAGES.keys()), label="Target Language")
                category = gr.Textbox(label="Category (e.g., Math, Science)")
                subcategory = gr.Textbox(label="Subcategory (e.g., Algebra, Physics)")
                process_button = gr.Button("Process Content")
                tts_button = gr.Button("Listen to Content")
                download_button = gr.Button("Download Content as PDF")
                content_output = gr.Textbox(label="Content Processing Output", lines=10)
                tts_output = gr.Audio(label="Content Audio")
                download_output = gr.File(label="Download PDF")

                def update_input_visibility(content_type):
                    """Show the file picker for PDFs and the URL box otherwise."""
                    return gr.update(visible=content_type == "PDF"), gr.update(visible=content_type != "PDF")

                content_type.change(
                    fn=update_input_visibility,
                    inputs=content_type,
                    outputs=[pdf_input, url_input],
                )

                def process_content(content_type, pdf_input, url_input, language, category, subcategory):
                    """Extract, translate, summarize and store the submitted content."""
                    lang_code = SUPPORTED_LANGUAGES.get(language)
                    if lang_code is None:
                        return "Please select a target language."
                    if content_type == "PDF":
                        if not pdf_input:
                            return "Please upload a PDF file."
                        # The upload may arrive as a path string or as an
                        # object exposing the path via `.name`.
                        source = getattr(pdf_input, "name", pdf_input)
                        text = extract_text_from_pdf(source)
                    else:
                        if not url_input:
                            return "Please enter a YouTube or URL."
                        source = url_input
                        text = extract_text_from_youtube(url_input) if content_type == "YouTube" else extract_text_from_url(url_input)
                    # Compare against the exact failure strings; a substring
                    # test for "Error" would reject content mentioning the word.
                    if text in _EXTRACTION_ERRORS:
                        return text
                    if not text or not text.strip():
                        return "No text could be extracted from this content."
                    translated_text = translate_text(text, lang_code)
                    summary = summarize_text(translated_text)
                    store_content(source, translated_text, language, category, subcategory)
                    # Surface the ID: the Quiz and Flashcards tabs ask for it.
                    content_id = _latest_content_id(source)
                    return f"{_PROCESSED_PREFIX}\nSummary: {summary}\nStored in database. Content ID: {content_id}"

                process_button.click(
                    fn=process_content,
                    inputs=[content_type, pdf_input, url_input, language, category, subcategory],
                    outputs=content_output,
                )
                tts_button.click(
                    fn=generate_content_tts,
                    inputs=[content_output, language],
                    outputs=tts_output,
                )
                download_button.click(
                    fn=download_content,
                    inputs=content_output,
                    outputs=download_output,
                )

            with gr.Tab("Lesson Planning"):
                topic = gr.Textbox(label="Topic (e.g., Fractions)")
                grade_level = gr.Dropdown(choices=["1-5", "6-8", "9-12"], label="Grade Level")
                objectives = gr.Textbox(label="Learning Objectives (e.g., Understand fraction addition)")
                language_plan = gr.Dropdown(choices=list(SUPPORTED_LANGUAGES.keys()), label="Language")
                generate_plan_button = gr.Button("Generate Lesson Plan")
                plan_output = gr.Textbox(label="Lesson Plan", lines=10)

                generate_plan_button.click(
                    fn=generate_lesson_plan,
                    inputs=[topic, grade_level, objectives, language_plan],
                    outputs=plan_output,
                )

            with gr.Tab("Student Progress"):
                user_id_progress = gr.Textbox(label="Student User ID")
                fetch_progress_button = gr.Button("Fetch Progress")
                progress_output = gr.Textbox(label="Quiz Results")
                progress_chart = gr.JSON(label="Quiz Scores Chart")

                def fetch_progress(user_id):
                    """Return a text listing and a chart spec of the user's quiz scores."""
                    scores, totals = get_student_progress(user_id)
                    if not scores:
                        return "No quiz results found.", {}
                    result_text = "\n".join(f"Quiz {i}: {s}/{t}" for i, (s, t) in enumerate(zip(scores, totals), 1))
                    chart_data = {
                        "type": "bar",
                        "data": {
                            "labels": [f"Quiz {i}" for i in range(1, len(scores) + 1)],
                            "datasets": [{
                                "label": "Scores",
                                "data": scores,
                                "backgroundColor": "rgba(75, 192, 192, 0.2)",
                                "borderColor": "rgba(75, 192, 192, 1)",
                                "borderWidth": 1,
                            }],
                        },
                        "options": {
                            "scales": {
                                "y": {
                                    "beginAtZero": True,
                                    "title": {"display": True, "text": "Score"},
                                },
                                "x": {
                                    "title": {"display": True, "text": "Quiz"},
                                },
                            },
                        },
                    }
                    return result_text, chart_data

                fetch_progress_button.click(
                    fn=fetch_progress,
                    inputs=user_id_progress,
                    outputs=[progress_output, progress_chart],
                )

            with gr.Tab("Flashcards"):
                content_id_flash = gr.Textbox(label="Content ID")
                num_cards = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Flashcards")
                generate_flashcards_button = gr.Button("Generate Flashcards")
                flashcard_output = gr.HTML(label="Flashcards")

                def display_flashcards(content_id, num_cards):
                    """Render the flashcards for *content_id* as HTML cards."""
                    flashcards = generate_flashcards(content_id, int(num_cards))
                    if not flashcards:
                        return "Content not found or no flashcards generated."
                    # Card text comes from uploaded content, so escape it
                    # before embedding it in markup.
                    cards = "".join(
                        "<div style='border: 1px solid #ccc; border-radius: 8px; "
                        "padding: 12px; margin: 8px; width: 220px;'>"
                        f"<h4>Card {i}</h4>"
                        f"<p><b>Front:</b> {escape(str(card['front']))}</p>"
                        f"<p><b>Back:</b> {escape(str(card['back']))}</p>"
                        "</div>"
                        for i, card in enumerate(flashcards, 1)
                    )
                    return f"<div style='display: flex; flex-wrap: wrap;'>{cards}</div>"

                generate_flashcards_button.click(
                    fn=display_flashcards,
                    inputs=[content_id_flash, num_cards],
                    outputs=flashcard_output,
                )

            with gr.Tab("Community Forum"):
                user_id_forum = gr.Textbox(label="User ID")
                message = gr.Textbox(label="Message")
                post_button = gr.Button("Post Message")
                forum_output = gr.Textbox(label="Forum Messages", lines=10)
                refresh_button = gr.Button("Refresh Forum")

                def post_message(user_id, message):
                    """Store a forum post and return the status text."""
                    return store_forum_message(user_id, message)

                def display_forum():
                    """Return the latest forum posts, one per line."""
                    return "\n".join(f"[{posted_at}] {author}: {body}" for author, body, posted_at in get_forum_messages())

                post_button.click(
                    fn=post_message,
                    inputs=[user_id_forum, message],
                    outputs=forum_output,
                )
                refresh_button.click(
                    fn=display_forum,
                    inputs=[],
                    outputs=forum_output,
                )

            with gr.Tab("Search Content"):
                search_query = gr.Textbox(label="Search Query")
                search_button = gr.Button("Search")
                search_output = gr.Textbox(label="Search Results", lines=10)

                def search_action(query):
                    """Run a similarity search and format the hits as readable text."""
                    if not query or not query.strip():
                        return "Please enter a search query."
                    results = search_similar_content(query)
                    if not results:
                        return "No similar content found."
                    return "\n\n".join(
                        f"[{category}/{subcategory}] ({language}) {source}\n{(text or '')[:300]}"
                        for source, text, language, category, subcategory in results
                    )

                search_button.click(
                    fn=search_action,
                    inputs=search_query,
                    outputs=search_output,
                )

            with gr.Tab("Quiz"):
                content_id = gr.Textbox(label="Content ID (from Content Upload)")
                num_questions = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Number of Questions")
                generate_quiz_button = gr.Button("Generate Quiz")
                quiz_output = gr.Textbox(label="Quiz Questions", lines=10)
                answers = gr.Textbox(label="Enter Answers (one per line)")
                evaluate_quiz_button = gr.Button("Evaluate Quiz")
                quiz_result = gr.Textbox(label="Quiz Results")
                user_id_quiz = gr.Textbox(label="User ID")
                # Holds the questions with their answers between the two
                # button clicks, so the answers are never shown in the textbox.
                quiz_state = gr.State([])

                def generate_quiz_action(content_id, num_questions):
                    """Create a quiz and return ``(display_text, questions)``."""
                    conn = init_sqlite_db()
                    try:
                        row = conn.execute("SELECT text FROM content WHERE id = ?", (content_id,)).fetchone()
                    finally:
                        conn.close()
                    if not row:
                        return "Content not found.", []
                    questions = generate_quiz(row[0], int(num_questions))
                    if not questions:
                        return "Could not generate questions from this content.", []
                    listing = "\n".join(f"{i}. {q['question']}" for i, q in enumerate(questions, 1))
                    return listing, questions

                generate_quiz_button.click(
                    fn=generate_quiz_action,
                    inputs=[content_id, num_questions],
                    outputs=[quiz_output, quiz_state],
                )

                def evaluate_quiz_action(questions, answers, user_id):
                    """Score the submitted answers and save the result for *user_id*."""
                    if not questions:
                        return "Please generate a quiz first."
                    submitted = (answers or "").split("\n")
                    score, total = evaluate_quiz(questions, submitted)
                    # An unknown user ID has no row in the users table; treat
                    # both a False return and a TypeError from that lookup as
                    # "not saved" rather than failing the evaluation.
                    try:
                        stored = store_quiz_results(user_id, {"score": score, "total": total})
                    except TypeError:
                        stored = False
                    if stored is False:
                        return f"Score: {score}/{total} (not saved: unknown User ID)"
                    return f"Score: {score}/{total}"

                evaluate_quiz_button.click(
                    fn=evaluate_quiz_action,
                    inputs=[quiz_state, answers, user_id_quiz],
                    outputs=quiz_result,
                )

            with gr.Tab("Video Chat"):
                user_id_video = gr.Textbox(label="User ID")
                session_name = gr.Textbox(label="Session Name", value="Learning Session")
                content_id_video = gr.Textbox(label="Content ID (optional)")
                scheduled_time = gr.Textbox(label="Scheduled Time (YYYY-MM-DD HH:MM, optional)", placeholder="e.g., 2025-05-25 14:00")
                start_video_button = gr.Button("Start Video Session")
                video_output = gr.HTML(label="Video Chat")
                video_url = gr.Textbox(label="Video Chat URL")
                end_video_button = gr.Button("End Video Session")
                video_status = gr.Textbox(label="Video Session Status")
                # ID of the running session; the status textbox holds a
                # sentence, not the bare ID, so it cannot be used for lookup.
                session_state = gr.State("")

                def start_video_action(user_id, session_name, content_id, scheduled_time):
                    """Start a session; return ``(iframe, url, status, session_id)``."""
                    if not user_id:
                        return "", "", "Please provide a valid User ID.", ""
                    try:
                        if scheduled_time:
                            datetime.strptime(scheduled_time, "%Y-%m-%d %H:%M")
                    except ValueError:
                        return "", "", "Invalid scheduled time format. Use YYYY-MM-DD HH:MM.", ""
                    session_id, jitsi_url = start_video_session(user_id, session_name, content_id, scheduled_time)
                    iframe = (
                        f'<iframe src="{escape(jitsi_url)}" width="100%" height="500" '
                        'allow="camera; microphone; fullscreen; display-capture" '
                        'style="border: 0;"></iframe>'
                    )
                    return iframe, jitsi_url, f"Video session started: {session_id}", session_id

                start_video_button.click(
                    fn=start_video_action,
                    inputs=[user_id_video, session_name, content_id_video, scheduled_time],
                    outputs=[video_output, video_url, video_status, session_state],
                )

                def end_video_action(session_id):
                    """End the running session; return ``(status, cleared_session_id)``."""
                    if not session_id:
                        return "No active video session.", ""
                    end_video_session(session_id)
                    return "Video session ended.", ""

                end_video_button.click(
                    fn=end_video_action,
                    inputs=session_state,
                    outputs=[video_status, session_state],
                )

            with gr.Tab("Feedback"):
                user_id_feedback = gr.Textbox(label="User ID")
                feedback_text = gr.Textbox(label="Feedback")
                submit_feedback_button = gr.Button("Submit Feedback")
                feedback_output = gr.Textbox(label="Feedback Analysis")

                def submit_feedback_action(user_id, feedback_text):
                    """Store the feedback and describe its detected sentiment."""
                    if not feedback_text or not feedback_text.strip():
                        return "Please enter feedback."
                    sentiment, score = store_user_feedback(user_id, feedback_text)
                    return f"Sentiment: {sentiment} (confidence: {score:.2f})"

                submit_feedback_button.click(
                    fn=submit_feedback_action,
                    inputs=[user_id_feedback, feedback_text],
                    outputs=feedback_output,
                )

    return interface


if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch(server_name="0.0.0.0", server_port=7860)