"""AI-powered educational script generator.

Fetches factual content from Wikipedia, reformats it into a timed
educational script via the Groq LLM API, extracts keywords with YAKE,
and translates the result to Urdu — all behind a Gradio UI.
"""

import glob
import os
from datetime import datetime

import gradio as gr
import wikipediaapi
import yake
from deep_translator import GoogleTranslator
from groq import Groq

# ✅ SECURITY FIX: the API key was previously hard-coded in this file.
# It must now be supplied via the environment, e.g.:
#   export GROQ_API_KEY="gsk_..."
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Rough speaking rate used to size the generated script to the requested duration.
WORDS_PER_MINUTE = 130

# Every directory that holds downloadable artifacts (also swept by clear_old_files).
OUTPUT_DIRS = ["English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"]


def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for *topic*, or a fallback message."""
    wiki_wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki_wiki.page(topic)
    return page.summary if page.exists() else "No Wikipedia summary available."


def generate_script(topic, duration):
    """Generate an English educational script of roughly *duration* minutes.

    Wikipedia supplies the factual grounding; the Groq LLM reformats it into
    a structured script sized at ~130 words per spoken minute.  Errors are
    returned as a user-visible "❌ ..." string rather than raised, so the UI
    textbox always gets content.
    """
    try:
        factual_content = fetch_wikipedia_summary(topic)
        target_words = duration * WORDS_PER_MINUTE
        response = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": (
                    f"Format the following factual content into a well-structured "
                    f"educational script in English with approximately "
                    f"{target_words} words: \n{factual_content}"
                ),
            }],
            model="llama-3.3-70b-versatile",
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error in script generation: {str(e)}"


def extract_keywords(script):
    """Extract keyword phrases (up to trigrams) from *script* using YAKE.

    Returns a comma-separated string of phrases, or a "❌ ..." error string.
    """
    try:
        kw_extractor = yake.KeywordExtractor(
            lan="en",      # language
            n=3,           # max words per keyword phrase (trigrams)
            dedupLim=0.9,  # suppress near-duplicate phrases
        )
        # extract_keywords yields (phrase, score) pairs; keep only the text.
        keywords = kw_extractor.extract_keywords(script)
        return ", ".join(kw[0] for kw in keywords)
    except Exception as e:
        return f"❌ Error extracting keywords: {str(e)}"


def _save_text(directory, filename, content):
    """Write *content* (UTF-8) to directory/filename, creating the directory.

    Shared by every save_* helper below; returns the saved file path so the
    Gradio File component can offer it for download.
    """
    path = f"{directory}/{filename}"
    os.makedirs(directory, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return path


def save_keywords_file(keywords, topic):
    """Persist extracted keywords and return the saved file path."""
    today = datetime.today().strftime('%Y_%b_%d')
    return _save_text("Keywords", f"{topic}_Keyword_{today}.txt", keywords)


def translate_to_urdu(english_script):
    """Translate *english_script* to Urdu via Google Translate.

    The text is sent in <= 4500-character chunks because the translation
    endpoint rejects payloads near its 5000-character limit.
    """
    try:
        max_chunk_size = 4500  # stay safely below the ~5000-char API limit
        chunks = [
            english_script[i:i + max_chunk_size]
            for i in range(0, len(english_script), max_chunk_size)
        ]
        translated_chunks = [
            GoogleTranslator(source='en', target='ur').translate(chunk)
            for chunk in chunks
        ]
        return " ".join(translated_chunks)
    except Exception as e:
        return f"❌ Error in translation: {str(e)}"


def save_english_file(content, topic):
    """Persist the English script and return the saved file path."""
    today = datetime.today().strftime('%Y_%b_%d')  # e.g. 2025_Feb_21
    return _save_text("English_Scripts", f"{topic}_Eng_{today}.txt", content)


def save_urdu_file(content, topic):
    """Persist the machine-translated Urdu script and return its path."""
    today = datetime.today().strftime('%Y_%b_%d')
    return _save_text("Urdu_Scripts", f"{topic}_Urdu_{today}.txt", content)


def save_final_urdu_file(topic, content):
    """Persist the hand-edited Urdu script and return its path.

    NOTE: the (topic, content) parameter order is the reverse of the other
    save_* helpers; the UI wires its inputs in this order, so keep it.
    """
    date_str = datetime.now().strftime("%Y_%b_%d")
    return _save_text("Urdu_Final", f"{topic}_Urdu_Final_{date_str}.txt", content)


def finalize_process():
    """Status message shown when the user marks the workflow complete."""
    return "✅ Script Generation Completed Successfully!"


def clear_old_files():
    """Delete previously generated artifact files from every output directory.

    Best-effort: a file that cannot be removed is reported and skipped so the
    remaining directories are still swept.
    """
    for directory in OUTPUT_DIRS:
        if os.path.exists(directory):
            for file in glob.glob(f"{directory}/*"):
                try:
                    os.remove(file)
                except Exception as e:
                    print(f"❌ Error deleting {file}: {e}")


def _generate_and_reset(topic, duration):
    """Sweep stale files, then generate a fresh English script.

    Returns one value per wired output: (english script, cleared keywords,
    cleared urdu, cleared final urdu, cleared status).
    """
    clear_old_files()
    return generate_script(topic, duration), "", "", "", ""


# ✅ Gradio UI
with gr.Blocks() as app:
    gr.Markdown("# 🎬 AI-Powered Educational Script Generator")

    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)

    download_english_button = gr.Button("Download English Script")
    download_english_button.click(
        save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()]
    )

    # ✅ Keyword extraction section
    extract_keywords_btn = gr.Button("🔑 Extract Keywords")
    keyword_output = gr.Textbox(label="🔍 Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("⬇️ Download Keywords")
    download_keywords_btn.click(
        save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()]
    )

    # ✅ Urdu translation section
    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(
        save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()]
    )

    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    download_final_urdu_button.click(
        save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()]
    )

    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    finalize_button.click(finalize_process, outputs=[status_output])

    # ✅ BUG FIX: generate_button.click was previously registered twice, so one
    # click ran generate_script (and billed the LLM API) two times; the second
    # handler also returned 6 values for only 4 declared outputs. A single
    # handler now clears stale state and generates the script in one pass,
    # with exactly one return value per output component.
    generate_button.click(
        _generate_and_reset,
        inputs=[topic_input, duration_input],
        outputs=[eng_output, keyword_output, urdu_output, final_edited_urdu_output, status_output],
    )
    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])

if __name__ == "__main__":
    app.launch()