Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import wikipediaapi | |
| from groq import Groq | |
| import torch | |
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
| from deep_translator import GoogleTranslator | |
| import yake | |
| from datetime import datetime | |
| import shutil | |
| import glob | |
# Configure the Groq client.
# SECURITY FIX: the original hardcoded a live Groq API key in this file.
# Secrets committed to source are compromised — the leaked key must be revoked
# and rotated. Read the key from the environment instead and fail fast with a
# clear message if it is missing.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before launching.")
client = Groq(api_key=_groq_api_key)
| # # β Load M2M-100 Model | |
| # model_name = "facebook/m2m100_418M" | |
| # tokenizer = M2M100Tokenizer.from_pretrained(model_name) | |
| # model = M2M100ForConditionalGeneration.from_pretrained(model_name) | |
def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for *topic*.

    Falls back to a fixed message when no page exists for the topic.
    """
    wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki.page(topic)
    if not page.exists():
        return "No Wikipedia summary available."
    return page.summary
def generate_script(topic, duration):
    """Generate an English educational script of roughly *duration* minutes.

    Fetches factual content from Wikipedia, then asks the Groq LLM to format
    it to a target word count (~130 words per spoken minute). On any failure
    an error string is returned instead of raising.
    """
    WORDS_PER_MINUTE = 130
    try:
        facts = fetch_wikipedia_summary(topic)
        word_target = duration * WORDS_PER_MINUTE
        prompt = (
            f"Format the following factual content into a well-structured "
            f"educational script in English with approximately {word_target} words: \n{facts}"
        )
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"β Error in script generation: {str(e)}"
# Keyword extraction via YAKE
def extract_keywords(script):
    """Extract key phrases (up to trigrams) from *script* using YAKE.

    Returns a comma-separated string of phrases, or an error string on failure.
    """
    try:
        extractor = yake.KeywordExtractor(
            lan="en",      # language of the input text
            n=3,           # phrases of at most three words
            dedupLim=0.9,  # suppress near-duplicate phrases
        )
        pairs = extractor.extract_keywords(script)  # list of (phrase, score)
        return ", ".join(phrase for phrase, _score in pairs)
    except Exception as e:
        return f"β Error extracting keywords: {str(e)}"
def save_keywords_file(keywords, topic):
    """Write *keywords* to Keywords/<topic>_Keyword_<YYYY_Mon_DD>.txt.

    Creates the directory on demand and returns the relative file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Keywords/{topic}_Keyword_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(keywords)
    return path
| # # β Function to Translate English Script to Urdu | |
| # def translate_to_urdu(english_script): | |
| # try: | |
| # google_translation = GoogleTranslator(source='en', target='ur').translate(english_script) | |
| # tokenizer.src_lang = "en" | |
| # max_length = 500 | |
| # input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)] | |
| # refined_chunks = [] | |
| # for chunk in input_chunks: | |
| # inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device) | |
| # translated_tokens = model.generate( | |
| # **inputs, | |
| # max_length=1024, | |
| # no_repeat_ngram_size=2, | |
| # forced_bos_token_id=tokenizer.get_lang_id("ur"), | |
| # num_beams=2 | |
| # ) | |
| # refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]) | |
| # return " ".join(refined_chunks) | |
| # except Exception as e: | |
| # return f"β Error in translation: {str(e)}" | |
def translate_to_urdu(english_script):
    """Translate *english_script* to Urdu via Google Translate.

    The text is split into <=4500-character chunks to stay under the
    service's ~5000-character request limit; translated chunks are rejoined
    with spaces. Returns an error string on failure.
    """
    CHUNK_SIZE = 4500  # stay safely below Google Translate's limit
    try:
        translated = [
            GoogleTranslator(source='en', target='ur').translate(
                english_script[start:start + CHUNK_SIZE]
            )
            for start in range(0, len(english_script), CHUNK_SIZE)
        ]
        return " ".join(translated)
    except Exception as e:
        return f"β Error in translation: {str(e)}"
def save_english_file(content, topic):
    """Write the English script to English_Scripts/<topic>_Eng_<YYYY_Mon_DD>.txt.

    Creates the directory on demand and returns the relative file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')  # e.g. 2025_Feb_21
    path = f"English_Scripts/{topic}_Eng_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
def save_urdu_file(content, topic):
    """Write the Urdu script to Urdu_Scripts/<topic>_Urdu_<YYYY_Mon_DD>.txt.

    Creates the directory on demand and returns the relative file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Urdu_Scripts/{topic}_Urdu_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
def save_final_urdu_file(topic, content):
    """Write the hand-edited Urdu script to Urdu_Final/<topic>_Urdu_Final_<date>.txt.

    NOTE: parameter order is (topic, content) — reversed relative to the other
    save_* helpers; the Gradio button wiring depends on this order.
    Returns the relative file path.
    """
    stamp = datetime.now().strftime("%Y_%b_%d")
    path = f"Urdu_Final/{topic}_Urdu_Final_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
def finalize_process():
    """Return the status message shown when the whole workflow is complete."""
    return "β Script Generation Completed Successfully!"
def clear_old_files():
    """Delete every previously generated artifact and reset the UI textboxes.

    Removes all files inside the four output directories (when they exist);
    deletion failures are reported to stdout but do not abort the sweep.
    Returns five empty strings to blank the bound Gradio textboxes.
    """
    for folder in ("English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"):
        if not os.path.exists(folder):
            continue
        for path in glob.glob(f"{folder}/*"):
            try:
                os.remove(path)
            except Exception as err:
                print(f"β Error deleting {path}: {err}")
    return "", "", "", "", ""
# Gradio UI — component layout and event wiring.
with gr.Blocks() as app:
    gr.Markdown("# π¬ AI-Powered Educational Script Generator")
    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)
    download_english_button = gr.Button("Download English Script")
    download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])

    # Keyword extraction section
    extract_keywords_btn = gr.Button("π Extract Keywords")
    keyword_output = gr.Textbox(label="π Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("β¬οΈ Download Keywords")
    download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])

    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])

    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    # save_final_urdu_file takes (topic, content) — order differs from the other savers.
    download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])

    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])

    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    finalize_button.click(finalize_process, outputs=[status_output])

    def _generate_and_reset(topic, duration):
        """Purge stale artifacts, blank downstream boxes, and regenerate the script.

        BUG FIX: the original registered generate_button.click twice (so every
        click hit the Groq API twice) and its second handler returned six values
        (the 5-tuple from clear_old_files plus the script) mapped onto only four
        output components — a runtime error in Gradio. One handler now returns
        exactly one value per bound output, with the fresh script routed to
        eng_output.
        """
        clear_old_files()
        return generate_script(topic, duration), "", "", "", ""

    generate_button.click(
        _generate_and_reset,
        inputs=[topic_input, duration_input],
        outputs=[eng_output, keyword_output, urdu_output, final_edited_urdu_output, status_output],
    )

app.launch()