# SCRIPTGENERATOR / app.py
# Last update by Backened (commit 146e204, verified)
import os
import gradio as gr
import wikipediaapi
from groq import Groq
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from deep_translator import GoogleTranslator
import yake
from datetime import datetime
import shutil
import glob
# βœ… Groq client setup.
# SECURITY FIX: a real API key was previously hard-coded on this line and committed
# to the repository. Any key that has appeared in source control must be treated as
# compromised and rotated. The key is now read from the environment only.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=_groq_api_key)
# # βœ… Load M2M-100 Model
# model_name = "facebook/m2m100_418M"
# tokenizer = M2M100Tokenizer.from_pretrained(model_name)
# model = M2M100ForConditionalGeneration.from_pretrained(model_name)
def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for *topic*, or a fallback notice.

    Uses a dedicated user agent as required by the Wikipedia API etiquette.
    """
    wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki.page(topic)
    if not page.exists():
        return "No Wikipedia summary available."
    return page.summary
def generate_script(topic, duration):
    """Turn a Wikipedia summary of *topic* into an educational script.

    The word budget is sized for *duration* minutes at a typical speaking
    pace. Returns the generated script, or an error string on failure.
    """
    try:
        summary = fetch_wikipedia_summary(topic)
        # ~130 spoken words per minute drives the requested script length.
        word_budget = duration * 130
        prompt = (
            f"Format the following factual content into a well-structured "
            f"educational script in English with approximately {word_budget} words: \n{summary}"
        )
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"❌ Error in script generation: {str(exc)}"
# βœ… Function to Extract Keywords Using YAKE
def extract_keywords(script):
    """Extract salient keyword phrases from *script* using YAKE.

    Phrases are at most trigrams; near-duplicate phrases are suppressed.
    Returns a comma-separated string, or an error string on failure.
    """
    try:
        extractor = yake.KeywordExtractor(
            lan="en",        # language of the script
            n=3,             # up to trigram phrases
            dedupLim=0.9,    # drop near-duplicate phrases
        )
        scored_phrases = extractor.extract_keywords(script)
        # Each item is (phrase, score); only the phrase text is surfaced.
        return ", ".join(phrase for phrase, _score in scored_phrases)
    except Exception as exc:
        return f"❌ Error extracting keywords: {str(exc)}"
def save_keywords_file(keywords, topic):
    """Write *keywords* to Keywords/<topic>_Keyword_<date>.txt and return the path."""
    stamp = datetime.today().strftime('%Y_%b_%d')
    target = f"Keywords/{topic}_Keyword_{stamp}.txt"
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(keywords)
    return target
# # βœ… Function to Translate English Script to Urdu
# def translate_to_urdu(english_script):
# try:
# google_translation = GoogleTranslator(source='en', target='ur').translate(english_script)
# tokenizer.src_lang = "en"
# max_length = 500
# input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)]
# refined_chunks = []
# for chunk in input_chunks:
# inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
# translated_tokens = model.generate(
# **inputs,
# max_length=1024,
# no_repeat_ngram_size=2,
# forced_bos_token_id=tokenizer.get_lang_id("ur"),
# num_beams=2
# )
# refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
# return " ".join(refined_chunks)
# except Exception as e:
# return f"❌ Error in translation: {str(e)}"
def translate_to_urdu(english_script):
    """Translate *english_script* to Urdu via Google Translate.

    The input is split into fixed-size chunks because the translator
    rejects very long payloads (limit is ~5000 characters; 4500 leaves a
    safety margin). Returns the joined translation, or an error string.
    """
    try:
        limit = 4500
        pieces = [
            english_script[start:start + limit]
            for start in range(0, len(english_script), limit)
        ]
        translated = [
            GoogleTranslator(source='en', target='ur').translate(piece)
            for piece in pieces
        ]
        return " ".join(translated)
    except Exception as exc:
        return f"❌ Error in translation: {str(exc)}"
def save_english_file(content, topic):
    """Write the English script to English_Scripts/<topic>_Eng_<date>.txt and return the path."""
    stamp = datetime.today().strftime('%Y_%b_%d')  # e.g. 2025_Feb_21
    target = f"English_Scripts/{topic}_Eng_{stamp}.txt"
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(content)
    return target
def save_urdu_file(content, topic):
    """Write the Urdu script to Urdu_Scripts/<topic>_Urdu_<date>.txt and return the path."""
    stamp = datetime.today().strftime('%Y_%b_%d')
    target = f"Urdu_Scripts/{topic}_Urdu_{stamp}.txt"
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(content)
    return target
def save_final_urdu_file(topic, content):
    """Write the hand-edited Urdu script to Urdu_Final/ and return its path.

    NOTE: argument order is (topic, content), the reverse of the other
    save helpers — callers already depend on this.
    """
    stamp = datetime.now().strftime("%Y_%b_%d")
    target = f"Urdu_Final/{topic}_Urdu_Final_{stamp}.txt"
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(content)
    return target
def finalize_process():
    """Return the completion message shown in the status textbox."""
    message = "βœ… Script Generation Completed Successfully!"
    return message
def clear_old_files():
    """Delete every generated file from all output directories.

    Deletion failures are reported but do not abort the sweep. Returns five
    empty strings used to blank out the downstream UI textboxes.
    """
    for folder in ("English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"):
        if not os.path.exists(folder):
            continue
        for path in glob.glob(f"{folder}/*"):
            try:
                os.remove(path)
            except Exception as exc:
                print(f"❌ Error deleting {path}: {exc}")
    return "", "", "", "", ""
# βœ… Gradio UI
def _generate_and_reset(topic, duration):
    """Clear stale files and downstream fields, then generate a fresh English script.

    FIX: the original wired TWO click handlers to generate_button; together they
    called generate_script twice per click (two Groq API calls), and the second
    handler's lambda returned 6 values to a 4-component outputs list, which
    Gradio rejects. This single handler returns exactly the 5 values mapped to
    the 5 declared outputs below.
    """
    clear_old_files()  # side effect only: deletes previously generated files
    return generate_script(topic, duration), "", "", "", ""

with gr.Blocks() as app:
    gr.Markdown("# 🎬 AI-Powered Educational Script Generator")
    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)
    download_english_button = gr.Button("Download English Script")
    download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])
    # βœ… Keyword Extraction Section
    extract_keywords_btn = gr.Button("πŸ”‘ Extract Keywords")
    keyword_output = gr.Textbox(label="πŸ” Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("⬇️ Download Keywords")
    download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])
    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])
    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    # NOTE: save_final_urdu_file takes (topic, content) — reversed vs the other savers.
    download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])
    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    # βœ… Button Actions — exactly one handler per button
    generate_button.click(
        _generate_and_reset,
        inputs=[topic_input, duration_input],
        outputs=[eng_output, keyword_output, urdu_output, final_edited_urdu_output, status_output],
    )
    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])
    finalize_button.click(finalize_process, outputs=[status_output])
app.launch()