# NOTE: removed non-code Hugging Face Spaces page residue captured by the scraper.
import os
import gradio as gr
import wikipediaapi
from groq import Groq
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from deep_translator import GoogleTranslator
import yake
from datetime import datetime
import shutil
import glob
# ✅ Set API Key
# SECURITY: a live Groq API key was previously hard-coded on this line and
# committed — that key is exposed and must be revoked. Supply the key via the
# GROQ_API_KEY environment variable instead, e.g.:
#   export GROQ_API_KEY="your_api_key_here"
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=api_key)
# # ✅ Load M2M-100 Model
# model_name = "facebook/m2m100_418M"
# tokenizer = M2M100Tokenizer.from_pretrained(model_name)
# model = M2M100ForConditionalGeneration.from_pretrained(model_name)
def fetch_wikipedia_summary(topic):
    """Fetch the English Wikipedia summary for *topic*.

    Returns the page summary text, or a fixed fallback message when no
    page with that title exists.
    """
    wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki.page(topic)
    if not page.exists():
        return "No Wikipedia summary available."
    return page.summary
def generate_script(topic, duration):
    """Generate an English educational script about *topic* via the Groq LLM.

    The target length is ~130 words per requested minute of runtime.
    On failure an error string (not an exception) is returned so the
    Gradio textbox can display it.
    """
    try:
        factual_content = fetch_wikipedia_summary(topic)
        words_per_minute = 130  # typical narration pace
        target_words = duration * words_per_minute
        prompt = f"Format the following factual content into a well-structured educational script in English with approximately {target_words} words: \n{factual_content}"
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        return response.choices[0].message.content
    except Exception as e:
        # Emoji restored from scrape mojibake ("β" → "❌").
        return f"❌ Error in script generation: {str(e)}"
# ✅ Function to Extract Keywords Using YAKE
def extract_keywords(script):
    """Extract key phrases from *script* with YAKE and join them with commas.

    Returns a single comma-separated string of keyword phrases, or an
    error string on failure.
    """
    try:
        kw_extractor = yake.KeywordExtractor(
            lan="en",      # language
            n=3,           # max words per keyword phrase (trigrams)
            dedupLim=0.9,  # deduplication threshold — reduces redundant phrases
            # top=10       # (default top-k is used; uncomment to limit to 10)
        )
        keywords = kw_extractor.extract_keywords(script)
        # Each item is a (phrase, score) pair — keep only the phrase text.
        return ", ".join(kw[0] for kw in keywords)
    except Exception as e:
        return f"❌ Error extracting keywords: {str(e)}"
def save_keywords_file(keywords, topic):
    """Persist the extracted *keywords* for *topic* to a dated text file.

    Writes Keywords/<topic>_Keyword_<YYYY_Mon_DD>.txt (creating the
    directory if needed) and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Keywords/{topic}_Keyword_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(keywords)
    return path
# # ✅ Function to Translate English Script to Urdu
# def translate_to_urdu(english_script):
# try:
# google_translation = GoogleTranslator(source='en', target='ur').translate(english_script)
# tokenizer.src_lang = "en"
# max_length = 500
# input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)]
# refined_chunks = []
# for chunk in input_chunks:
# inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
# translated_tokens = model.generate(
# **inputs,
# max_length=1024,
# no_repeat_ngram_size=2,
# forced_bos_token_id=tokenizer.get_lang_id("ur"),
# num_beams=2
# )
# refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
# return " ".join(refined_chunks)
# except Exception as e:
# return f"β Error in translation: {str(e)}"
def translate_to_urdu(english_script):
    """Translate *english_script* to Urdu using Google Translate.

    The input is split into chunks because Google Translate rejects very
    long payloads (~5000-character limit). Returns the joined translation,
    or an error string on failure.
    """
    try:
        # ✅ Stay below the ~5000-character request limit to be safe.
        max_chunk_size = 4500
        chunks = [english_script[i:i + max_chunk_size]
                  for i in range(0, len(english_script), max_chunk_size)]
        translated_chunks = []
        for chunk in chunks:
            translated_chunks.append(
                GoogleTranslator(source='en', target='ur').translate(chunk)
            )
        # ✅ Join all translated chunks back into one script.
        return " ".join(translated_chunks)
    except Exception as e:
        return f"❌ Error in translation: {str(e)}"
def save_english_file(content, topic):
    """Save the generated English script and return the written file path.

    Files land in English_Scripts/ as <topic>_Eng_<YYYY_Mon_DD>.txt,
    e.g. 2025_Feb_21; the directory is created on demand.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"English_Scripts/{topic}_Eng_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
def save_urdu_file(content, topic):
    """Save the machine-translated Urdu script and return the file path.

    Files land in Urdu_Scripts/ as <topic>_Urdu_<YYYY_Mon_DD>.txt; the
    directory is created on demand.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Urdu_Scripts/{topic}_Urdu_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
def save_final_urdu_file(topic, content):
    """Save the human-edited Urdu script and return the written file path.

    NOTE: parameter order is (topic, content) — opposite of the other
    save_* helpers — because the Gradio click binding passes topic first.
    Files land in Urdu_Final/ as <topic>_Urdu_Final_<YYYY_Mon_DD>.txt.
    """
    date_str = datetime.now().strftime("%Y_%b_%d")
    filename = f"Urdu_Final/{topic}_Urdu_Final_{date_str}.txt"
    os.makedirs(os.path.dirname(filename), exist_ok=True)  # ✅ ensure the directory exists
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    return filename
def finalize_process():
    """Return the status message shown when the pipeline is finished.

    The original return string was split across two lines by scrape
    mojibake (emoji garbled to "β"); restored to a single literal.
    """
    return "✅ Script Generation Completed Successfully!"
def clear_old_files():
    """Delete previously generated output files and reset UI textboxes.

    Best-effort: a file that cannot be removed is reported to stdout and
    skipped. Returns five empty strings, one per textbox in the Gradio UI.
    """
    # ✅ All directories where generated files are stored.
    directories = ["English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"]
    for directory in directories:
        if os.path.exists(directory):
            for file in glob.glob(f"{directory}/*"):
                try:
                    os.remove(file)
                except Exception as e:
                    print(f"❌ Error deleting {file}: {e}")
    # ✅ Clear all textboxes in the UI.
    return "", "", "", "", ""
# ✅ Gradio UI
with gr.Blocks() as app:
    # Emoji in labels restored from scrape mojibake; exact glyphs are a best guess.
    gr.Markdown("# 🎬 AI-Powered Educational Script Generator")

    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)
    download_english_button = gr.Button("Download English Script")
    download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])

    # ✅ Keyword extraction section
    extract_keywords_btn = gr.Button("🔍 Extract Keywords")
    keyword_output = gr.Textbox(label="🔑 Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("⬇️ Download Keywords")
    download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])

    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])

    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    # NOTE: save_final_urdu_file takes (topic, content) — inputs order matches.
    download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])

    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    finalize_button.click(finalize_process, outputs=[status_output])

    # ✅ Button actions.
    # The original bound generate_button TWICE: once to generate_script and once
    # to a lambda that also called generate_script (two LLM calls per click) and
    # returned six values into a four-component outputs list (a Gradio error).
    # Consolidated into one handler that clears stale files/textboxes, then
    # generates the new English script.
    def _regenerate(topic, duration):
        clear_old_files()  # best-effort deletion of previous output files
        return generate_script(topic, duration), "", "", "", ""

    generate_button.click(
        _regenerate,
        inputs=[topic_input, duration_input],
        outputs=[eng_output, keyword_output, urdu_output, final_edited_urdu_output, status_output],
    )

    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])

app.launch()