Backened commited on
Commit
19a55e1
Β·
verified Β·
1 Parent(s): 001c597

Create script_generator.py

Browse files
Files changed (1) hide show
  1. script_generator.py +212 -0
script_generator.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import wikipediaapi
4
+ from groq import Groq
5
+ import torch
6
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
7
+ from deep_translator import GoogleTranslator
8
+ import yake
9
+ from datetime import datetime
10
+ import shutil
11
+ import glob
12
+
13
+ # βœ… Set API Key
14
+ # os.environ["GROQ_API_KEY"] = "your_api_key_here"
15
+ # client = Groq(api_key=os.environ["GROQ_API_KEY"])
16
+
17
+ # βœ… Set API Key
18
+ os.environ["GROQ_API_KEY"] = "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" # Replace with your API key
19
+ client = Groq(api_key=os.environ["GROQ_API_KEY"])
20
+
21
+ # # βœ… Load M2M-100 Model
22
+ # model_name = "facebook/m2m100_418M"
23
+ # tokenizer = M2M100Tokenizer.from_pretrained(model_name)
24
+ # model = M2M100ForConditionalGeneration.from_pretrained(model_name)
25
+
26
def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for *topic*.

    Falls back to a fixed placeholder string when no page exists.
    """
    wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki.page(topic)
    if not page.exists():
        return "No Wikipedia summary available."
    return page.summary
33
+
34
def generate_script(topic, duration, words_per_minute=130):
    """Generate an English educational script about *topic* via the Groq LLM.

    Args:
        topic: Subject to look up on Wikipedia and script.
        duration: Target length of the narration in minutes.
        words_per_minute: Assumed narration pace; generalized from the previous
            hard-coded 130 so callers can tune pacing (default preserves behavior).

    Returns:
        The generated script text, or an "❌ Error ..." string on failure
        (best-effort contract kept so the UI shows the error instead of crashing).
    """
    try:
        factual_content = fetch_wikipedia_summary(topic)
        target_words = duration * words_per_minute

        response = client.chat.completions.create(
            messages=[{"role": "user", "content": f"Format the following factual content into a well-structured educational script in English with approximately {target_words} words: \n{factual_content}"}],
            model="llama-3.3-70b-versatile"
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error in script generation: {str(e)}"
47
+
48
+
49
+ # βœ… Function to Extract Keywords Using YAKE
50
+ def extract_keywords(script):
51
+ try:
52
+ kw_extractor = yake.KeywordExtractor(
53
+ lan="en", # Language
54
+ n=3, # Max number of words in a keyword phrase (trigrams)
55
+ dedupLim=0.9, # Reduce redundant phrases
56
+ # top=10 # Extract top 10 keywords
57
+ )
58
+
59
+ keywords = kw_extractor.extract_keywords(script)
60
+ return ", ".join([kw[0] for kw in keywords]) # βœ… Extract only the keyword text
61
+ except Exception as e:
62
+ return f"❌ Error extracting keywords: {str(e)}"
63
+
64
+
65
def save_keywords_file(keywords, topic):
    """Persist *keywords* under Keywords/<topic>_Keyword_<YYYY_Mon_DD>.txt.

    Creates the directory on demand and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Keywords/{topic}_Keyword_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(keywords)
    return path
72
+
73
+
74
+ # # βœ… Function to Translate English Script to Urdu
75
+ # def translate_to_urdu(english_script):
76
+ # try:
77
+ # google_translation = GoogleTranslator(source='en', target='ur').translate(english_script)
78
+ # tokenizer.src_lang = "en"
79
+ # max_length = 500
80
+ # input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)]
81
+ # refined_chunks = []
82
+ # for chunk in input_chunks:
83
+ # inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
84
+ # translated_tokens = model.generate(
85
+ # **inputs,
86
+ # max_length=1024,
87
+ # no_repeat_ngram_size=2,
88
+ # forced_bos_token_id=tokenizer.get_lang_id("ur"),
89
+ # num_beams=2
90
+ # )
91
+ # refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
92
+ # return " ".join(refined_chunks)
93
+ # except Exception as e:
94
+ # return f"❌ Error in translation: {str(e)}"
95
+
96
+
97
def translate_to_urdu(english_script):
    """Translate *english_script* to Urdu with Google Translate.

    The text is split into chunks below the translator's ~5000-character
    request limit and the translated chunks are re-joined with spaces.
    Returns an "❌ Error ..." string instead of raising.
    """
    try:
        limit = 4500  # stay safely under the ~5000-char API cap
        pieces = [
            english_script[start:start + limit]
            for start in range(0, len(english_script), limit)
        ]
        translated = [
            GoogleTranslator(source='en', target='ur').translate(piece)
            for piece in pieces
        ]
        return " ".join(translated)
    except Exception as e:
        return f"❌ Error in translation: {str(e)}"
111
+
112
+
113
+
114
def save_english_file(content, topic):
    """Persist *content* under English_Scripts/<topic>_Eng_<YYYY_Mon_DD>.txt.

    Creates the directory on demand and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')  # e.g. 2025_Feb_21
    path = f"English_Scripts/{topic}_Eng_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
    return path
121
+
122
+
123
def save_urdu_file(content, topic):
    """Persist *content* under Urdu_Scripts/<topic>_Urdu_<YYYY_Mon_DD>.txt.

    Creates the directory on demand and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Urdu_Scripts/{topic}_Urdu_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
    return path
130
+
131
+
132
def save_final_urdu_file(topic, content):
    """Persist *content* under Urdu_Final/<topic>_Urdu_Final_<YYYY_Mon_DD>.txt.

    NOTE: parameter order is (topic, content) — reversed relative to the
    sibling save_* helpers; kept as-is because the UI wiring depends on it.
    Creates the directory on demand and returns the file path.
    """
    stamp = datetime.now().strftime("%Y_%b_%d")
    path = f"Urdu_Final/{topic}_Urdu_Final_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(content)
    return path
139
+
140
+
141
def finalize_process():
    """Return the fixed completion message shown in the status textbox."""
    return "✅ Script Generation Completed Successfully!"
143
+
144
+
145
def clear_old_files():
    """Delete every previously generated artifact file.

    Sweeps the four output directories (if present) and removes each file,
    logging deletion failures without aborting. Returns five empty strings
    so it can be wired directly to clear the five UI textboxes.
    """
    for folder in ("English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"):
        if not os.path.exists(folder):
            continue
        for path in glob.glob(f"{folder}/*"):
            try:
                os.remove(path)
            except Exception as e:
                # best-effort cleanup: report and keep going
                print(f"❌ Error deleting {path}: {e}")

    return "", "", "", "", ""
159
+
160
+
161
+
162
+
163
+ # βœ… Gradio UI
164
+ with gr.Blocks() as app:
165
+ gr.Markdown("# 🎬 AI-Powered Educational Script Generator")
166
+
167
+ topic_input = gr.Textbox(label="Enter Topic")
168
+ duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")
169
+
170
+
171
+ generate_button = gr.Button("Generate English Script")
172
+ eng_output = gr.Textbox(label="Generated English Script", interactive=False)
173
+ download_english_button = gr.Button("Download English Script")
174
+ download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])
175
+
176
+
177
+ # βœ… Keyword Extraction Section
178
+ extract_keywords_btn = gr.Button("πŸ”‘ Extract Keywords")
179
+ keyword_output = gr.Textbox(label="πŸ” Extracted Keywords", interactive=True)
180
+ download_keywords_btn = gr.Button("⬇️ Download Keywords")
181
+ download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])
182
+
183
+ translate_button = gr.Button("Generate Urdu Script")
184
+ urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
185
+ download_urdu_button = gr.Button("Download Urdu Script")
186
+ download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])
187
+
188
+
189
+ final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
190
+ download_final_urdu_button = gr.Button("Download Final Urdu Script")
191
+ download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])
192
+
193
+
194
+ # βœ… Button Actions
195
+ # generate_button.click(generate_script, inputs=[topic_input, duration_input], outputs=[eng_output])
196
+ generate_button.click(generate_script, inputs=[topic_input, duration_input], outputs=[eng_output])
197
+ extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
198
+ translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])
199
+
200
+ status_output = gr.Textbox(label="Status", interactive=False)
201
+ finalize_button = gr.Button("Finalize Process")
202
+ finalize_button.click(finalize_process, outputs=[status_output])
203
+
204
+ generate_button.click(
205
+ lambda topic, duration: (*clear_old_files(), generate_script(topic, duration)),
206
+ inputs=[topic_input, duration_input],
207
+ outputs=[keyword_output, urdu_output, final_edited_urdu_output, status_output] )
208
+
209
+
210
+
211
+
212
+ app.launch()