Backened committed on
Commit
e10c4b7
·
verified ·
1 Parent(s): 7d2eadf

Delete script_generator.py

Browse files
Files changed (1) hide show
  1. script_generator.py +0 -209
script_generator.py DELETED
@@ -1,209 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import wikipediaapi
4
- from groq import Groq
5
- import torch
6
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
7
- from deep_translator import GoogleTranslator
8
- import yake
9
- from datetime import datetime
10
- import shutil
11
- import glob
12
-
13
-
14
# Groq API client.
# SECURITY: the API key must come from the environment (e.g. a deployment
# secret), never from a literal in source control. Any key that was previously
# committed to this file should be treated as leaked and revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )
client = Groq(api_key=_groq_api_key)
17
-
18
- # # ✅ Load M2M-100 Model
19
- # model_name = "facebook/m2m100_418M"
20
- # tokenizer = M2M100Tokenizer.from_pretrained(model_name)
21
- # model = M2M100ForConditionalGeneration.from_pretrained(model_name)
22
-
23
def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for ``topic``.

    The page is looked up through ``wikipediaapi``. When the page does not
    exist, a fixed placeholder sentence is returned instead of a summary.
    """
    wikipedia = wikipediaapi.Wikipedia(
        language="en",
        user_agent="EducationalScriptApp/1.0",
    )
    article = wikipedia.page(topic)
    if not article.exists():
        return "No Wikipedia summary available."
    return article.summary
30
-
31
def generate_script(topic, duration):
    """Build an English educational script about ``topic``.

    The Wikipedia summary for the topic is sent to the Groq chat model with a
    request to format it into a script of roughly ``duration`` minutes,
    using a pace of 130 words per minute to compute the word target.

    Returns the model's reply text, or an error message string if any step
    raises.
    """
    speaking_rate = 130  # words per minute
    try:
        source_text = fetch_wikipedia_summary(topic)
        word_budget = duration * speaking_rate
        prompt = f"Format the following factual content into a well-structured educational script in English with approximately {word_budget} words: \n{source_text}"
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        # Errors are reported as text so the UI textbox can display them.
        return f"❌ Error in script generation: {str(e)}"
44
-
45
-
46
- # ✅ Function to Extract Keywords Using YAKE
47
def extract_keywords(script):
    """Extract keyword phrases from ``script`` with YAKE.

    The extractor is configured for English, phrases of at most three words,
    and a deduplication limit of 0.9.

    Returns the phrase texts joined by ", ", or an error message string if
    extraction raises.
    """
    try:
        extractor = yake.KeywordExtractor(lan="en", n=3, dedupLim=0.9)
        phrases = []
        # Only the first element of each result (the phrase text) is kept.
        for result in extractor.extract_keywords(script):
            phrases.append(result[0])
        return ", ".join(phrases)
    except Exception as e:
        return f"❌ Error extracting keywords: {str(e)}"
60
-
61
-
62
def save_keywords_file(keywords, topic):
    """Write ``keywords`` to a dated text file under ``Keywords/``.

    The file is named ``<topic>_Keyword_<YYYY_Mon_DD>.txt``. Because ``topic``
    is free text typed by the user, path separators and other characters that
    are invalid in file names are replaced with ``_`` so the topic cannot
    create nested directories or escape the ``Keywords`` folder.

    Args:
        keywords: Text to write (UTF-8).
        topic: User-supplied topic used as the file-name prefix.

    Returns:
        The relative path of the file that was written.
    """
    import re  # local import: only needed for file-name sanitising

    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(topic))
    today = datetime.today().strftime('%Y_%b_%d')
    filename = f"Keywords/{safe_topic}_Keyword_{today}.txt"
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(keywords)
    return filename
69
-
70
-
71
- # # ✅ Function to Translate English Script to Urdu
72
- # def translate_to_urdu(english_script):
73
- # try:
74
- # google_translation = GoogleTranslator(source='en', target='ur').translate(english_script)
75
- # tokenizer.src_lang = "en"
76
- # max_length = 500
77
- # input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)]
78
- # refined_chunks = []
79
- # for chunk in input_chunks:
80
- # inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
81
- # translated_tokens = model.generate(
82
- # **inputs,
83
- # max_length=1024,
84
- # no_repeat_ngram_size=2,
85
- # forced_bos_token_id=tokenizer.get_lang_id("ur"),
86
- # num_beams=2
87
- # )
88
- # refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
89
- # return " ".join(refined_chunks)
90
- # except Exception as e:
91
- # return f"❌ Error in translation: {str(e)}"
92
-
93
-
94
def _split_into_chunks(text, max_chunk_size):
    """Split ``text`` into pieces of at most ``max_chunk_size`` characters,
    cutting at whitespace so no word is broken across two chunks."""
    chunks = []
    remaining = text.strip()
    while len(remaining) > max_chunk_size:
        # Look one character past the limit so a break exactly at the limit
        # is still found.
        window = remaining[:max_chunk_size + 1]
        cut = max(window.rfind(" "), window.rfind("\n"))
        if cut <= 0:
            # No whitespace in the window (one very long token): hard split.
            cut = max_chunk_size
        chunks.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()
    if remaining:
        chunks.append(remaining)
    return chunks


def translate_to_urdu(english_script):
    """Translate an English script to Urdu with Google Translate.

    The text is sent in chunks because the translator limits request size.
    Chunks are cut at whitespace rather than at a fixed character offset, so
    words are never split in half and re-joined with a stray space.

    Args:
        english_script: English text to translate.

    Returns:
        The Urdu translation (chunks joined with a single space), an empty
        string for empty or whitespace-only input, or an error message string
        if translation raises.
    """
    try:
        # Nothing to translate: skip the network call entirely.
        if not english_script or not english_script.strip():
            return ""

        max_chunk_size = 4500  # Stay below the 5000-character limit to be safe
        translator = GoogleTranslator(source='en', target='ur')
        translated_chunks = [
            translator.translate(chunk)
            for chunk in _split_into_chunks(english_script, max_chunk_size)
        ]
        return " ".join(translated_chunks)
    except Exception as e:
        # Errors are reported as text so the UI textbox can display them.
        return f"❌ Error in translation: {str(e)}"
108
-
109
-
110
-
111
def save_english_file(content, topic):
    """Write the English script to a dated text file under ``English_Scripts/``.

    The file is named ``<topic>_Eng_<YYYY_Mon_DD>.txt`` (e.g. a date part of
    ``2025_Feb_21``). Because ``topic`` is free text typed by the user, path
    separators and other characters that are invalid in file names are
    replaced with ``_`` so the topic cannot create nested directories or
    escape the ``English_Scripts`` folder.

    Args:
        content: Script text to write (UTF-8).
        topic: User-supplied topic used as the file-name prefix.

    Returns:
        The relative path of the file that was written.
    """
    import re  # local import: only needed for file-name sanitising

    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(topic))
    today = datetime.today().strftime('%Y_%b_%d')
    filename = f"English_Scripts/{safe_topic}_Eng_{today}.txt"
    os.makedirs(os.path.dirname(filename), exist_ok=True)  # Ensure directory exists
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    return filename
118
-
119
-
120
def save_urdu_file(content, topic):
    """Write the Urdu script to a dated text file under ``Urdu_Scripts/``.

    The file is named ``<topic>_Urdu_<YYYY_Mon_DD>.txt``. Because ``topic`` is
    free text typed by the user, path separators and other characters that are
    invalid in file names are replaced with ``_`` so the topic cannot create
    nested directories or escape the ``Urdu_Scripts`` folder.

    Args:
        content: Urdu text to write (UTF-8).
        topic: User-supplied topic used as the file-name prefix.

    Returns:
        The relative path of the file that was written.
    """
    import re  # local import: only needed for file-name sanitising

    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(topic))
    today = datetime.today().strftime('%Y_%b_%d')
    filename = f"Urdu_Scripts/{safe_topic}_Urdu_{today}.txt"
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    return filename
127
-
128
-
129
def save_final_urdu_file(topic, content):
    """Write the edited Urdu script to a dated text file under ``Urdu_Final/``.

    Note the parameter order: ``topic`` comes first here, unlike the other
    ``save_*`` helpers in this file.

    The file is named ``<topic>_Urdu_Final_<YYYY_Mon_DD>.txt``. Because
    ``topic`` is free text typed by the user, path separators and other
    characters that are invalid in file names are replaced with ``_`` so the
    topic cannot create nested directories or escape the ``Urdu_Final`` folder.

    Args:
        topic: User-supplied topic used as the file-name prefix.
        content: Final Urdu text to write (UTF-8).

    Returns:
        The relative path of the file that was written.
    """
    import re  # local import: only needed for file-name sanitising

    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(topic))
    date_str = datetime.now().strftime("%Y_%b_%d")
    filename = f"Urdu_Final/{safe_topic}_Urdu_Final_{date_str}.txt"
    os.makedirs(os.path.dirname(filename), exist_ok=True)  # Ensure the directory exists
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    return filename
136
-
137
-
138
def finalize_process():
    """Return the status message shown once the workflow is finalized."""
    completion_message = "✅ Script Generation Completed Successfully!"
    return completion_message
140
-
141
-
142
def clear_old_files():
    """Delete previously generated output files and return blanks for the UI.

    Every non-hidden entry inside the output folders (``English_Scripts``,
    ``Urdu_Scripts``, ``Urdu_Final``, ``Keywords``) is removed. Sub-directories
    are removed recursively; plain ``os.remove`` cannot delete a directory, so
    without this any nested output would survive every cleanup. The output
    folders themselves are kept. Deletion is best-effort: a failure is printed
    and the remaining entries are still processed.

    Returns:
        A tuple of five empty strings, used to clear the UI textboxes.
    """
    directories = ["English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"]

    for directory in directories:
        if not os.path.exists(directory):
            continue
        for entry in glob.glob(f"{directory}/*"):
            try:
                # A symlink is unlinked rather than followed, so nothing
                # outside the output folder is ever deleted.
                if os.path.isdir(entry) and not os.path.islink(entry):
                    shutil.rmtree(entry)
                else:
                    os.remove(entry)
            except OSError as e:
                print(f"❌ Error deleting {entry}: {e}")

    return "", "", "", "", ""
156
-
157
-
158
-
159
-
160
# Gradio UI
def _reset_and_generate(topic, duration):
    """Clear stale output, then generate a fresh English script.

    Returns one value per output textbox, in this order: English script,
    keywords, Urdu script, edited Urdu script, status. Everything except the
    new English script is blanked.
    """
    clear_old_files()  # called for its side effect: removes old files on disk
    return generate_script(topic, duration), "", "", "", ""


with gr.Blocks() as app:
    gr.Markdown("# 🎬 AI-Powered Educational Script Generator")

    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")

    # English script section
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)
    download_english_button = gr.Button("Download English Script")
    download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])

    # Keyword extraction section
    extract_keywords_btn = gr.Button("🔑 Extract Keywords")
    keyword_output = gr.Textbox(label="🔍 Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("⬇️ Download Keywords")
    download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])

    # Urdu translation section
    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])

    # Manually edited Urdu script section
    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])

    # Button actions
    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])

    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    finalize_button.click(finalize_process, outputs=[status_output])

    # A single handler for "Generate": it is registered only after every
    # textbox it resets exists. Registering two separate handlers on this
    # button generated the script twice per click, and the second one returned
    # more values than it had outputs.
    generate_button.click(
        _reset_and_generate,
        inputs=[topic_input, duration_input],
        outputs=[eng_output, keyword_output, urdu_output, final_edited_urdu_output, status_output],
    )


app.launch()