Backend committed on
Commit
dbd9bf0
·
verified ·
1 Parent(s): bc75df6

Delete script_generator.py

Browse files
Files changed (1) hide show
  1. script_generator.py +0 -212
script_generator.py DELETED
@@ -1,212 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import wikipediaapi
4
- from groq import Groq
5
- import torch
6
- from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
7
- from deep_translator import GoogleTranslator
8
- import yake
9
- from datetime import datetime
10
- import shutil
11
- import glob
12
-
13
# ✅ Set API Key
# SECURITY FIX: the original hard-coded a live Groq API key in source control.
# That credential is exposed and must be rotated. Read the key from the
# environment instead and fail fast with a clear message when it is missing.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before starting the app."
    )
client = Groq(api_key=_groq_api_key)
20
-
21
- # # ✅ Load M2M-100 Model
22
- # model_name = "facebook/m2m100_418M"
23
- # tokenizer = M2M100Tokenizer.from_pretrained(model_name)
24
- # model = M2M100ForConditionalGeneration.from_pretrained(model_name)
25
-
26
def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for *topic*.

    Uses the wikipedia-api client with a custom user agent. If no page
    exists for the topic, returns "No Wikipedia summary available.".
    """
    wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki.page(topic)
    if not page.exists():
        return "No Wikipedia summary available."
    return page.summary
33
-
34
def generate_script(topic, duration):
    """Produce an English educational script on *topic* for *duration* minutes.

    Pulls factual material from Wikipedia, then asks the Groq LLM to shape it
    into roughly ``duration * 130`` words. On any failure, returns an error
    string beginning with "❌" instead of raising.
    """
    try:
        factual_content = fetch_wikipedia_summary(topic)
        # ~130 spoken words per minute of narration
        target_words = duration * 130
        prompt = (
            "Format the following factual content into a well-structured "
            f"educational script in English with approximately {target_words} "
            f"words: \n{factual_content}"
        )
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Error in script generation: {str(e)}"
47
-
48
-
49
# ✅ Keyword extraction via YAKE
def extract_keywords(script):
    """Extract keyword phrases (up to trigrams) from *script* using YAKE.

    Returns a comma-separated string of the keyword texts, or an error
    string beginning with "❌" on failure.
    """
    try:
        extractor = yake.KeywordExtractor(
            lan="en",       # language
            n=3,            # max words per keyword phrase (trigrams)
            dedupLim=0.9,   # reduce redundant phrases
        )
        scored_keywords = extractor.extract_keywords(script)
        # extract_keywords yields (text, score) pairs; keep only the text
        return ", ".join(text for text, _score in scored_keywords)
    except Exception as e:
        return f"❌ Error extracting keywords: {str(e)}"
63
-
64
-
65
def save_keywords_file(keywords, topic):
    """Write *keywords* to ``Keywords/<topic>_Keyword_<date>.txt``.

    Creates the directory if needed and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Keywords/{topic}_Keyword_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(keywords)
    return path
72
-
73
-
74
- # # ✅ Function to Translate English Script to Urdu
75
- # def translate_to_urdu(english_script):
76
- # try:
77
- # google_translation = GoogleTranslator(source='en', target='ur').translate(english_script)
78
- # tokenizer.src_lang = "en"
79
- # max_length = 500
80
- # input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)]
81
- # refined_chunks = []
82
- # for chunk in input_chunks:
83
- # inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
84
- # translated_tokens = model.generate(
85
- # **inputs,
86
- # max_length=1024,
87
- # no_repeat_ngram_size=2,
88
- # forced_bos_token_id=tokenizer.get_lang_id("ur"),
89
- # num_beams=2
90
- # )
91
- # refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
92
- # return " ".join(refined_chunks)
93
- # except Exception as e:
94
- # return f"❌ Error in translation: {str(e)}"
95
-
96
-
97
def translate_to_urdu(english_script):
    """Translate *english_script* from English to Urdu via Google Translate.

    The input is split into chunks below Google's ~5000-character request
    limit, each chunk is translated separately, and the results are joined
    with spaces. Returns an error string beginning with "❌" on failure.
    """
    try:
        # ✅ Stay below the ~5000-char Google Translator limit to be safe
        limit = 4500
        translated_parts = [
            GoogleTranslator(source='en', target='ur').translate(
                english_script[start:start + limit]
            )
            for start in range(0, len(english_script), limit)
        ]
        return " ".join(translated_parts)
    except Exception as e:
        return f"❌ Error in translation: {str(e)}"
111
-
112
-
113
-
114
def save_english_file(content, topic):
    """Write the English script to ``English_Scripts/<topic>_Eng_<date>.txt``.

    Creates the directory if needed and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')  # e.g. 2025_Feb_21
    path = f"English_Scripts/{topic}_Eng_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(content)
    return path
121
-
122
-
123
def save_urdu_file(content, topic):
    """Write the Urdu script to ``Urdu_Scripts/<topic>_Urdu_<date>.txt``.

    Creates the directory if needed and returns the file path.
    """
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Urdu_Scripts/{topic}_Urdu_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(content)
    return path
130
-
131
-
132
def save_final_urdu_file(topic, content):
    """Write the edited Urdu script to ``Urdu_Final/<topic>_Urdu_Final_<date>.txt``.

    Note the (topic, content) parameter order — the reverse of the other
    save helpers. Creates the directory if needed and returns the path.
    """
    stamp = datetime.now().strftime("%Y_%b_%d")
    path = f"Urdu_Final/{topic}_Urdu_Final_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(content)
    return path
139
-
140
-
141
def finalize_process():
    """Return the completion status message shown in the Status textbox."""
    return "✅ Script Generation Completed Successfully!"
143
-
144
-
145
def clear_old_files():
    """Delete every generated file in the four output directories.

    Scans English_Scripts, Urdu_Scripts, Urdu_Final and Keywords, removing
    each file found; deletion errors are printed, never raised. Returns
    five empty strings so it can be wired straight to five UI textboxes.
    """
    for folder in ("English_Scripts", "Urdu_Scripts", "Urdu_Final", "Keywords"):
        if not os.path.exists(folder):
            continue
        for path in glob.glob(f"{folder}/*"):
            try:
                os.remove(path)
            except Exception as e:
                print(f"❌ Error deleting {path}: {e}")
    # ✅ One empty string per textbox to clear in the UI
    return "", "", "", "", ""
159
-
160
-
161
-
162
-
163
# ✅ Gradio UI
with gr.Blocks() as app:
    gr.Markdown("# 🎬 AI-Powered Educational Script Generator")

    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")

    # English script generation + download
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)
    download_english_button = gr.Button("Download English Script")
    download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])

    # ✅ Keyword Extraction Section
    extract_keywords_btn = gr.Button("🔑 Extract Keywords")
    keyword_output = gr.Textbox(label="🔍 Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("⬇️ Download Keywords")
    download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])

    # Urdu translation + download
    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])

    # Manually edited final Urdu script + download
    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])

    # ✅ Button Actions
    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])

    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    finalize_button.click(finalize_process, outputs=[status_output])

    # BUG FIX: the original registered generate_button.click twice, and the
    # second handler returned 6 values (the 5 empty strings from
    # clear_old_files() plus the script) into only 4 output components,
    # which fails at runtime. Register a single handler that deletes stale
    # generated files, clears the downstream fields, and fills eng_output.
    def _clear_and_generate(topic, duration):
        clear_old_files()  # side effect: remove previously generated files
        return generate_script(topic, duration), "", "", "", ""

    generate_button.click(
        _clear_and_generate,
        inputs=[topic_input, duration_input],
        outputs=[eng_output, keyword_output, urdu_output, final_edited_urdu_output, status_output],
    )

app.launch()