Backend committed on
Commit
097944f
Β·
verified Β·
1 Parent(s): a437164

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +208 -0
app.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import gradio as gr
import wikipediaapi
from groq import Groq
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from deep_translator import GoogleTranslator
import yake
from datetime import datetime

# --- Groq API client --------------------------------------------------------
# SECURITY FIX: the original file hard-coded a Groq API key in this commit.
# A key committed to a public repo is compromised — revoke it immediately.
# The key is now read from the environment (e.g. a Space secret) instead.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=_groq_api_key)

# --- M2M-100 translation model ----------------------------------------------
# NOTE(review): the model and tokenizer are loaded eagerly here, but the
# active translate_to_urdu() uses only GoogleTranslator; M2M-100 is referenced
# solely by a commented-out refinement path. Loading it costs significant RAM
# and startup time — consider lazy-loading or removing it.
model_name = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
23
+
24
def fetch_wikipedia_summary(topic):
    """Return the English Wikipedia summary for *topic*.

    Falls back to a fixed message when no page exists for the topic.
    """
    wiki = wikipediaapi.Wikipedia(
        user_agent="EducationalScriptApp/1.0",
        language="en",
    )
    page = wiki.page(topic)
    if not page.exists():
        return "No Wikipedia summary available."
    return page.summary
31
+
32
def generate_script(topic, duration):
    """Generate an English educational script of roughly `duration` minutes.

    Fetches factual content from Wikipedia, then asks the Groq LLM to format
    it to a word budget of ~130 words per minute. Returns the script text,
    or an error string prefixed with a cross mark on failure.
    """
    words_per_minute = 130
    try:
        facts = fetch_wikipedia_summary(topic)
        word_target = duration * words_per_minute
        prompt = (
            f"Format the following factual content into a well-structured "
            f"educational script in English with approximately "
            f"{word_target} words: \n{facts}"
        )
        completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Error in script generation: {str(e)}"
45
+
46
+ # # βœ… Function to Extract Keywords
47
+ # def extract_keywords(script):
48
+ # words = word_tokenize(script.lower())
49
+ # stop_words = set(stopwords.words('english'))
50
+ # filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
51
+ # word_freq = Counter(filtered_words)
52
+ # keywords = [word for word, freq in word_freq.most_common(10)]
53
+ # return ", ".join(keywords)
54
+
55
+
56
def extract_keywords(script):
    """Extract keyword phrases (up to trigrams) from `script` using YAKE.

    Returns a comma-separated string of keyword phrases, or an error string
    prefixed with a cross mark on failure.
    """
    try:
        extractor = yake.KeywordExtractor(
            lan="en",       # language
            n=3,            # max words per keyword phrase (trigrams)
            dedupLim=0.9,   # reduce redundant phrases
        )
        # extract_keywords() yields (phrase, score) pairs; keep only the text.
        phrases = [phrase for phrase, _score in extractor.extract_keywords(script)]
        return ", ".join(phrases)
    except Exception as e:
        return f"❌ Error extracting keywords: {str(e)}"
70
+
71
+
72
+ # # βœ… Function to Save Edited or Generated Keywords
73
+ # def save_keywords_file(keywords):
74
+ # filename = "keywords.txt"
75
+ # with open(filename, "w", encoding="utf-8") as f:
76
+ # f.write(keywords)
77
+ # return filename
78
+
79
+ # def save_keywords_file(keywords):
80
+ # if not keywords.strip():
81
+ # return "❌ No keywords available to save."
82
+
83
+ # filename = "keywords.txt"
84
+ # with open(filename, "w", encoding="utf-8") as f:
85
+ # f.write(keywords)
86
+ # return filename
87
+
88
def save_keywords_file(keywords, topic):
    """Write `keywords` to Keywords/<topic>_Keyword_<date>.txt and return the path."""
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Keywords/{topic}_Keyword_{stamp}.txt"
    # Create the target directory on first use.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(keywords)
    return path
95
+
96
+
97
+ # # βœ… Function to Translate English Script to Urdu
98
+ # def translate_to_urdu(english_script):
99
+ # try:
100
+ # google_translation = GoogleTranslator(source='en', target='ur').translate(english_script)
101
+ # tokenizer.src_lang = "en"
102
+ # max_length = 500
103
+ # input_chunks = [google_translation[i:i+max_length] for i in range(0, len(google_translation), max_length)]
104
+ # refined_chunks = []
105
+ # for chunk in input_chunks:
106
+ # inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
107
+ # translated_tokens = model.generate(
108
+ # **inputs,
109
+ # max_length=1024,
110
+ # no_repeat_ngram_size=2,
111
+ # forced_bos_token_id=tokenizer.get_lang_id("ur"),
112
+ # num_beams=2
113
+ # )
114
+ # refined_chunks.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
115
+ # return " ".join(refined_chunks)
116
+ # except Exception as e:
117
+ # return f"❌ Error in translation: {str(e)}"
118
+
119
+
120
def translate_to_urdu(english_script):
    """Translate an English script to Urdu via Google Translate.

    The text is split into 5000-character chunks (the translator's request
    limit), each chunk is translated independently, and the results are
    joined with single spaces. Returns an error string on failure.
    """
    chunk_size = 5000
    try:
        translated = []
        for start in range(0, len(english_script), chunk_size):
            piece = english_script[start:start + chunk_size]
            translated.append(
                GoogleTranslator(source='en', target='ur').translate(piece)
            )
        return " ".join(translated)
    except Exception as e:
        return f"❌ Error in translation: {str(e)}"
132
+
133
+
134
def save_english_file(content, topic):
    """Persist the English script under English_Scripts/ and return its path."""
    stamp = datetime.today().strftime('%Y_%b_%d')  # e.g. 2025_Feb_21
    path = f"English_Scripts/{topic}_Eng_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)  # ensure directory exists
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
141
+
142
+
143
def save_urdu_file(content, topic):
    """Persist the Urdu script under Urdu_Scripts/ and return its path."""
    stamp = datetime.today().strftime('%Y_%b_%d')
    path = f"Urdu_Scripts/{topic}_Urdu_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
150
+
151
+
152
def save_final_urdu_file(topic, content):
    """Persist the edited final Urdu script under Urdu_Final/ and return its path.

    NOTE: parameter order is (topic, content) — opposite of the other save_*
    helpers — because the UI wires inputs in that order.
    """
    stamp = datetime.now().strftime("%Y_%b_%d")
    path = f"Urdu_Final/{topic}_Urdu_Final_{stamp}.txt"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as out:
        out.write(content)
    return path
159
+
160
+
161
def finalize_process():
    """Return the static completion status message shown in the UI."""
    # NOTE(review): "βœ…" appears to be a mojibake'd check-mark emoji in the
    # committed file; reproduced byte-for-byte to preserve behavior.
    return "βœ… Script Generation Completed Successfully!"
163
+
164
+
165
+
166
+
167
# βœ… Gradio UI
with gr.Blocks() as app:
    gr.Markdown("# 🎬 AI-Powered Educational Script Generator")

    topic_input = gr.Textbox(label="Enter Topic")
    duration_input = gr.Slider(minimum=1, maximum=30, step=1, label="Duration (minutes)")

    # English script: generate + download
    generate_button = gr.Button("Generate English Script")
    eng_output = gr.Textbox(label="Generated English Script", interactive=False)
    download_english_button = gr.Button("Download English Script")
    download_english_button.click(save_english_file, inputs=[eng_output, topic_input], outputs=[gr.File()])

    # Keyword extraction + download
    extract_keywords_btn = gr.Button("πŸ”‘ Extract Keywords")
    keyword_output = gr.Textbox(label="πŸ” Extracted Keywords", interactive=True)
    download_keywords_btn = gr.Button("⬇️ Download Keywords")
    # BUG FIX: save_keywords_file(keywords, topic) takes two arguments, but
    # the original wiring passed only keyword_output, so clicking the button
    # raised TypeError. topic_input is now supplied as the second input.
    download_keywords_btn.click(save_keywords_file, inputs=[keyword_output, topic_input], outputs=[gr.File()])

    # Urdu translation + download
    translate_button = gr.Button("Generate Urdu Script")
    urdu_output = gr.Textbox(label="Translated Urdu Script", interactive=False, rtl=True)
    download_urdu_button = gr.Button("Download Urdu Script")
    download_urdu_button.click(save_urdu_file, inputs=[urdu_output, topic_input], outputs=[gr.File()])

    # Manually edited final Urdu script + download
    # (save_final_urdu_file takes (topic, content) — inputs ordered to match)
    final_edited_urdu_output = gr.Textbox(label="Edited Urdu Script", interactive=True, rtl=True)
    download_final_urdu_button = gr.Button("Download Final Urdu Script")
    download_final_urdu_button.click(save_final_urdu_file, inputs=[topic_input, final_edited_urdu_output], outputs=[gr.File()])

    # Main button actions
    generate_button.click(generate_script, inputs=[topic_input, duration_input], outputs=[eng_output])
    extract_keywords_btn.click(extract_keywords, inputs=[eng_output], outputs=[keyword_output])
    translate_button.click(translate_to_urdu, inputs=[eng_output], outputs=[urdu_output])

    status_output = gr.Textbox(label="Status", interactive=False)
    finalize_button = gr.Button("Finalize Process")
    finalize_button.click(finalize_process, outputs=[status_output])

app.launch()