ayush2917 committed on
Commit
dc8759a
·
verified ·
1 Parent(s): 106ef2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -1
app.py CHANGED
@@ -118,4 +118,103 @@ def fetch_from_websites(topic, current_time, time_24h_ago):
118
  pub_date = (current_time - timedelta(seconds=time_diff)).strftime("%Y-%m-%dT%H:%M:%SZ")
119
 
120
  desc_elem = item.find_next('p') or item.find_next('div', class_=['summary', 'content', 'desc'])
121
- raw_desc = desc_elem.get_text(strip=True) if desc_elem else f"Latest {topic} update from {url
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  pub_date = (current_time - timedelta(seconds=time_diff)).strftime("%Y-%m-%dT%H:%M:%SZ")
119
 
120
  desc_elem = item.find_next('p') or item.find_next('div', class_=['summary', 'content', 'desc'])
121
+ raw_desc = desc_elem.get_text(strip=True) if desc_elem else f"Latest {topic} update from {url.split('/')[2]}."
122
+
123
+ articles.append({
124
+ "title": title,
125
+ "publishedAt": pub_date,
126
+ "raw_description": raw_desc
127
+ })
128
+ except Exception as e:
129
+ print(f"Failed to fetch from {url}: {e}")
130
+ if len(articles) >= 10:
131
+ break
132
+
133
+ while len(articles) < 10:
134
+ action = random.choice(["Surges", "Expands", "Grows", "Hits"])
135
+ metric = random.choice(["$500M", "20%", "$1B", "15%"])
136
+ headline = f"{topic} {action} {metric} in 2025"
137
+ if headline not in used_headlines:
138
+ used_headlines.add(headline)
139
+ time_diff = random.randint(0, 24 * 60 * 60)
140
+ pub_date = (current_time - timedelta(seconds=time_diff)).strftime("%Y-%m-%dT%H:%M:%SZ")
141
+ articles.append({
142
+ "title": headline,
143
+ "publishedAt": pub_date,
144
+ "raw_description": f"Simulated {topic} news due to insufficient web data."
145
+ })
146
+
147
+ return articles
148
+
149
def generate_bedrock_response(model, title, raw_desc, topic, current_date_str):
    """Ask a Bedrock model to write a short description for one article.

    Args:
        model: Mapping with "modelId", "contentType", "accept" and
            "body_template" keys. The template's "prompt" entry must contain
            a ``{prompt}`` placeholder, which is filled in here.
        title: Headline of the article.
        raw_desc: Raw description text; also used as the fallback result.
        topic: Topic the article belongs to.
        current_date_str: Date string embedded in the prompt.

    Returns:
        The stripped text produced by the model. If the response contains no
        usable text (missing key, empty ``outputs`` list, or blank text), the
        stripped ``raw_desc`` is returned instead. If the request or response
        parsing raises, the first 400 characters of ``raw_desc`` are returned.
    """
    prompt = (
        f"Generate a 60-80 word news article description for '{topic}' based on: "
        f"Headline: {title}. Raw data: {raw_desc}. Ensure it’s unique, concise, "
        f"reflects trends as of {current_date_str}, and focuses on {topic} relevance "
        f"and economic implications."
    )
    # Shallow copy is enough: only the top-level "prompt" entry is replaced.
    body_dict = model["body_template"].copy()
    body_dict["prompt"] = body_dict["prompt"].format(prompt=prompt)

    try:
        response = bedrock.invoke_model(
            modelId=model["modelId"],
            contentType=model["contentType"],
            accept=model["accept"],
            body=json.dumps(body_dict)
        )
        response_body = json.loads(response['body'].read().decode('utf-8'))

        # Model ids containing "mistral" are read from outputs[0]["text"];
        # every other model is read from the top-level "generation" key.
        if "mistral" in model["modelId"]:
            # `or [{}]` also covers an empty list, which previously raised
            # IndexError and was reported as a model failure.
            outputs = response_body.get('outputs') or [{}]
            text = outputs[0].get('text')
        else:
            text = response_body.get('generation')

        # A missing or blank generation must not become an empty description.
        cleaned = (text or "").strip()
        return cleaned or raw_desc.strip()
    except Exception as e:
        print(f"Bedrock model {model['modelId']} failed: {e}")
        return raw_desc[:400]
172
+
173
def fetch_news(topics, total_articles=10, model_index=0):
    """Collect articles for every topic, describe them, and publish the result.

    For each topic the raw articles come from ``fetch_from_websites``; at most
    ``total_articles`` of them per topic get a "description" generated by
    ``generate_bedrock_response`` and lose their "raw_description" key. The
    combined list is stored in the module-level ``latest_articles`` and written
    to ``latest_india_news.json``.

    Args:
        topics: Sequence of topic strings to fetch.
        total_articles: Maximum number of articles kept per topic.
        model_index: Index into ``MODELS``; wrapped with modulo.

    Returns:
        The list stored in ``latest_articles``.
    """
    global latest_articles
    now = datetime.utcnow()
    window_start = now - timedelta(hours=24)
    today = now.strftime("%Y-%m-%d")

    collected = []
    chosen_model = MODELS[model_index % len(MODELS)]

    for topic in topics:
        candidates = fetch_from_websites(topic, now, window_start)
        for entry in candidates[:total_articles]:
            description = generate_bedrock_response(
                chosen_model, entry["title"], entry["raw_description"], topic, today
            )
            entry["description"] = description
            # The raw text is only an input to the model; drop it from the output.
            entry.pop("raw_description")
            collected.append(entry)

    limit = total_articles * len(topics)
    latest_articles = collected[:limit]
    with open('latest_india_news.json', 'w', encoding='utf-8') as out_file:
        json.dump(latest_articles, out_file, indent=4)
    return latest_articles
195
+
196
def news_fetcher_thread():
    """Refresh the news every five minutes, forever.

    Topics are read once via ``get_user_topics`` and reused for every cycle.
    Each cycle calls ``fetch_news`` with a model index that advances by one
    per cycle, then sleeps for whatever remains of the 300-second period.

    A failing cycle is logged and skipped instead of propagating, so a single
    error does not end the loop and stop all future refreshes.
    """
    cycle_count = 0
    selected_topics = get_user_topics()  # Get topics once at start
    while True:
        # monotonic() is immune to wall-clock adjustments while measuring elapsed time.
        start_time = time.monotonic()
        try:
            fetch_news(selected_topics, total_articles=10, model_index=cycle_count % len(MODELS))
        except Exception as e:
            print(f"News fetch cycle {cycle_count} failed: {e}")
        elapsed_time = time.monotonic() - start_time
        sleep_time = max(300 - elapsed_time, 0)  # 5 minutes = 300 seconds
        time.sleep(sleep_time)
        cycle_count += 1
206
+
207
@app.route('/')
def index():
    """Render the ``index.html`` template, passing ``TOPICS`` as ``topics``."""
    page = render_template('index.html', topics=TOPICS)
    return page
210
+
211
@app.route('/news')
def get_news():
    """Return the current contents of ``latest_articles`` as a JSON response."""
    payload = jsonify(latest_articles)
    return payload
214
+
215
if __name__ == "__main__":
    # Run the news fetcher in a background daemon thread so it does not
    # keep the process alive once the web server stops.
    fetch_thread = threading.Thread(target=news_fetcher_thread)
    fetch_thread.daemon = True
    fetch_thread.start()
    # Serve the Flask app on all interfaces, port 5000, with debug mode off.
    app.run(debug=False, port=5000, host='0.0.0.0')