import hmac
import time
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pygooglenews import GoogleNews

# --- CONFIGURATION ---
SESSION_TIMEOUT_SECONDS = 1800  # 30 Minutes

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store before exposing the app publicly.
AUTH_USERS = [
    ("admin", "admin123"),
    ("user", "user123"),
]


# --- BACKEND LOGIC ---
def scrape_article_content(url):
    """Return up to 500 characters of paragraph text scraped from *url*.

    The page is fetched with a 4-second timeout. On HTTP 200 the text of all
    ``<p>`` elements is joined with spaces and truncated to 500 characters
    (with a trailing ``"..."`` when truncated).

    Returns:
        The snippet text, ``"Content extraction failed."`` for a non-200
        response, or ``"N/A (Scraping Blocked)"`` if anything raises.
    """
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=4)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            text = ' '.join(p.get_text() for p in soup.find_all('p'))
            return text[:500] + "..." if len(text) > 500 else text
        return "Content extraction failed."
    except Exception:
        # Deliberate best-effort: a failed scrape must never abort the search.
        return "N/A (Scraping Blocked)"


def _build_news_item(entry, fetch_content):
    """Convert one feed entry into a result row, tolerating missing fields."""
    source = entry.get('source') or {}
    link = entry.get('link', '')
    item = {
        # Shorten date string
        'Date': entry.published[:16] if 'published' in entry else 'N/A',
        'Source': source.get('title', 'N/A'),
        'Title': entry.get('title', 'N/A'),
        'Link': link,
    }
    if fetch_content:
        item['Snippet'] = scrape_article_content(link)
        time.sleep(0.1)  # small pause between article requests
    return item


def perform_search(query, start_date, end_date, lang, country, fetch_content):
    """Search Google News and export the results to a CSV file.

    Args:
        query: Search expression passed to ``GoogleNews.search``.
        start_date: Range start as ``YYYY-MM-DD``.
        end_date: Range end as ``YYYY-MM-DD``.
        lang: ``'Bangla'`` or ``'English'``; anything else falls back to ``'bn'``.
        country: ``'Bangladesh'``, ``'USA'``, ``'UK'`` or ``'India'``; anything
            else falls back to ``'BD'``.
        fetch_content: When truthy, each article is scraped for a ``Snippet``.

    Returns:
        ``(dataframe, log_text, csv_filename)`` on success, or
        ``(None, message, None)`` when the dates are invalid, nothing is
        found, or an unexpected error occurs.
    """
    # Validate dates first so bad input is rejected before the client is
    # built. TypeError covers a missing (None) value coming from the UI.
    try:
        datetime.strptime(start_date, '%Y-%m-%d')
        datetime.strptime(end_date, '%Y-%m-%d')
    except (ValueError, TypeError):
        return None, "❌ Error: Invalid date format. Please use YYYY-MM-DD.", None

    lang_map = {'Bangla': 'bn', 'English': 'en'}
    country_map = {'Bangladesh': 'BD', 'USA': 'US', 'UK': 'GB', 'India': 'IN'}
    gn = GoogleNews(
        lang=lang_map.get(lang, 'bn'),
        country=country_map.get(country, 'BD'),
    )

    try:
        log_text = f"🔎 Searching: {query} ({start_date} to {end_date})\n"
        search_result = gn.search(query=query, from_=start_date, to_=end_date)
        entries = search_result['entries']

        if not entries:
            return None, "⚠️ No articles found. Try a different keyword or date range.", None

        log_text += f"✅ Found {len(entries)} articles. Processing...\n"
        news_data = [_build_news_item(entry, fetch_content) for entry in entries]
        df = pd.DataFrame(news_data)

        # Save CSV (utf-8-sig so spreadsheet tools detect the encoding)
        filename = f"BD_News_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        df.to_csv(filename, index=False, encoding='utf-8-sig')

        return df, log_text + "🚀 Process Complete.", filename
    except Exception as e:
        # UI boundary: report the failure in the status box instead of crashing.
        return None, f"❌ System Error: {str(e)}", None


# --- AUTHENTICATION LOGIC ---
def _credentials_match(supplied, expected):
    """Compare two strings in constant time; non-string input never matches."""
    if not isinstance(supplied, str):
        return False
    return hmac.compare_digest(supplied.encode('utf-8'), expected.encode('utf-8'))


def authenticate(username, password):
    """Check the credentials against ``AUTH_USERS``.

    Returns:
        A 4-tuple of updates for (login view, app view, session state,
        password box). On success the login view is hidden, the app view is
        shown, a fresh session dict is stored and the password is cleared.
        On failure nothing changes and the session state is ``None``.
    """
    for valid_user, valid_pass in AUTH_USERS:
        # Evaluate both comparisons so timing does not reveal which one failed.
        user_ok = _credentials_match(username, valid_user)
        pass_ok = _credentials_match(password, valid_pass)
        if user_ok and pass_ok:
            return (
                gr.update(visible=False),
                gr.update(visible=True),
                {"logged_in": True, "time": time.time(), "user": username},
                gr.update(value=""),  # Clear password
            )
    return gr.update(), gr.update(), None, gr.update()


def check_session_and_search(query, start, end, lang, country, fetch, session_data):
    """Run a search only if the session is logged in and not timed out.

    Returns:
        A 6-tuple of (login view update, app view update, dataframe,
        csv filename, status text, session data). When the session is missing
        or expired, the login view is shown again and the session is cleared.
    """
    # 1. Check Login Status
    if not session_data or not session_data.get("logged_in"):
        return (gr.update(visible=True), gr.update(visible=False),
                None, None, "⚠️ Session Expired.", None)

    # 2. Check Timeout (a session without a timestamp is treated as expired)
    started_at = session_data.get("time")
    if started_at is None or (time.time() - started_at) > SESSION_TIMEOUT_SECONDS:
        return (gr.update(visible=True), gr.update(visible=False),
                None, None, "⚠️ Timeout (30m). Log in again.", None)

    # 3. Perform Search
    df, log, csv = perform_search(query, start, end, lang, country, fetch)
    return (gr.update(visible=False), gr.update(visible=True),
            df, csv, log, session_data)


def manual_logout():
    """Return updates that show the login view, hide the app and clear the session."""
    return gr.update(visible=True), gr.update(visible=False), None, "Logged out."
# --- UI THEME & MARKDOWN ---
# Custom Theme for a Professional Look
theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
    text_size="sm",
    spacing_size="sm",
)

# Each table row must sit on its own line for the Markdown table to render.
guide_markdown = """
### 🇧🇩 Search Logic Guide (সার্চ গাইড)
Create powerful filters using these operators.

| Goal | Operator | Example (Copy & Paste) | Description |
| :--- | :--- | :--- | :--- |
| **Both Required** | `AND` | `বিএনপি AND নির্বাচন` | Finds articles containing **both** keywords. |
| **Either One** | `OR` | `বন্যা OR জলোচ্ছ্বাস` | Finds articles containing **either** word. |
| **Exclude** | `-` | `আওয়ামী লীগ -শেখ হাসিনা` | Finds 'Awami League' but **removes** articles mentioning 'Sheikh Hasina'. |
| **Exact Phrase** | `""` | `"পদ্মা সেতু"` | Finds the exact phrase 'Padma Bridge', not just the separate words. |
| **Complex** | `()` | `(ঢাকা OR চট্টগ্রাম) AND ডেঙ্গু` | Finds Dengue news specifically for Dhaka or Chittagong. |
"""

# --- MAIN APP LAYOUT ---
with gr.Blocks(theme=theme, title="BD News Analyst Pro", css="footer {visibility: hidden}") as app:
    # Per-session login data: a dict set by authenticate(), None when logged out.
    session_state = gr.State()

    # === LOGIN VIEW ===
    with gr.Column(visible=True) as login_view:
        with gr.Row(variant="panel"):
            with gr.Column(scale=1):
                pass  # Spacer
            with gr.Column(scale=1):
                gr.Markdown("## 🔐 News Analyst Pro \n Please login to access the dashboard.")
                u_in = gr.Textbox(label="Username", placeholder="Enter username")
                p_in = gr.Textbox(label="Password", type="password", placeholder="Enter password")
                l_btn = gr.Button("Login", variant="primary")
                l_msg = gr.Markdown("")
            with gr.Column(scale=1):
                pass  # Spacer

    # === DASHBOARD VIEW ===
    with gr.Column(visible=False) as app_view:
        with gr.Row():
            with gr.Column(scale=4):
                gr.Markdown("# 🇧🇩 Bangladesh News Intelligence Tool")
            with gr.Column(scale=1):
                logout_btn = gr.Button("🚪 Logout", variant="stop", size="sm")

        with gr.Row():
            # --- Left Panel: Controls ---
            with gr.Column(scale=1, variant="panel"):
                gr.Markdown("### ⚙️ Search Configuration")

                # Search Guide Accordion
                with gr.Accordion("📘 How to Search (Click to Expand)", open=True):
                    gr.Markdown(guide_markdown)

                query_in = gr.Textbox(
                    label="Search Keyword (Supports Boolean)",
                    placeholder="e.g. অর্থনীতি AND (রিজার্ভ OR ডলার)",
                    lines=2,
                    value="রাজনীতি"
                )

                # Default range: the 30 days before the moment the UI is built.
                with gr.Row():
                    start_in = gr.Textbox(
                        label="Start Date",
                        value=(datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
                    )
                    end_in = gr.Textbox(
                        label="End Date",
                        value=datetime.now().strftime('%Y-%m-%d')
                    )

                with gr.Row():
                    lang_in = gr.Dropdown(["Bangla", "English"], value="Bangla", label="Language")
                    country_in = gr.Dropdown(
                        ["Bangladesh", "USA", "UK", "India"],
                        value="Bangladesh",
                        label="Region"
                    )

                fetch_chk = gr.Checkbox(label="Fetch Full Content? (Slower)", value=False)
                run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")
                status_box = gr.Textbox(label="System Status", interactive=False, lines=4)

            # --- Right Panel: Results ---
            with gr.Column(scale=2):
                gr.Markdown("### 📊 Search Results")
                results_df = gr.Dataframe(
                    label="News Data",
                    interactive=False,
                    wrap=True,
                    headers=["Date", "Source", "Title", "Link", "Snippet"]
                )
                download_btn = gr.File(label="📥 Download CSV Report")

    # === INTERACTIONS ===
    # Output lists must stay in the same order as each handler's return tuple.
    l_btn.click(authenticate, [u_in, p_in], [login_view, app_view, session_state, p_in])
    logout_btn.click(manual_logout, None, [login_view, app_view, session_state, l_msg])
    run_btn.click(
        check_session_and_search,
        inputs=[query_in, start_in, end_in, lang_in, country_in, fetch_chk, session_state],
        outputs=[login_view, app_view, results_df, download_btn, status_box, session_state]
    )

if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link; confirm that
    # public exposure is intended for this login-protected tool.
    app.launch(share=True)