Spaces:

jsemrau
/

Postwriter

Runtime error

File size: 39,844 Bytes

#!/usr/bin/env python3
"""
LinkedIn Post Generator - Gradio App
Based on the Jupyter notebook for generating LinkedIn posts from news articles
"""

import gradio as gr
import time
import copy
from datetime import date, timedelta, timezone, datetime
import json
import csv
import ast
import os
import sys
import re
import pandas as pd
import numpy as np
import logging
import requests
from bs4 import BeautifulSoup
from io import BytesIO
from PyPDF2 import PdfReader
import urllib.parse
import dateutil.parser
from dateutil import parser as dateutil_parser
from tldextract import extract
from urllib.parse import quote_plus

from collections import defaultdict
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
from utils import clean_url, get_body,ner_tagger,remove_duplicate_relationships
# Numeric id of the default news backend (handle_search maps "News API" to 2).
news_selector = 2

# Root logger reports INFO and above.
logging.basicConfig(level=logging.INFO)

# Point the Hugging Face caches at /tmp.
os.environ.update(
    {
        "HF_HOME": "/tmp/huggingface",
        "TRANSFORMERS_CACHE": "/tmp/huggingface/transformers",
        "HF_DATASETS_CACHE": "/tmp/huggingface/datasets",
    }
)

# Default search query shown in the UI, and the objective interpolated into
# the planning prompt of process_single_article.
masterQuery = (
    '"FinTech Agents" OR "Finance Agents" OR   "Cognitive Agents" OR  '
    '"Investment Agents" OR  "AI Agents" OR "Autonomous Agents" OR  "Agentic AI"'
)
goal = "maximum engagement and SEO"


from gliner import GLiNER

# Import your existing libraries (make sure these are installed)
try:
    from huggingface_hub import login
    from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, TransformersModel
    from smolagents import WebSearchTool
    from tldextract import extract
    from dotenv import load_dotenv
    from connector import get_google_news, get_news_articles

except ImportError as e:
    print(f"Missing required library: {e}")
    print("Please install required packages:")
    print("pip install gradio huggingface_hub smolagents tldextract GoogleNews gliner feedparser pandas numpy requests beautifulsoup4 python-dateutil python-dotenv")
    sys.exit(1)

# Model handles shared across the module; they stay None until one of the
# initialisation functions assigns them.
ner_model = agent = writer_agent = editor_agent = None

# Configuration pulled from the process environment.
HF_TOKEN = os.environ.get('HF_TOKEN')
openai_key = os.environ.get('OPENAI')
DEFAULT_INTERESTS = os.environ.get(
    'INTERESTS',
    'cognition, sentience, finance, investing, orchestration',
)
USE_LOCAL_MODELS = os.environ.get('USE_LOCAL_MODELS', 'false').lower() == 'true'

# Warn at import time when no HuggingFace token is configured.
if not HF_TOKEN:
    print("❌ HuggingFace token not found. Please check your .env file.")


def check_environment():
    """Report whether the required environment configuration is present.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is False when ``HF_TOKEN`` is unset
        or empty; otherwise it is True and ``message`` summarises the masked
        token, the default interests and the local-model flag.
    """
    if not HF_TOKEN:
        return False, "❌ HF_TOKEN not found in .env file. Please add your HuggingFace token."

    # Reveal only the last four characters; tokens of four characters or
    # fewer are masked entirely.
    token_tail = HF_TOKEN[-4:] if len(HF_TOKEN) > 4 else '****'
    summary = (
        f"✅ Environment configured successfully!\n"
        f"🔑 HF Token: {'*' * 20}{token_tail}\n"
        f"🎯 Interests: {DEFAULT_INTERESTS}\n"
        f"🖥️ Use Local Models: {USE_LOCAL_MODELS}"
    )
    return True, summary

# Hosted model id shared by every agent that initialize_models() creates.
_AGENT_MODEL_ID = "Qwen/Qwen3-Coder-480B-A35B-Instruct"


def _build_engine(temperature):
    """Create a fireworks-ai inference client for the shared model id."""
    return InferenceClientModel(
        api_key=HF_TOKEN,
        model_id=_AGENT_MODEL_ID,
        timeout=3000,
        provider="fireworks-ai",
        temperature=temperature,
    )


def _build_agent(model, name, description, max_steps):
    """Create a CodeAgent with no tools and the given name and step budget."""
    return CodeAgent(
        model=model,
        tools=[],
        add_base_tools=False,
        name=name,
        description=description,
        max_steps=max_steps,
    )


def initialize_models(use_local=None, interests=None):
    """Initialize the NER model and the data, writer and editor agents.

    Assigns the module-level ``ner_model``, ``agent``, ``writer_agent`` and
    ``editor_agent``. The assignments happen one after another, so if a step
    raises, the globals assigned before the failure keep their new values and
    the remaining ones keep whatever they held before the call.

    Args:
        use_local: Kept for interface compatibility; not consulted here.
        interests: Kept for interface compatibility; not consulted here.

    Returns:
        str: A status message, either the success text or a description of
        what went wrong. No exception escapes this function.
    """
    global agent, writer_agent, editor_agent, ner_model

    if not HF_TOKEN:
        return "❌ HuggingFace token not found. Please check your .env file."

    try:
        login(HF_TOKEN, add_to_git_credential=False)

        print("Initialize NER")
        ner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
        print("Initialized NER")

        # The data agent and the writer agent share one engine.
        llm_engine = _build_engine(temperature=0.25)
        agent = _build_agent(
            llm_engine,
            name="data_agent",
            description="Runs data analysis for you.",
            max_steps=1,
        )
        writer_agent = _build_agent(
            llm_engine,
            name="writer_agent",
            description="Write an engaging and creative LinkedIn post.",
            max_steps=5,
        )

        # The editor agent gets its own engine with a higher temperature.
        writer_engine = _build_engine(temperature=0.4)
        editor_agent = _build_agent(
            writer_engine,
            name="editor_agent",
            description="Edits LinkedIn post.",
            max_steps=5,
        )

        return "✅ Models initialized successfully!"

    except Exception as e:
        # Top-level boundary: every failure is reported as a status string.
        return f"❌ Error initializing models: {str(e)}"

def initialize_editor():
    """Build and return a standalone editor agent.

    Logs in to HuggingFace, creates an inference client for
    "Qwen/Qwen3-235B-A22B-Thinking-2507" and wraps it in a CodeAgent named
    "editor_agent". The agent is only returned: this function does not assign
    the module-level ``editor_agent``, so the caller has to store the result.

    Returns:
        The new CodeAgent on success. When ``HF_TOKEN`` is missing or any
        setup step raises, an error message string is returned instead, so
        callers should check the type of the result.
    """
    if not HF_TOKEN:
        return "❌ HuggingFace token not found. Please check your .env file."

    try:
        login(HF_TOKEN, add_to_git_credential=False)

        writer_engine = InferenceClientModel(
            api_key=HF_TOKEN,
            model_id="Qwen/Qwen3-235B-A22B-Thinking-2507",
            timeout=3000,
            provider="fireworks-ai",
            temperature=0.4,
        )

        # Local name differs from the module-level ``editor_agent`` to make
        # it obvious that the global is not being rebound here.
        editor = CodeAgent(
            model=writer_engine,
            tools=[],
            add_base_tools=False,
            name="editor_agent",
            description="Edits LinkedIn post.",
            max_steps=5,
        )

        print(type(editor))
        return editor

    except Exception as e:
        # Top-level boundary: failures are reported as a status string.
        return f"❌ Error initializing editor: {str(e)}"

def edit_single_article(post, edit_prompt):
    """Apply a user's edit instructions to a generated LinkedIn post.

    Args:
        post: Dict holding the generated post under 'linkedin_post'. It is
            mutated in place on success.
        edit_prompt: Free-text instructions from the user.

    Returns:
        dict: On success, ``post`` with the previous text appended to
        ``post['history']`` and 'linkedin_post' replaced by the editor's
        answer. If the editor call raises, ``post`` is returned unchanged.
        If the editor is not initialised or ``post`` carries no
        'linkedin_post', ``{"error": message}`` is returned, the same shape
        process_single_article uses for failures.
    """
    if editor_agent is None:
        return {"error": "❌ Editor model not initialized. Please initialize models first."}

    if not isinstance(post, dict) or 'linkedin_post' not in post:
        return {"error": "❌ No LinkedIn post available to edit."}

    tmp_post = post['linkedin_post']

    # NOTE(review): instruction 6 refers to a "body" tag, but only the post
    # text is placed inside <input>; confirm whether the article body should
    # be supplied as well.
    prompt = f"""Role: 
                    You are the editor agent.  

                    Task:  
                    • Adjust the provided post based on the requirements of the user.  

                    Format:  
                    • Input text will be wrapped in <input> tags.  
                    • Keep the original unless instructed otherwise. 
                    • Edit only where requested.
                    • Use similar language and style as in the input.

                    Instructions:  
                    1. Think step by step internally before answering.  
                    2. Do not include your reasoning in the final output.  
                    3. If uncertain, return the original post.  
                    4. Apply the user's edit instructions while maintaining the professional LinkedIn tone.
                    5. Keep the post length appropriate for LinkedIn (250-450 words).
                    6. If the user requests to check the original document use the "body" tag in the input.

                    User Edit Instructions: {edit_prompt}

                    Input:  
                    <input>  
                    {tmp_post}  
                    </input>  

                    Output:  
                    [Your edited post goes here.]  
                    """

    try:
        # reset=False: presumably keeps the agent's memory of earlier edits;
        # confirm against the smolagents documentation.
        edited_response = editor_agent.run(prompt, reset=False)
    except Exception as e:
        # Best effort: a failed edit leaves the post (and its history) as is.
        print(f"Error editing post: {e}")
        return post

    # Record the previous version only once a replacement actually exists.
    post.setdefault('history', []).append(tmp_post)
    post['linkedin_post'] = edited_response

    # Persisting the result is a convenience; a write or serialisation
    # problem must not discard the edit that already succeeded.
    try:
        with open('edit_output.json', 'w', encoding='utf-8') as f:
            json.dump(post, f, indent=2)
    except (OSError, TypeError, ValueError) as e:
        print(f"Error editing post: {e}")

    return post
    
def _parse_verdict(verdict):
    """Coerce the relevance agent's reply into a dict.

    A dict is returned untouched. A string is parsed first as JSON and then as
    a Python literal, because the prompt asks for JSON but shows a
    single-quoted example. Anything that does not end up as a dict yields a
    fallback dict carrying the raw reply under 'summary'.
    """
    if isinstance(verdict, dict):
        return verdict

    fallback = {"verdict": "Unknown", "summary": verdict, "relevance": 0, "classification": "Unknown"}
    if not isinstance(verdict, str):
        return fallback

    last_error = None
    for loader in (json.loads, ast.literal_eval):
        try:
            parsed = loader(verdict)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
            last_error = e
            continue
        if isinstance(parsed, dict):
            return parsed
        last_error = TypeError(f"expected a dict, got {type(parsed).__name__}")

    print("Failed to parse verdict:", last_error)
    return fallback


def process_single_article(post, interests):
    """Process a single news article and generate a LinkedIn post for it.

    Pipeline: (re)initialise the models, fetch the article body, tag named
    entities, then run the data agent for a summary, an entity narrative, an
    ASCII knowledge graph, a relevance verdict and a posting plan. The writer
    agent then turns the plan into a writing prompt and finally into the post.

    Args:
        post: Dict describing the article; 'title', 'media', 'date', 'link'
            and 'domain' are required.
        interests: Text describing the user's interests, used in the
            relevance prompt.

    Returns:
        dict: The article fields plus every intermediate artefact and the
        final text under 'linkedin_post'. On a recoverable failure (models
        unavailable, missing fields, no body, no entities) the dict has a
        single 'error' key instead.
    """
    # Delegate to the shared initialiser instead of repeating its setup here.
    # NOTE(review): this still rebuilds the NER model and all agents for every
    # article, as the original inline code did; consider initialising once.
    print(initialize_models(interests=interests))

    if agent is None or ner_model is None or writer_agent is None or editor_agent is None:
        return {"error": f"Models agent {agent}, ner_model {type(ner_model)} write_agent {writer_agent}, editor_agent {editor_agent} not initialized. Please initialize models first."}

    missing = [key for key in ('title', 'media', 'date', 'link', 'domain') if key not in post]
    if missing:
        return {"error": f"Article is missing required fields: {', '.join(missing)}"}

    title = post['title']
    media = post['media']
    # Named ``published`` so the imported ``date`` class is not shadowed.
    published = post['date']
    url = clean_url(post['link'])
    domain = post['domain']

    # Get article body
    body = get_body(url)
    if not body:
        return {"error": f"Could not extract content from {url}"}

    # Extract named entities
    ner_tags = ner_tagger(body, ner_model)

    if len(ner_tags) == 0:
        return {"error": "No named entities found in article"}

    # Generate body summary
    prompt1= f"""Role: 
                You are a reasoning agent skilled at summarizing text accurately and concisely.  

                Task:  
                Summarize the following text in ~150 words.  

                Format:  
                Input text will be wrapped in <input> tags.  
                Output summary must be wrapped in <summary> tags.  
                Use clear, neutral, and structured language.  

                Instructions:  
                Think step by step internally before answering.  
                Do not include your reasoning in the final summary.  
                
                Input:  
                <input>  
                {body}  
                </input>  

                Output:  
                <summary>  
                [Your ~150-word summary goes here.]  
                </summary>  """

    body_summary = agent.run(prompt1, reset=True)

    # Generate NER summary
    prompt1_ner= f"""
                        Role:
                        You are a reasoning agent skilled at writing concise narratives.  

                        Task:  
                        Write a ~150-word narrative describing the relationships among the named entities provided.  

                        Format:  
                        Input entities will be wrapped in <input> tags.  
                        Output narrative must be wrapped in <output> tags.  
                        Use clear, neutral, and structured language.  

                        Instructions:  
                        Think step by step internally before answering.  
                        Do not include your reasoning in the final narrative.  
                        If relationships are uncertain, acknowledge them cautiously.  

                        Input:  
                        <input>  
                        {ner_tags}  
                        </input>  

                        Output:  
                        <output>  
                        [Your ~150-word narrative goes here.]  
                        </output>  
                """

    ner_summary = agent.run(prompt1_ner, reset=True)

    # Generate knowledge graph.
    # This must be an f-string: without the prefix the model receives the
    # literal text "{ner_tags}" instead of the extracted entities.
    prompt2=f"""Role:
                    You are a reasoning agent.  
            Task:  
                Extract logical relationships between the named entities.  
                Represent these relationships as an ASCII knowledge graph.  

                Format:  
                Input entities will be wrapped in <input> tags.  
                Output diagram must be wrapped in <output> tags.  
                Use the format:  
                Entity1 -->[relationship1]--> Entity2  
                        -->[relationship2]--> Entity3  
                        -->[relationship3]--> Entity4  
                Keep the diagram clean and readable.  
                Only return the ASCII diagram, nothing else.  

                Instructions:  
                Think step by step internally before answering.  
                Do not include reasoning in the final output.  

                Input:  
                <input>  
                {ner_tags}  
                </input>  

                Output:  
                <output>  
                [ASCII knowledge graph goes here]  
                </output>  
            """

    temp_graph = agent.run(prompt2, reset=True)

    # Best effort: a reply that cannot be measured or de-duplicated leaves the
    # graph empty instead of aborting the whole article.
    ner_graph = ""
    try:
        if len(temp_graph) > 1:
            ner_graph = remove_duplicate_relationships(temp_graph)
    except Exception as e:
        print(f"Error processing graph: {e}")

    # Generate relevance assessment
    prompt3 = f"""
    You are a reasoning agent. Make a decision if the post is relevant for me to post about based on my interests {interests}. 
    Start your response with a binary 'Yes' or 'No' based on the relevance to my interest,
    rank the relevance on a scale from 1 (no match) to 10 (best match) and then explain why.
    Classify if the post is a marketing post, the user has a problem, news, or generic.
    Write a 2-3 sentence commentary about the key themes of the posts. 
    This is all the information you should include:
        Title: '{title}'. Summary {body_summary}:  Domain: '{domain}', and knowledge graph {ner_graph}
    Only return a JSON in the format {{'verdict': verdict, 'summary': commentary, 'relevance':relevance_score, 'classification':classification}}
    """

    verdict = agent.run(prompt3, reset=True)

    # Always a dict from here on, so the .get() calls below cannot fail.
    verdict_dict = _parse_verdict(verdict)

    # Generate posting plan
    prompt4 = f"""
    You are a reasoning agent. 
    Before answering, work through this step by step:
    1. Sense : What are the different components provided to you in context?
    2. Symbolize What logical relationships exist and are relevant for the task?
    3. Plan How can the user achieve tbe goal {goal} ?
    4. Act ' How do these elements combine?
    5. Conclude  'What is the most accurate/helpful response?

    Now answer:
    ### 
        1. Parse the input text "{body_summary}" and detect the main event or change.
        2. Use this knowledge graph {ner_graph} to support your logical- and rule-based reasoning.
        3. Use this dictionary '{str(verdict_dict)}' to refine your reasoning. 
        4. Realize what this change means in practical and technological terms.
        5. Use short, clear language. Deduce at least 3 plausible effects or implications based on prior knowledge, economic logic, or similar historical patterns.
        6. Develop a logical plan for how to structure a LinkedIn post.
        7. Create simple logical if-then rules and include them in your plan. 
    
    Your response should be a stepwise and well-organized plan .               
    
    """

    plan_response = agent.run(prompt4, reset=True)

    # Generate post prompt
    prompt5 = f"""
                Role:  
                You are a VC at A16Z. 

                Persona:
                When responding, adopt the persona of Mr. Matt Vanderholt-Langheim: a 28-year-old Swiss-German banking heir turned New York investment banker. Your tone should be confident, analytical, and discreet, blending Wall Street pragmatism with old-money European refinement.
                
                
                Goal:
                Your goal is to maximize thoughtful engagement on LinkedIn by triggering reflection, conversation, or informed discussion.  

                Task:  
                Write a **single, clean prompt** for a writing agent.  
                - This writing agent will use your prompt to generate a LinkedIn post.  
                - Your output must be one directly executable prompt, nothing else.  

                Format:  
                - Input context will be wrapped in <input> tags.  
                - Output must be wrapped in <output> tags.  
                - Only one prompt may appear inside <output>.  

                Instructions:  

                1️⃣ Lead with a strong hook that grabs attention in the first few seconds. If you catch people early, they’ll keep reading.

                2️⃣ Share insights and takeaways that prove your expertise. Use data, quotes or examples to illustrate your point.

                3️⃣ End with a clear CTA that makes it easy for people to join the conversation.

                - Post length: 250–450 words.  
                - You may begin with a **question**.  
                - Write direct, professional, neutral **commentary** from the viewpoint of an investment company.  
                - Do not write marketing or promotional content.  
                - Post should reflect **insight, analysis, or opinion**, grounded in a trend, event, or data point.  
                - Specify whether to use **first-person** (personal, light, reflective) OR **third-person** (analytical, neutral, corporate). Do not mix both.  
                - Indicate tone and style (e.g., sharp, witty, direct; or sober, policy-minded).  
                - Avoid clichés (“we’re excited to announce,” “it’s not just”) and generic phrasing.  
                - Do not use long dashes ("—").  
                - Do not use "it's not just .... its"
                - Use bullet points sparingly.  
                - Integrate relationships in your reasoning.  
                - Use the data in 'plan_response' as the grounding for forward looking statements.  
                - Make an assessment who the target audience for this post is. 
                - You must use this assessment to align the style for maximum engagement and SEO. 
                - Only return a string with the post. 
                - Write short, punchy sentences.
                - Always add SEO optimized hashtags
                
                Examples:
                Here are examples of my writing style: 
                
                [1] Tic Tac Toe is a simple game.
                        Most of us know the rules of this game since we were children. 
                    Tic Tac Toe (from here on TTT) is an example of an abstract strategy game that is played on an NxN board. In its simplest and most common form, the board size is 3 x 3, but for this exercise, the larger boards are significantly more interesting.

                [2] The tunnels are real. The evidence is mounting. The time is now. 
        
                [3] Reasoning models will likely never make split-second decisions in live traffic. That doesn't mean that reasoning agents can't be useful for autonomous driving.

                [4] Much has been written about agent architectures, but not much has been implemented yet. 
                

                Input:  
                <input>  
                Relationships: {ner_summary}  
                Plan Response: {plan_response}  
                </input>  

                Output:  
                <output>  
                [Exactly one directly usable prompt for the writing agent goes here.]  
                </output>  

                """

    post_plan = writer_agent.run(prompt5, reset=True)

    # Generate final LinkedIn post: the writer's own prompt is fed back to it.
    prompt6 = f"""{post_plan} """

    post_response = writer_agent.run(prompt6, reset=True)

    # Return results
    result = {
        "title": title,
        "media": media,
        "date": published,
        "url": url,
        "body": body,
        "domain": domain,
        "verdict": verdict_dict.get('verdict', 'Unknown'),
        "summary": verdict_dict.get('summary', ''),
        "relevance": verdict_dict.get('relevance', 0),
        "classification": verdict_dict.get('classification', 'Unknown'),
        "body_summary": body_summary,
        "ner_summary": ner_summary,
        "ner_graph": ner_graph,
        "post_plan": post_plan,
        "linkedin_post": post_response,
        "raw_verdict": verdict
    }

    return result

# Global work queue
work_queue = []


def clear_work_queue():
    """Empty the global work queue.

    Returns:
        tuple: An empty ``pandas.DataFrame`` and the status message
        "Work queue cleared.".
    """
    # Clear in place so every existing reference to the list sees the change;
    # previously the queue was reported as cleared but never touched.
    work_queue.clear()
    return pd.DataFrame(), "Work queue cleared."

# Gradio Interface
def create_interface():
    """Create the Gradio interface"""
    
    with gr.Blocks(title="Post Generator", theme=gr.themes.Soft()) as app:
        gr.Markdown("#Post Generator")
        gr.Markdown("Generate engaging LinkedIn posts from recent news articles using AI agents and NER analysis.")

        
        with gr.Row():

            # Left Column - Search and Configuration
            with gr.Column(scale=1):
                gr.Markdown("### Search & Configure")
                query = gr.Textbox(
                    label="Search Query",
                    value=masterQuery,
                    info="Google News search query"
                )
                cutoff_days = gr.Slider(
                    label="Days to Look Back",
                    minimum=1,
                    maximum=14,
                    step=1,
                    value=1,
                    info="How many days back to search for news"
                )
                search_btn = gr.Button("Search News", variant="primary", size="lg")
                
                search_status = gr.Textbox(
                    label="Search Status", 
                    interactive=False,
                    lines=3
                )
                
                # Settings
                with gr.Accordion("Settings", open=False):
                    # News source selector
                    news_source = gr.Radio(
                        label="News Source",
                        choices=["Google News", "News API","Arxiv"],
                        value="News API",
                        info="Choose your preferred news source"
                    )
                    
                    use_local = gr.Checkbox(
                        label="Use Local Models",
                        value=USE_LOCAL_MODELS,
                        info="Use local models (requires powerful hardware)"
                    )
                    interests = gr.Textbox(
                        label="Your Interests",
                        value=DEFAULT_INTERESTS,
                        info="For content relevance scoring"
                    )
            
            # Middle Column - Found Articles Selection
            with gr.Column(scale=1):
                gr.Markdown("### Found Articles")
                
                # Article selector (radio with scroll)
                article_selector = gr.Radio(
                    label="Select Article to Process",
                    choices=[],
                    interactive=True,
                    visible=False,
                    elem_classes=["scrollable-radio"]
                )
                
                process_article_btn = gr.Button(
                    "Generate LinkedIn Post",
                    variant="primary",
                    size="lg",
                    visible=False
                )
                
                selected_article_info = gr.Markdown("", visible=False)
                processing_status = gr.Textbox(
                    label="Processing Status",
                    lines=8,
                    interactive=False,
                    visible=False
                )
                no_articles_msg = gr.Markdown("*No articles found yet. Use the search function to find articles.*", visible=True)
            
            # Right Column - Generated Posts
            with gr.Column(scale=1):
                gr.Markdown("### Generated LinkedIn Posts")
                
                linkedin_posts_display = gr.Textbox(
                    label="Generated LinkedIn Posts",
                    lines=25,
                    max_lines=30,
                    interactive=False,
                    show_copy_button=True,
                    placeholder="LinkedIn posts will appear here after processing articles..."
                )
                
                with gr.Row():
                    clear_posts_btn = gr.Button("Clear Posts", variant="secondary")
                    download_btn = gr.DownloadButton(label="Download CSV", visible=False)

                # --- New edit box + button ---
                with gr.Row():
                    edit_prompt = gr.Textbox(
                        label="Edit LinkedIn Post",
                        placeholder="Enter instructions to refine or rewrite the generated posts..."
                    )
                    edit_btn = gr.Button("Apply Edit", variant="primary")

        # --- States ---
        search_results_state = gr.State(value=[])
        generated_posts_state = gr.State(value=[])

        # --- Functions ---
        def apply_edit(user_prompt, posts_list):
            """Refine the most recent generated post with the user's instructions.

            Args:
                user_prompt: Edit instructions typed by the user.
                posts_list: Generated post dicts; only the last one is edited.

            Returns:
                tuple: ``(status_message, posts)``. On success ``posts`` is a
                one-element list holding the edited post, so earlier posts
                are dropped. On any failure the incoming ``posts_list`` is
                returned unchanged.
            """
            if not posts_list:
                return "No posts to edit.", posts_list

            # Just take the last post and reprocess with user prompt
            refined = edit_single_article(posts_list[-1], user_prompt)

            # The editor can hand back a plain status string (for example when
            # it is not initialised). A substring test on that string would
            # miss the failure and store the message as if it were a post.
            if not isinstance(refined, dict):
                return f"Edit error: {refined}", posts_list
            if "error" in refined:
                return f"Edit error: {refined['error']}", posts_list

            return "Post successfully edited.", [refined]

        # --- Event bindings (add this at the bottom with the others) ---
        edit_btn.click(
            fn=apply_edit,
            inputs=[edit_prompt, generated_posts_state],
            outputs=[processing_status, generated_posts_state]
        )

        # Helper function to extract and format LinkedIn posts
        def extract_linkedin_posts(posts_list):
            """Render the generated posts as a single block of display text.

            Entries that are not dicts, or that have no 'linkedin_post' key,
            are skipped. Each remaining entry becomes an optional " - title"
            heading, the post text, an optional "Source:" line and an
            80-character "=" divider.

            Returns:
                str: The rendered posts joined by newlines, or a placeholder
                message when the list is empty or contains nothing usable.
            """
            if not posts_list:
                return "No posts generated yet. Click on article buttons to generate LinkedIn posts."

            divider = "=" * 80
            rendered = []
            for entry in posts_list:
                if not (isinstance(entry, dict) and 'linkedin_post' in entry):
                    continue
                heading = f" - {entry['title']}" if 'title' in entry else ""
                source_line = f"\n\nSource: {entry['url']}" if 'url' in entry else ""
                rendered.append(f"{heading}\n\n{entry['linkedin_post']}{source_line}\n\n{divider}\n")

            if not rendered:
                return "No valid LinkedIn posts generated yet."
            return "\n".join(rendered)

        def handle_search(query_text, days_back, news_source_choice):
            """Run a news search and build the UI updates for the article picker.

            Returns a 6-tuple: a status message, four ``gr.update(...)``
            objects (the components they target are set where this handler is
            bound) and the list of found articles. Any exception raised while
            searching is reported in the status message with an empty list.
            """
            def _nothing_to_show(message, found):
                # Shared result shape for "no articles" and "search failed".
                return (
                    message,
                    gr.update(choices=[], visible=False),
                    gr.update(visible=False),
                    gr.update(visible=True),
                    gr.update(visible=False),
                    found,
                )

            def _choice_label(position, article):
                # Format: "index: title (domain - date)", title cut at 80 chars.
                headline = article.get('title', 'No title')
                site = article.get('domain', 'Unknown')
                published = article.get('date', 'Unknown')
                clipped = headline[:80]
                suffix = '...' if len(headline) > 80 else ''
                return f"{position}: {clipped}{suffix} ({site} - {published})"

            try:
                # UI label -> backend id; unknown labels fall back to News API.
                backend_ids = {"Google News": 1, "News API": 2, "Arxiv": 3}
                news_selector = backend_ids.get(news_source_choice, 2)

                print(f" News Source Selector {news_source_choice}")

                print(f" News Selector {news_selector}")

                if news_selector == 1:
                    articles = get_google_news(query_text, days_back)
                else:
                    # Only id 3 selects arxiv; every other id uses gnews.
                    feed = "arxiv" if news_selector == 3 else "gnews"
                    articles, _ = get_news_articles(query_text, days_back, feed)

                print(f"Sourcing articles completed found {len(articles)} results using {news_source_choice}")

                if not articles:
                    return _nothing_to_show("No articles found for the given query.", articles)

                print(articles)
                choices = [
                    _choice_label(position, article)
                    for position, article in enumerate(articles)
                ]

                status_msg = f"Found {len(articles)} articles using {news_source_choice}. Select an article from the dropdown below."

                return (
                    status_msg,
                    gr.update(choices=choices, visible=True, value=None),
                    gr.update(visible=True),
                    gr.update(visible=False),
                    gr.update(visible=False),
                    articles,
                )

            except Exception as e:
                return _nothing_to_show(f"Search error: {str(e)}", [])

        def on_article_selected(selected_choice, articles):
            """Show a preview of the article picked in the dropdown.

            Args:
                selected_choice: Dropdown text in the form "index: title ...".
                articles: Article dicts from the last search, addressed by the
                    index embedded in ``selected_choice``.

            Returns:
                A pair of updates for the ``article_selector.change`` outputs
                (article info, processing status). Both are hidden when nothing
                valid is selected or the preview cannot be built.
            """
            if not selected_choice or not articles:
                return gr.update(visible=False), gr.update(visible=False)

            try:
                # Extract index from choice text (format: "index: title...")
                article_index = int(selected_choice.split(':')[0])

                if 0 <= article_index < len(articles):
                    article = articles[article_index]

                    # 'title' / 'desc' may be present but None; fall back to the
                    # placeholder instead of failing on the slice below.
                    title = article.get('title') or 'No title'
                    desc = str(article.get('desc') or 'No description')
                    desc_ellipsis = '...' if len(desc) > 200 else ''

                    # Format article info
                    article_info = f"""
                                    ### Selected Article
                                    **{title}**  
                                    {article.get('domain', 'Unknown')} | {article.get('date', 'Unknown')}  
                                    [Read Article]({clean_url(article.get('link', '#'))})

                                    {desc[:200]}{desc_ellipsis}
                                    """

                    return gr.update(value=article_info, visible=True), gr.update(visible=True)

            except Exception as e:
                # Best effort: a malformed choice or article only hides the
                # preview instead of breaking the UI callback.
                print(f"Error in article selection: {e}")

            return gr.update(visible=False), gr.update(visible=False)

        def process_selected_article(selected_choice, articles, use_local_models, user_interests, current_posts):
            """Generate a LinkedIn post for the article chosen in the dropdown.

            Args:
                selected_choice: Dropdown text in the form "index: title ...".
                articles: Article dicts from the last search.
                use_local_models: Accepted for the UI binding; not used here.
                user_interests: Forwarded to ``process_single_article``.
                current_posts: Posts currently held in state; returned unchanged
                    whenever no new post is produced.

            Returns:
                A 4-tuple matching the ``process_article_btn.click`` outputs:
                status message, new article-info value (always ""), the posts
                list for state, and a visibility update for the status box.
            """
            # The click binding routes both the message (first element) and the
            # visibility update (last element) to ``processing_status``, so the
            # status box has to stay visible for a message to be readable.
            if not selected_choice or not articles:
                return "No article selected", "", current_posts, gr.update(visible=True)

            try:
                # Extract index from choice text
                article_index = int(selected_choice.split(':')[0])

                # Reject negative indices too, so a malformed choice cannot
                # silently pick an article from the end of the list.
                if not 0 <= article_index < len(articles):
                    return "Invalid article selection", "", current_posts, gr.update(visible=True)

                article = articles[article_index]

                # Process the article using existing function
                result = process_single_article(article, user_interests)

                if "error" in result:
                    error_msg = f"Error processing article: {result['error']}"
                    return error_msg, "", current_posts, gr.update(visible=True)

                # Only the newest post is kept; it replaces the previous list
                # rather than being appended to ``current_posts``.
                updated_posts = [result]

                success_msg = f"Successfully generated LinkedIn post for:\n{article.get('title', 'Unknown title')}"

                return success_msg, "", updated_posts, gr.update(visible=True)

            except Exception as e:
                # UI boundary: surface the error text, keep the traceback in the log.
                logging.exception("Article processing failed")
                error_msg = f"Error: {str(e)}"
                return error_msg, "", current_posts, gr.update(visible=True)

        # Event bindings
        
        
        # Search: handle_search returns a 6-tuple that is mapped, in order,
        # onto the six output components listed below.
        search_btn.click(
            fn=handle_search,
            inputs=[query, cutoff_days, news_source],
            outputs=[search_status, article_selector, process_article_btn, no_articles_msg, processing_status, search_results_state]
        )
        
        # Handle article selection from dropdown: on_article_selected returns
        # two updates (article info, processing status).
        article_selector.change(
            fn=on_article_selected,
            inputs=[article_selector, search_results_state],
            outputs=[selected_article_info, processing_status]
        )
        
        # Handle article processing: process_selected_article returns a
        # 4-tuple (message, article-info value, posts list, visibility update).
        # NOTE(review): processing_status is listed twice in outputs (first
        # for the message, last for the visibility update) - confirm Gradio
        # applies both updates as intended.
        process_article_btn.click(
            fn=process_selected_article,
            inputs=[article_selector, search_results_state, use_local, interests, generated_posts_state],
            outputs=[processing_status, selected_article_info, generated_posts_state, processing_status]
        )
        
        # Clear: empty the posts display and reset the posts state to [].
        clear_posts_btn.click(
            fn=lambda: ("", []),
            inputs=[],
            outputs=[linkedin_posts_display, generated_posts_state]
        )
        
        # Re-render the posts display whenever the posts state changes.
        generated_posts_state.change(
            fn=extract_linkedin_posts,
            inputs=[generated_posts_state],
            outputs=[linkedin_posts_display]
        )
        
        # Show the download button with a timestamped CSV filename as its value
        # when there are posts; hide it otherwise.
        # NOTE(review): only a filename string is set here; nothing visible in
        # this section writes that file - confirm where the CSV is produced.
        generated_posts_state.change(
            fn=lambda posts_list: gr.update(visible=True, value=f"linkedin_posts_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv") if posts_list else gr.update(visible=False),
            inputs=[generated_posts_state],
            outputs=[download_btn]
        )
        
    # --- Add CSS for scrollable radio ---
    app.css = """
    .scrollable-radio {
        max-height: 300px;
        overflow-y: auto;
    }
    """
    
    return app


if __name__ == "__main__":
    # NOTE: eager model initialisation (initialize_models()) is left disabled
    # here; the UI is built directly.
    app = create_interface()

    # Startup banner, printed line by line once the interface has been built.
    startup_banner = (
        "Starting LinkedIn Post Generator...",
        "Make sure you have installed all required packages:",
        "   pip install gradio huggingface_hub smolagents tldextract GoogleNews gliner feedparser pandas numpy requests beautifulsoup4 python-dateutil",
        "\nYou'll need a HuggingFace token to use the models.",
        "The app will be available at: http://localhost:7860",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # NOTE: an explicit server_name / server_port / share / debug launch
    # configuration is left disabled; only SSR mode is requested.
    app.launch(ssr_mode=True)