Spaces:

Agents-MCP-Hackathon
/

Founder_Name_Extraction_v2

Sleeping

File size: 6,898 Bytes

cdb081c
a34296b
 
 
b3950a6
 
3238b9e
 
cdb081c
 
725cd97
cdb081c
725cd97
cdb081c
5aafe64
ddd51b5
5aafe64
cdb081c
 
3238b9e
cdb081c
 
 
 
 
 
 
 
5aafe64
cdb081c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5aafe64
41dc56e
cdb081c
054936e
cdb081c
 
054936e
cdb081c
3238b9e
cdb081c
725cd97
cdb081c
41dc56e
 
cdb081c
3238b9e
5aafe64
cdb081c
ddd51b5
cdb081c
5aafe64
cdb081c
 
 
054936e
cdb081c
 
 
5aafe64
 
 
 
 
 
 
3238b9e
cdb081c
 
5aafe64
cdb081c
5aafe64
 
 
cdb081c
 
 
5aafe64
 
 
cdb081c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41dc56e
3238b9e
5aafe64
3238b9e
 
cdb081c
41dc56e
7c6b357
 
 
41dc56e
cdb081c
5aafe64
 
725cd97
cdb081c
 
41dc56e
cdb081c
 
41dc56e
cdb081c
 
99f18e1
 
41dc56e
cdb081c
 
 
 
5aafe64
cdb081c
 
 
41dc56e
5aafe64
cdb081c
 
5aafe64
 
cdb081c
 
5aafe64
 
99f18e1
8230bce
196bf92
 
3238b9e
054936e
196bf92

import gradio as gr
import requests
import re
from duckduckgo_search import DDGS
import anthropic
import os
import json

# Initialize clients
# Module-level Anthropic client used by extraction_workflow. The API key is
# read from the ANTHROPIC_API_KEY environment variable; os.getenv returns None
# when the variable is unset.
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# === 1. Simplified Search Workflow ===

def _format_articles(label, results, seen_urls):
    """
    Turn raw search hits into (display_markdown, ai_text) pairs.

    Args:
        label: Tag shown in the output, e.g. "Recent" or "Historical".
        results: Iterable of result dicts; the 'title', 'href' and 'body'
            keys are read, each with a placeholder fallback.
        seen_urls: Set of URLs already emitted. It is updated in place so a
            page returned by more than one query is only reported once.

    Returns:
        A list of (markdown, ai_text) tuples, in the order the hits arrived.
    """
    formatted = []
    for res in results:
        href = res.get('href')
        # Only de-duplicate hits that actually carry a URL; hits without one
        # all fall back to the same '#' placeholder and must not collapse.
        if href:
            if href in seen_urls:
                continue
            seen_urls.add(href)

        title = res.get('title', 'No Title')
        url = res.get('href', '#')
        body = res.get('body', 'No snippet available.')

        markdown = f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n"
        ai_text = f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n"
        formatted.append((markdown, ai_text))
    return formatted


def search_workflow(name: str, progress=gr.Progress()):
    """
    Search for articles about a company's founders.

    Runs two DuckDuckGo text searches of up to 4 results each: one limited to
    the past year ("recent") and one unrestricted ("historical"). A URL that
    shows up in both searches is reported only once, so the total is at most 8.

    Args:
        name: Company name typed by the user. Surrounding whitespace is
            ignored.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A (markdown, raw_text) tuple. `markdown` is the report for display,
        or a status/error message. `raw_text` is the plain-text digest meant
        for the AI extraction step; it is "" whenever there is nothing to
        analyze (no name, no results, or a search error).
    """
    if not name or not name.strip():
        return "❌ Please enter a company name.", ""

    # Use the trimmed name so stray spaces do not end up inside the quoted phrase.
    name = name.strip()

    progress(0, desc="Starting search...")

    # Define search queries
    recent_keywords = f'"{name}" founder news'
    historical_keywords = f'"{name}" founder history origin'

    formatted = []
    seen_urls = set()

    try:
        with DDGS(timeout=20) as ddgs:
            # --- Fetch up to 4 recent articles (past year) ---
            progress(0.1, desc="Searching for recent articles...")
            recent_results = ddgs.text(keywords=recent_keywords, max_results=4, timelimit='y') or []
            formatted.extend(_format_articles("Recent", recent_results, seen_urls))

            # --- Fetch up to 4 historical articles (no time limit) ---
            progress(0.5, desc="Searching for historical articles...")
            historical_results = ddgs.text(keywords=historical_keywords, max_results=4) or []
            formatted.extend(_format_articles("Historical", historical_results, seen_urls))

    except Exception as e:
        # UI boundary: surface any search failure as a message instead of raising.
        return f"❌ An error occurred during search: {e}", ""

    if not formatted:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")

    all_articles_markdown = [markdown for markdown, _ in formatted]
    raw_text_for_ai = "".join(ai_text for _, ai_text in formatted)

    final_markdown = f"## Found {len(all_articles_markdown)} Articles\n\n" + "\n---\n".join(all_articles_markdown)

    return final_markdown, raw_text_for_ai


# === 2. Simplified Extraction Workflow ===

def _first_text_block(message):
    """
    Return the text of the first content block in `message` that has a string
    `text` attribute, or None when there is no such block.
    """
    content = getattr(message, "content", None)
    if not isinstance(content, (list, tuple)):
        return None
    for block in content:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            return text
    return None


def extraction_workflow(raw_text: str, company_name: str, progress=gr.Progress()):
    """
    Extract founder names from article text using the AI model.

    Args:
        raw_text: Plain-text article digest produced by the search step. Only
            the first 20,000 characters are sent to the model.
        company_name: Company the articles are about; inserted into the prompt.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A markdown string: on success, a fenced ```json block holding the
        pretty-printed JSON returned by the model; otherwise a warning or
        error message. Exceptions are reported in the returned message rather
        than raised.
    """
    if not raw_text or not raw_text.strip():
        return "❌ Please run a search first to get text to analyze."

    progress(0, desc="Preparing prompt for AI...")

    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.

ARTICLES:
---
{raw_text[:20000]}
---
"""
    try:
        progress(0.5, desc="Sending request to AI model...")
        message = anthropic_client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}]
        )

        # Scan every content block instead of assuming the text is in the
        # first one; this also covers an empty or missing content list.
        json_text = _first_text_block(message)
        if json_text is None:
            return "❌ **API Error**: The AI model returned an empty or invalid response."

        # The model may wrap the object in extra prose; take the span from the
        # first '{' to the last '}'.
        match = re.search(r'\{.*\}', json_text, re.DOTALL)
        if match is None:
            return f"⚠️ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"⚠️ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps non-ASCII names (e.g. accented letters)
        # readable instead of rendering them as \uXXXX escapes.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"

    except Exception as e:
        # UI boundary: report API/network failures as a message instead of raising.
        return f"❌ **An unexpected error occurred during extraction**: {e}"


# === 3. Simplified Gradio UI ===

# Two-step UI: the search button fills the "Search Results" tab and a hidden
# state; the extract button feeds that state to the AI extraction step.
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔎 Founder Name Extraction")
    gr.Markdown("A tool to find company founders. **Step 1:** Search for articles. **Step 2:** Extract founders' names from the results.")
    
    # Hidden state to pass text from search to extraction
    # (holds the second return value of search_workflow; starts as "").
    search_results_for_ai = gr.State("")

    # Input row: company name box next to the search button.
    with gr.Row():
        name_input = gr.Textbox(label="Company Name", placeholder="e.g., 'OpenAI', 'SpaceX'", scale=3)
        search_btn = gr.Button("1. 🔍 Search for Articles", variant="primary", scale=1)
    
    with gr.Row():
        extract_btn = gr.Button("2. 📊 Extract Founders from Search Results", variant="secondary")

    # One output tab per workflow step.
    with gr.Tab("Search Results"):
        output_search = gr.Markdown()
    with gr.Tab("Founder Intelligence Report"):
        output_extract = gr.Markdown()

    # --- Event Wiring ---
    
    # Search button populates the search results tab and the hidden state
    search_btn.click(
        fn=search_workflow,
        inputs=[name_input],
        outputs=[output_search, search_results_for_ai],
        show_progress="full"
    )
    
    # Extract button uses the hidden state to populate the extraction tab
    # NOTE: the company name is read from the textbox at click time, so it can
    # differ from the name that produced the stored search results if the user
    # edited the box in between.
    extract_btn.click(
        fn=extraction_workflow,
        inputs=[search_results_for_ai, name_input],
        outputs=[output_extract],
        show_progress="full"
    )
    


# Called at import time (outside the __main__ guard), so it also runs when this
# module is imported rather than executed as a script.
demo.queue()

# Launch the app only when executed directly as a script.
if __name__ == "__main__":
    demo.launch(show_error=True)