| | import gradio as gr |
| | import requests |
| | import re |
| | from duckduckgo_search import DDGS |
| | import anthropic |
| | import os |
| | import json |
| |
|
| | |
# Shared Anthropic client used by extraction_workflow. The key is read from
# the environment once at import time; if ANTHROPIC_API_KEY is unset the
# lookup yields None and that value is handed to the constructor unchanged.
anthropic_client = anthropic.Anthropic(
    api_key=os.environ.get("ANTHROPIC_API_KEY"),
)
| |
|
| | |
| |
|
# Upper bound on hits requested from each of the two searches.
_RESULTS_PER_SEARCH = 4


def _collect_articles(ddgs, keywords, label, timelimit=None):
    """Run one text search and format every hit for display and for the AI.

    Args:
        ddgs: An open ``DDGS`` session.
        keywords: The query string to search for.
        label: Tag inserted into each section heading ("Recent"/"Historical").
        timelimit: Optional time restriction forwarded to ``ddgs.text``; it is
            omitted from the call entirely when ``None``.

    Returns:
        A ``(markdown_sections, ai_sections)`` pair of equally long lists.
    """
    search_kwargs = {"keywords": keywords, "max_results": _RESULTS_PER_SEARCH}
    if timelimit is not None:
        search_kwargs["timelimit"] = timelimit

    # ``or []`` guards against the search returning None instead of a list.
    hits = ddgs.text(**search_kwargs) or []

    markdown_sections = []
    ai_sections = []
    for hit in hits:
        title = hit.get('title', 'No Title')
        url = hit.get('href', '#')
        body = hit.get('body', 'No snippet available.')

        markdown_sections.append(
            f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n"
        )
        ai_sections.append(
            f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n"
        )
    return markdown_sections, ai_sections


def search_workflow(name: str, progress=gr.Progress()):
    """Search the web for founder-related articles about a company.

    Two text searches are issued, each capped at ``_RESULTS_PER_SEARCH`` hits:
    a "recent" one restricted with ``timelimit='y'`` and a "historical" one
    with no time restriction. The result is therefore *up to* 8 entries, not
    a guaranteed 8.

    Args:
        name: Company name typed by the user. Surrounding whitespace is
            ignored so it does not end up inside the quoted search phrase.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the report shown to
        the user (or a status/error message); ``raw_text`` is the plain-text
        digest later passed to the extraction step and is ``""`` whenever the
        search failed or produced nothing.
    """
    if not name or not name.strip():
        return "β Please enter a company name.", ""

    company = name.strip()

    progress(0, desc="Starting search...")

    recent_keywords = f'"{company}" founder news'
    historical_keywords = f'"{company}" founder history origin'

    all_articles_markdown = []
    ai_sections = []

    try:
        with DDGS(timeout=20) as ddgs:
            progress(0.1, desc="Searching for recent articles...")
            recent_md, recent_ai = _collect_articles(
                ddgs, recent_keywords, "Recent", timelimit='y'
            )
            all_articles_markdown.extend(recent_md)
            ai_sections.extend(recent_ai)

            progress(0.5, desc="Searching for historical articles...")
            historical_md, historical_ai = _collect_articles(
                ddgs, historical_keywords, "Historical"
            )
            all_articles_markdown.extend(historical_md)
            ai_sections.extend(historical_ai)
    except Exception as e:
        # UI boundary: surface any search failure as a message instead of
        # letting the handler crash.
        return f"β An error occurred during search: {e}", ""

    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")

    final_markdown = (
        f"## Found {len(all_articles_markdown)} Articles\n\n"
        + "\n---\n".join(all_articles_markdown)
    )
    raw_text_for_ai = "".join(ai_sections)

    return final_markdown, raw_text_for_ai
| |
|
| |
|
| | |
| |
|
# Maximum number of characters of article text embedded in the prompt.
_MAX_PROMPT_CHARS = 20000

# Greedy match from the first "{" to the last "}" so that a JSON object
# wrapped in extra prose or a code fence can still be recovered.
_JSON_OBJECT_RE = re.compile(r'\{.*\}', re.DOTALL)


def _first_text(message):
    """Return the text of the first content block that carries a string.

    Scans every block rather than only the first one, so a response whose
    leading block is not a text block is still usable. Returns ``None`` when
    the response has no content list or no block exposes a string ``text``.
    """
    content = getattr(message, "content", None)
    if not isinstance(content, list):
        return None
    for block in content:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            return text
    return None


def extraction_workflow(raw_text: str, company_name: str, progress=gr.Progress()) -> str:
    """Ask the AI model to extract founder names from article snippets.

    Args:
        raw_text: Plain-text article digest produced by the search step. Only
            the first ``_MAX_PROMPT_CHARS`` characters are sent to the model.
        company_name: Company the snippets are about; interpolated into the
            prompt after stripping surrounding whitespace.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A Markdown string: a fenced, pretty-printed JSON document on success,
        otherwise a warning or error message describing what went wrong.
        The function never raises; all failures are reported in the string.
    """
    if not raw_text or not raw_text.strip():
        return "β Please run a search first to get text to analyze."

    progress(0, desc="Preparing prompt for AI...")

    company = (company_name or "").strip()
    articles = raw_text[:_MAX_PROMPT_CHARS]

    prompt = f"""From the provided article snippets about "{company}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.

ARTICLES:
---
{articles}
---
"""

    try:
        progress(0.5, desc="Sending request to AI model...")
        message = anthropic_client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}],
        )

        json_text = _first_text(message)
        if json_text is None:
            return "β **API Error**: The AI model returned an empty or invalid response."

        match = _JSON_OBJECT_RE.search(json_text)
        if not match:
            return f"β οΈ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"β οΈ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps non-ASCII names readable in the report
        # instead of rendering them as \uXXXX escape sequences.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"

    except Exception as e:
        # UI boundary: report API/network failures as text rather than
        # crashing the Gradio handler.
        return f"β **An unexpected error occurred during extraction**: {e}"
| |
|
| |
|
| | |
| |
|
# ---------------------------------------------------------------------------
# Gradio user interface: a two-step flow (search, then extract).
# ---------------------------------------------------------------------------
with gr.Blocks(title="Founder Name Extraction Tool", theme=gr.themes.Soft()) as demo:
    # Page header and short usage instructions.
    gr.Markdown("# π Founder Name Extraction")
    gr.Markdown(
        "A tool to find company founders. **Step 1:** Search for articles. "
        "**Step 2:** Extract founders' names from the results."
    )

    # Hidden state carrying the plain-text search digest from step 1 to step 2.
    search_results_for_ai = gr.State("")

    # Row 1: company name field next to the search button.
    with gr.Row():
        name_input = gr.Textbox(
            label="Company Name",
            placeholder="e.g., 'OpenAI', 'SpaceX'",
            scale=3,
        )
        search_btn = gr.Button(
            "1. π Search for Articles",
            variant="primary",
            scale=1,
        )

    # Row 2: extraction button.
    with gr.Row():
        extract_btn = gr.Button(
            "2. π Extract Founders from Search Results",
            variant="secondary",
        )

    # Output panes, one tab per step.
    with gr.Tab("Search Results"):
        output_search = gr.Markdown()
    with gr.Tab("Founder Intelligence Report"):
        output_extract = gr.Markdown()

    # Step 1: the search fills the results tab and stores the digest in state.
    search_btn.click(
        search_workflow,
        inputs=[name_input],
        outputs=[output_search, search_results_for_ai],
        show_progress="full",
    )

    # Step 2: the extraction reads the stored digest plus the current name.
    extract_btn.click(
        extraction_workflow,
        inputs=[search_results_for_ai, name_input],
        outputs=[output_extract],
        show_progress="full",
    )

# Enable Gradio's request queue for the app.
demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)
| |
|
| |
|