# File size: 6,898 Bytes
import gradio as gr
import requests
import re
from duckduckgo_search import DDGS
import anthropic
import os
import json
# Initialize clients
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
# === 1. Simplified Search Workflow ===
def _collect_articles(ddgs, keywords: str, label: str, max_results: int = 4, **search_kwargs):
    """Run one DuckDuckGo text search and format every hit twice.

    Args:
        ddgs: An open ``DDGS`` session.
        keywords: The query string passed to ``ddgs.text``.
        label: Tag shown with each article (e.g. "Recent" or "Historical").
        max_results: Upper bound on hits requested from the search.
        **search_kwargs: Extra keyword arguments forwarded to ``ddgs.text``
            (for example ``timelimit='y'``).

    Returns:
        A ``(display_blocks, ai_blocks)`` pair of lists: Markdown snippets for
        the UI and plain-text snippets for the AI prompt, in result order.
    """
    display_blocks = []
    ai_blocks = []
    # ddgs.text may hand back a falsy value when nothing matches; treat it as empty.
    hits = ddgs.text(keywords=keywords, max_results=max_results, **search_kwargs) or []
    for hit in hits:
        title = hit.get('title', 'No Title')
        url = hit.get('href', '#')
        body = hit.get('body', 'No snippet available.')
        # Format for display
        display_blocks.append(f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n")
        # Format for AI
        ai_blocks.append(f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n")
    return display_blocks, ai_blocks


def search_workflow(name: str, progress=gr.Progress()):
    """
    Search for founder-related articles about a company.

    Two searches are issued: one limited with ``timelimit='y'`` for recent
    coverage and one unrestricted for historical coverage. Each is capped at
    4 results, so at most 8 articles are returned (fewer if the search yields
    fewer hits).

    Args:
        name: Company name typed by the user; surrounding whitespace is ignored.
        progress: Gradio progress tracker used to report search stages.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the report (or a
        status/error message) for display; ``raw_text`` is the concatenated
        plain-text snippets intended for the extraction step, or ``""`` when
        there is nothing to analyze.
    """
    # Strip once up front so padding never ends up inside the quoted phrase.
    company = name.strip() if name else ""
    if not company:
        return "β Please enter a company name.", ""
    progress(0, desc="Starting search...")
    # Define search queries
    recent_keywords = f'"{company}" founder news'
    historical_keywords = f'"{company}" founder history origin'
    try:
        with DDGS(timeout=20) as ddgs:
            # --- Fetch up to 4 recent articles ---
            progress(0.1, desc="Searching for recent articles...")
            recent_md, recent_ai = _collect_articles(
                ddgs, recent_keywords, "Recent", timelimit='y'
            )
            # --- Fetch up to 4 historical articles ---
            progress(0.5, desc="Searching for historical articles...")
            historical_md, historical_ai = _collect_articles(
                ddgs, historical_keywords, "Historical"
            )
    except Exception as e:
        # UI boundary: report any search failure to the user instead of crashing.
        return f"β An error occurred during search: {e}", ""
    all_articles_markdown = recent_md + historical_md
    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""
    progress(1.0, desc="Search complete!")
    final_markdown = (
        f"## Found {len(all_articles_markdown)} Articles\n\n"
        + "\n---\n".join(all_articles_markdown)
    )
    raw_text_for_ai = "".join(recent_ai + historical_ai)
    return final_markdown, raw_text_for_ai
# === 2. Simplified Extraction Workflow ===
def extraction_workflow(raw_text: str, company_name: str, progress=gr.Progress()):
    """
    Extract founder names from article snippets using the AI model.

    Args:
        raw_text: Plain-text article snippets produced by the search step.
            Only the first 20000 characters are sent to the model.
        company_name: Company the snippets are about; quoted in the prompt.
        progress: Gradio progress tracker used to report extraction stages.

    Returns:
        A Markdown string: a fenced ``json`` block with the pretty-printed
        result on success, otherwise a warning/error message describing what
        went wrong (no text to analyze, empty response, missing or malformed
        JSON, or an exception raised while calling the model).
    """
    if not raw_text or not raw_text.strip():
        return "β Please run a search first to get text to analyze."
    progress(0, desc="Preparing prompt for AI...")
    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.
ARTICLES:
---
{raw_text[:20000]}
---
"""
    try:
        progress(0.5, desc="Sending request to AI model...")
        message = anthropic_client.messages.create(
            model="claude-sonnet-4-20250514",  # As requested
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}]
        )
        # Use the first block that actually carries text rather than assuming
        # it sits at index 0; this also avoids 'list index out of range'.
        blocks = getattr(message, "content", None)
        json_text = None
        if isinstance(blocks, list):
            json_text = next(
                (
                    block.text
                    for block in blocks
                    if isinstance(getattr(block, "text", None), str)
                ),
                None,
            )
        if json_text is None:
            return "β **API Error**: The AI model returned an empty or invalid response."
        # Clean the response to find the JSON object (first '{' to last '}').
        match = re.search(r'\{.*\}', json_text, re.DOTALL)
        if not match:
            return f"β οΈ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"
        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"β οΈ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"
        # ensure_ascii=False keeps non-ASCII names readable instead of
        # rendering them as \uXXXX escapes in the report.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"
    except Exception as e:
        # UI boundary: surface API/network failures as a message, not a crash.
        return f"β **An unexpected error occurred during extraction**: {e}"
# === 3. Simplified Gradio UI ===
# Build the two-step interface: a search row, an extract row, and one tab per result.
with gr.Blocks(theme=gr.themes.Soft(), title="Founder Name Extraction Tool") as demo:
    gr.Markdown("# π Founder Name Extraction")
    gr.Markdown("A tool to find company founders. **Step 1:** Search for articles. **Step 2:** Extract founders' names from the results.")

    # Invisible per-session holder carrying the search text into the extraction step.
    search_results_for_ai = gr.State("")

    # Step 1 controls: company name box next to the search trigger.
    with gr.Row():
        name_input = gr.Textbox(
            label="Company Name",
            placeholder="e.g., 'OpenAI', 'SpaceX'",
            scale=3,
        )
        search_btn = gr.Button(
            "1. π Search for Articles",
            variant="primary",
            scale=1,
        )

    # Step 2 control: extraction trigger on its own row.
    with gr.Row():
        extract_btn = gr.Button(
            "2. π Extract Founders from Search Results",
            variant="secondary",
        )

    # Output panes, one tab each.
    with gr.Tab("Search Results"):
        output_search = gr.Markdown()
    with gr.Tab("Founder Intelligence Report"):
        output_extract = gr.Markdown()

    # --- Event Wiring ---
    # Searching fills the results tab and stashes the raw text in the state.
    search_btn.click(
        fn=search_workflow,
        inputs=[name_input],
        outputs=[output_search, search_results_for_ai],
        show_progress="full",
    )
    # Extracting reads the stashed text plus the company name and fills the report tab.
    extract_btn.click(
        fn=extraction_workflow,
        inputs=[search_results_for_ai, name_input],
        outputs=[output_extract],
        show_progress="full",
    )

# Enable request queuing before the app is served.
demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)
|