"""TradeApollo intent engine: a Streamlit app that surfaces B2B trigger events.

Pipeline, per source URL:
  1. scrape the page text (``fetch_clean_text``),
  2. ask a Groq-hosted LLM for trigger events in the user's niche
     (``extract_trigger_events``),
  3. look up an executive contact for each company domain via Apollo
     (``enrich_lead_data``),
  4. show a masked teaser table and gate the CSV download behind a license key.

Configuration is read from environment variables so that no secret lives in
the repository:
  GROQ_API_KEY             - Groq API key (required)
  APOLLO_API_KEY           - Apollo API key (required)
  TRADEAPOLLO_LICENSE_KEY  - license key that unlocks the CSV download
                             (when unset, no key is accepted)
"""

import hmac
import json
import logging
import os
import time

import cloudscraper
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from groq import Groq

logger = logging.getLogger(__name__)

# --- CONFIGURATION ---
# Secrets come from the environment, never from source control. Any key that
# was previously committed to this file must be treated as leaked and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
APOLLO_API_KEY = os.environ.get("APOLLO_API_KEY", "")
LICENSE_KEY = os.environ.get("TRADEAPOLLO_LICENSE_KEY", "")

# Only build the client when a key is present; the front-end reports the
# missing configuration before any request is attempted.
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# News wires scanned on every hunt.
TARGET_URLS = [
    "https://techcrunch.com/category/startups/",
    "https://www.finsmes.com/category/venture-capital",
]
REQUEST_TIMEOUT_SECONDS = 15
MAX_PROMPT_CHARS = 15000

# st.session_state keys. Streamlit reruns the whole script on every widget
# interaction, so anything that must survive a second button click lives here.
_LEADS_STATE = "tradeapollo_leads"
_NICHE_STATE = "tradeapollo_niche"
_UNLOCKED_STATE = "tradeapollo_unlocked"

# --- UI CONFIGURATION ---
st.set_page_config(page_title="TradeApollo | Intent Engine", page_icon="🎯", layout="wide")


# --- ENGINE FUNCTIONS ---
def fetch_clean_text(url):
    """Download ``url`` and return its visible text, or ``None`` on any failure.

    Square brackets are stripped and anything from the first ``(`` onward is
    dropped, so a markdown-style link such as ``[https://a.com](https://a.com)``
    resolves to ``https://a.com``.
    """
    clean_url = url.replace("[", "").replace("]", "")
    if clean_url.startswith("http") and "(" in clean_url:
        clean_url = clean_url.split("(")[0]
    try:
        scraper = cloudscraper.create_scraper(
            browser={"browser": "chrome", "platform": "windows", "desktop": True}
        )
        response = scraper.get(clean_url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        return soup.get_text(separator=" ", strip=True)
    except Exception:
        # Deliberately best-effort: one dead source must not abort the hunt,
        # but the failure is logged instead of vanishing.
        logger.warning("Failed to fetch %s", clean_url, exc_info=True)
        return None


def _build_trigger_prompt(clean_text, niche):
    """Return the LLM prompt for ``niche`` over the first part of ``clean_text``."""
    return (
        "You are a ruthless B2B data analyst looking for high-intent leads "
        f'in the following niche: "{niche}".\n'
        "Analyze the text and identify 'Trigger Events' "
        "(e.g., Seed Funding, Series A/B Funding, Executive Hires).\n"
        "CRITICAL RULES:\n"
        "1. EXCLUDE MEGA-CAPS: Do not extract news about Apple, Google, Meta, Amazon, etc.\n"
        f"2. TARGET NICHE ONLY: Only extract companies relevant to the user's niche: \"{niche}\".\n"
        "3. NO DOMAIN GUESSING: If the exact website URL is not explicitly in the text, "
        'return "None" for domain_url. Do not guess.\n'
        '4. If the domain is "None", exclude the company entirely.\n'
        'Respond in pure JSON containing a single key "trigger_events" mapping to a list '
        'of dictionaries with keys: "company_name", "domain_url", "trigger_event_type", '
        '"event_summary", "icebreaker_line".\n'
        f"Text: {clean_text[:MAX_PROMPT_CHARS]}\n"
    )


def extract_trigger_events(clean_text, niche):
    """Ask the LLM for trigger events in ``clean_text`` relevant to ``niche``.

    Returns a list of dicts (possibly empty). Any API or parsing failure, and
    any response that is not shaped as ``{"trigger_events": [ {...}, ... ]}``,
    yields an empty list so callers can always iterate the result.
    """
    prompt = _build_trigger_prompt(clean_text, niche)
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You output strict JSON only."},
                {"role": "user", "content": prompt},
            ],
            model="llama-3.3-70b-versatile",
            response_format={"type": "json_object"},
            temperature=0,
        )
        payload = json.loads(chat_completion.choices[0].message.content)
    except Exception:
        # Best-effort boundary around a remote call; log and degrade to "no events".
        logger.warning("Trigger-event extraction failed", exc_info=True)
        return []

    events = payload.get("trigger_events", []) if isinstance(payload, dict) else []
    if not isinstance(events, list):
        return []
    # The model occasionally emits stray scalars; keep only well-formed entries.
    return [event for event in events if isinstance(event, dict)]


def enrich_lead_data(domain):
    """Look up one executive contact for ``domain`` through Apollo people search.

    Returns ``(name, title, email)`` for the first person returned when that
    person has an email address, otherwise ``(None, None, None)``.
    """
    url = "https://api.apollo.io/v1/people/search"
    headers = {"Cache-Control": "no-cache", "Content-Type": "application/json"}
    data = {
        "api_key": APOLLO_API_KEY,
        "q_organization_domains": domain,
        "person_titles": ["CEO", "Founder", "Vice President", "CTO", "Director"],
        "page": 1,
    }
    try:
        # Without a timeout a stalled connection would hang the app forever.
        response = requests.post(
            url, headers=headers, json=data, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
        results = response.json()
    except (requests.RequestException, ValueError):
        logger.warning("Apollo lookup failed for %s", domain, exc_info=True)
        return None, None, None

    people = results.get("people") if isinstance(results, dict) else None
    if not isinstance(people, list) or not people:
        return None, None, None

    target = people[0]
    if not isinstance(target, dict):
        return None, None, None

    email = target.get("email")
    if not isinstance(email, str) or not email:
        return None, None, None

    name = target.get("name", "Executive")
    title = target.get("title", "Leadership")
    return name, title, email


def _mask_email(email):
    """Return a teaser form of ``email``: first character, ``***``, then the domain."""
    local, separator, host = email.partition("@")
    if not separator or not local:
        # Malformed address: reveal nothing rather than raise.
        return "***"
    return f"{local[0]}***@{host}"


def _hunt_leads(niche):
    """Run the scrape -> extract -> enrich pipeline and return a list of lead rows."""
    leads = []
    for url in TARGET_URLS:
        raw_text = fetch_clean_text(url)
        if not raw_text:
            continue
        for item in extract_trigger_events(raw_text, niche):
            domain = item.get("domain_url")
            if not domain or domain == "None":
                continue
            name, title, email = enrich_lead_data(domain)
            time.sleep(1)  # Tiny throttle between Apollo lookups
            if not email:
                continue
            leads.append({
                "Company": item.get("company_name"),
                "Event": item.get("trigger_event_type"),
                "Executive Name": name,
                "Executive Title": title,
                "Domain": domain,
                "Verified Email": _mask_email(email),  # Masked for the UI
                "_real_email": email,  # Hidden real email for the CSV
                "Icebreaker": item.get("icebreaker_line"),
            })
    return leads


def _license_is_valid(candidate):
    """Return True when ``candidate`` matches the configured license key.

    Fails closed when no key is configured. The comparison is constant-time and
    done on UTF-8 bytes, because ``hmac.compare_digest`` rejects non-ASCII ``str``.
    """
    if not LICENSE_KEY or not candidate:
        return False
    return hmac.compare_digest(candidate.encode("utf-8"), LICENSE_KEY.encode("utf-8"))


# --- THE FRONT-END ---
st.title("TradeApollo: Intent Signal Refinery 🎯")
st.markdown("Enter your target niche to autonomously hunt recently funded startups and executive hires in real-time.")

_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("GROQ_API_KEY", GROQ_API_KEY),
        ("APOLLO_API_KEY", APOLLO_API_KEY),
    )
    if not setting_value
]
if _missing_settings:
    st.error(
        "Missing required configuration: "
        + ", ".join(_missing_settings)
        + ". Set these environment variables and restart the app."
    )
    st.stop()

target_niche = st.text_input("Target B2B Niche (e.g., 'Fintech', 'SaaS', 'Healthcare Startups')")
start_hunt = st.button("Deploy Shadow Scout")

if start_hunt and target_niche:
    with st.spinner(f"Hunting high-intent '{target_niche}' targets across global PR wires... This takes about 15 seconds."):
        # Persist the results: the next widget interaction reruns this script
        # with start_hunt == False, and the table must still be there.
        st.session_state[_LEADS_STATE] = _hunt_leads(target_niche)
        st.session_state[_NICHE_STATE] = target_niche
        st.session_state[_UNLOCKED_STATE] = False

extracted_leads = st.session_state.get(_LEADS_STATE)

if extracted_leads is not None:
    hunted_niche = st.session_state.get(_NICHE_STATE, "")

    if extracted_leads:
        st.success(f"Target Acquisition Complete. {len(extracted_leads)} High-Value Leads Extracted.")

        # Create DataFrames: one masked for display, one with real emails for export
        df_display = pd.DataFrame(extracted_leads).drop(columns=["_real_email"])
        df_real = (
            pd.DataFrame(extracted_leads)
            .drop(columns=["Verified Email"])
            .rename(columns={"_real_email": "Verified Email"})
        )

        # Display the blurred teaser table
        st.table(df_display)

        st.warning("🔒 Emails are cryptographically locked. Enter an active TradeApollo License Key to decrypt and download the raw CSV payload.")

        # --- THE PAYWALL ---
        st.markdown("---")
        st.subheader("Unlock Payload")

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("[Purchase a License Key for $49 via Whop](https://whop.com/tradeapollo)")
        with col2:
            license_key = st.text_input("Enter License Key", type="password")

            if st.button("Decrypt & Download CSV"):
                if _license_is_valid(license_key):
                    st.session_state[_UNLOCKED_STATE] = True
                else:
                    st.session_state[_UNLOCKED_STATE] = False
                    st.error("Invalid or Expired License Key. Purchase access via Whop.")

            # Rendered from session state, so the download button survives the
            # rerun triggered by clicking it.
            if st.session_state.get(_UNLOCKED_STATE):
                st.success("Key Verified. Payload Unlocked.")

                # Generate CSV
                csv_data = df_real.to_csv(index=False).encode("utf-8")
                st.download_button(
                    label="Download Enriched CSV",
                    data=csv_data,
                    file_name=f"TradeApollo_{hunted_niche.replace(' ', '_')}_Leads.csv",
                    mime="text/csv",
                )
    else:
        st.error(f"No fully verified executives found for '{hunted_niche}' today. Try broadening your niche.")