Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- enrich.py +45 -0
- requirements.txt +7 -3
- sample_enriched_full.csv +11 -0
- streamlit_app.py +40 -0
- utils.py +8 -0
enrich.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import whois
|
| 2 |
+
import time
|
| 3 |
+
|
| 4 |
+
def _first_value(value):
    """Collapse a WHOIS field to a single value, or "" when it is empty.

    A field may hold one value or a list of values; for a list the first
    non-empty item is used so every output column holds a scalar.
    """
    if isinstance(value, (list, tuple)):
        value = next((item for item in value if item), "")
    return value if value else ""


def enrich_domain_data(domains, delay=1):
    """Look up WHOIS data for each domain and return one record per domain.

    Args:
        domains: Iterable of domain names to look up.
        delay: Seconds to pause after each successful lookup (default 1,
            the value that used to be hard-coded). Zero or a negative
            value disables the pause.

    Returns:
        A list of dicts in input order. A successful lookup yields the keys
        "domain", "email", "organization", "country" and "linkedin"; a
        failed lookup yields the same keys with empty strings plus an
        "error" key holding the exception message.
    """
    enriched = []
    for domain in domains:
        try:
            info = whois.whois(domain)
            record = {
                "domain": domain,
                # Any of these fields may come back as a list; keep one value.
                "email": _first_value(info.emails),
                "organization": _first_value(info.org),
                "country": _first_value(info.country),
                # No LinkedIn search to avoid rate limits or external dependencies
                "linkedin": "Not searched",
            }
        except Exception as e:
            # Best effort: one failing domain must not abort the whole batch,
            # so the failure is recorded in the row instead of being raised.
            enriched.append({
                "domain": domain,
                "error": str(e),
                "email": "",
                "organization": "",
                "country": "",
                "linkedin": "",
            })
            continue

        enriched.append(record)
        # Pause between lookups (presumably to stay under WHOIS rate limits).
        if delay and delay > 0:
            time.sleep(delay)

    return enriched
|
requirements.txt
CHANGED
|
@@ -1,3 +1,7 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
requests
|
| 3 |
+
streamlit
|
| 4 |
+
python-dotenv
|
| 5 |
+
googlesearch-python
|
| 6 |
+
duckduckgo-search
|
| 7 |
+
python-whois
|
sample_enriched_full.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
domain,email,organization,country,linkedin,score
|
| 2 |
+
openai.com,contact@openai.com,OpenAI,US,https://linkedin.com/in/sam-altman,100
|
| 3 |
+
github.com,support@github.com,GitHub,US,https://linkedin.com/in/thomas-dohmke,100
|
| 4 |
+
example.com,info@example.com,Example Inc,US,https://linkedin.com/in/example-ceo,90
|
| 5 |
+
stripe.com,hello@stripe.com,Stripe,US,https://linkedin.com/in/patrick-collison,100
|
| 6 |
+
airbnb.com,team@airbnb.com,Airbnb,US,https://linkedin.com/in/brian-chesky,100
|
| 7 |
+
dropbox.com,help@dropbox.com,Dropbox,US,https://linkedin.com/in/drew-houston,100
|
| 8 |
+
notion.so,info@notion.so,Notion,US,https://linkedin.com/in/ivan-zhao,100
|
| 9 |
+
figma.com,contact@figma.com,Figma,US,https://linkedin.com/in/dylan-field,100
|
| 10 |
+
coursera.org,support@coursera.org,Coursera,US,https://linkedin.com/in/jeff-mag,100
|
| 11 |
+
quora.com,info@quora.com,Quora,US,https://linkedin.com/in/adam-dangelo,100
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from enrich import enrich_domain_data
|
| 4 |
+
from utils import score_leads
|
| 5 |
+
|
| 6 |
+
# Streamlit page: upload a CSV of domains, enrich and score them, then show
# the results and offer them as a CSV download.
st.set_page_config(page_title="LeadSight AI", layout="wide")
st.title(" LeadSight AI - Smart Lead Enrichment")

# Upload CSV file
uploaded_file = st.file_uploader("Upload CSV with domain column", type=["csv"])

# Keep results in session state so they are still available on later runs
# of this script (e.g. after the download button is clicked).
if "leads_df" not in st.session_state:
    st.session_state["leads_df"] = None

# Handle file upload
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    if "domain" not in df.columns:
        st.error(" CSV must contain a 'domain' column.")
    else:
        if st.button(" Enrich Leads"):
            # Drop missing cells and surrounding whitespace so empty rows are
            # not sent to the WHOIS lookup as NaN or blank strings.
            cleaned = df["domain"].dropna().astype(str).str.strip()
            domains = [name for name in cleaned.tolist() if name]
            if not domains:
                st.warning("No domains found in the 'domain' column.")
            else:
                enriched = enrich_domain_data(domains)
                scored = score_leads(enriched)
                st.session_state["leads_df"] = pd.DataFrame(scored)
                st.success(" Leads enriched and scored!")

# Always show results if available
if st.session_state["leads_df"] is not None:
    df = st.session_state["leads_df"]
    st.subheader(" Enriched and Scored Leads")
    st.dataframe(df)

    csv = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label=" Download All Leads",
        data=csv,
        file_name="enriched_leads.csv",
        mime="text/csv"
    )
|
utils.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def _has_value(value):
    """Return True when a lead field holds real data.

    Empty values, whitespace-only strings and the "Not searched" placeholder
    (the linkedin value enrich_domain_data writes when no search was run)
    all count as missing.
    """
    if not value:
        return False
    if isinstance(value, str):
        text = value.strip()
        return bool(text) and text != "Not searched"
    return True


def score_leads(leads):
    """Add a "score" key (0-100) to every lead and return the same list.

    Points are awarded per populated field: 40 for "email", 30 for
    "organization" and 30 for "linkedin". Missing keys score nothing.

    Args:
        leads: List of lead dicts; each dict is modified in place.

    Returns:
        The same list object that was passed in, with "score" set on
        every lead.
    """
    weights = (("email", 40), ("organization", 30), ("linkedin", 30))
    for lead in leads:
        lead["score"] = sum(
            points for field, points in weights if _has_value(lead.get(field))
        )
    return leads
|