import pandas as pd import gradio as gr from sentence_transformers import SentenceTransformer, util # Load merged influencer dataset # Columns: Rank, Name, Followers, ER, Country, Niche, Reach, Source File, Source Path df = pd.read_csv("top_100_influencers_combined.csv") df.fillna("", inplace=True) # Extract platform name from Source File (e.g., 'youtube_data_greece.csv' -> 'Youtube') df['Platform'] = df['Source File'].str.split('_').str[0].str.capitalize() # Prepare text for embedding (include platform) profile_fields = ["Name", "Platform", "Niche", "Country"] df["profile_text"] = df[profile_fields].agg(" - ".join, axis=1) # Load embedding model model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True) # Recommendation logic: find top 3 by cosine similarity def recommend_influencers(brand_description): query_embedding = model.encode(brand_description, convert_to_tensor=True) cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0] top_indices = cosine_scores.topk(3).indices.tolist() recs = [] for idx in top_indices: row = df.iloc[idx] recs.append({ "Name": row["Name"], "Platform": row.get("Platform", ""), "Niche": row["Niche"], "Country": row["Country"], "ER": row.get("ER", "N/A"), "Followers": row["Followers"], "Reach": row.get("Reach", "") }) return recs # Format recommendations into styled HTML cards def format_output(brand_input): recs = recommend_influencers(brand_input) html = "" for i, rec in enumerate(recs, 1): html += f"""
Niche: {rec['Niche']}
Country: {rec['Country']}
Engagement: {rec['ER']}
Followers: {rec['Followers']}
{f"Reach: {rec['Reach']}
" if rec['Reach'] else ""}