yoniif committed on
Commit
aea3859
Β·
verified Β·
1 Parent(s): 04f0f11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -72
app.py CHANGED
@@ -1,87 +1,16 @@
1
- # πŸ”§ Install dependencies (uncomment if running locally)
2
- # !pip install gradio pandas sentence-transformers
3
-
4
- import os
5
- import zipfile
6
- import requests
7
  import pandas as pd
8
  import gradio as gr
9
  from sentence_transformers import SentenceTransformer, util
10
 
11
- ### STEP 1: Download and unzip the influencer dataset from Hugging Face
12
-
13
- # url = "https://huggingface.co/spaces/yoniif/final_assignment/resolve/main/top_influencers.zip"
14
- # zip_path = "top_100_influencers.zip"
15
-
16
- # # Download zip file if not already present
17
- # if not os.path.exists(zip_path):
18
- # print("πŸ“₯ Downloading influencer dataset...")
19
- # headers = {"User-Agent": "Mozilla/5.0"}
20
- # r = requests.get(url, headers=headers)
21
-
22
- # # Confirm file is binary ZIP
23
- # if r.status_code != 200 or b"PK" not in r.content[:10]:
24
- # raise ValueError("❌ Invalid ZIP file downloaded. Check URL or access permissions.")
25
-
26
- # with open(zip_path, "wb") as f:
27
- # f.write(r.content)
28
-
29
- # # Unzip the file into a folder
30
- # unzip_dir = "influencer_data"
31
- # if not os.path.exists(unzip_dir):
32
- # print("πŸ“¦ Unzipping dataset...")
33
- # with zipfile.ZipFile(zip_path, 'r') as zip_ref:
34
- # zip_ref.extractall(unzip_dir)
35
-
36
- # ### STEP 2: Merge all CSVs into one
37
-
38
- # print("πŸ”— Merging influencer files...")
39
- # all_dfs = []
40
- # for file in os.listdir(unzip_dir):
41
- # if file.endswith(".csv"):
42
- # df = pd.read_csv(os.path.join(unzip_dir, file))
43
- # df["Source File"] = file # Optional: keep track of file origin
44
- # all_dfs.append(df)
45
-
46
- # df = pd.concat(all_dfs, ignore_index=True)
47
-
48
-
49
-
50
- # βœ… Load the combined CSV directly
51
  df = pd.read_csv("top_100_influencers_combined_sample.csv")
52
-
53
- # Fill NA just in case
54
  df.fillna("", inplace=True)
55
 
56
- # Combine fields for embeddings
57
  df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
58
 
59
-
60
-
61
- # Basic cleanup
62
- df.drop_duplicates(inplace=True)
63
- df.dropna(subset=["Name", "Niche"], inplace=True)
64
- df.fillna("", inplace=True)
65
-
66
- # Save combined dataset (optional)
67
- df.to_csv("top_100_influencers_combined.csv", index=False)
68
- print("βœ… Combined dataset ready!")
69
-
70
- ### STEP 3: Build the recommender engine
71
-
72
- # Combine fields for semantic embedding
73
- df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
74
-
75
- # Load sentence embedding model
76
- print("🧠 Loading embedding model...")
77
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
78
-
79
- # Precompute influencer embeddings
80
- print("πŸ”’ Encoding influencer profiles...")
81
  influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
82
 
83
- ### STEP 4: Define similarity search + UI
84
-
85
  def recommend_influencers(brand_description):
86
  query_embedding = model.encode(brand_description, convert_to_tensor=True)
87
  cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
 
 
 
 
 
 
 
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer, util

# βœ… Load the CSV you uploaded into the Space.
df = pd.read_csv("top_100_influencers_combined_sample.csv")

# Replace missing values with empty strings so the profile-text build
# below never sees NaN.
df.fillna("", inplace=True)

# Build one free-text description per influencer for semantic embedding.
# astype(str) is required: fillna("") only fills NaN cells and leaves
# existing numeric values untyped, so plain "+" concatenation would raise
# a TypeError on any non-string column value.
df["profile_text"] = (
    df[["Name", "Platform", "Niche", "Country"]]
    .astype(str)
    .agg(" - ".join, axis=1)
)

# Sentence-embedding model shared by profile encoding (below) and query
# encoding in recommend_influencers.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Precompute all influencer embeddings once at startup; each incoming
# brand query is compared against this tensor via cosine similarity.
influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
13
 
 
 
14
  def recommend_influencers(brand_description):
15
  query_embedding = model.encode(brand_description, convert_to_tensor=True)
16
  cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]