jaothan committed on
Commit
06f7804
·
verified ·
1 Parent(s): b864a31

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import numpy as np
import pandas as pd
import pinecone
from sentence_transformers import SentenceTransformer
6
+
7
# Initialize Pinecone
# The key is read from the PINECONE_API_KEY environment variable so the real
# secret never has to be committed to the repository. The original placeholder
# is kept as the fallback, so editing this line by hand still works.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "your-pinecone-api-key")
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
# Name of the Pinecone index that every upsert and query in this file targets.
index_name = 'company-recommendations'
11
+
12
# Load the dataset (replace with your dataset)
def load_data(path='company_data.csv'):
    """Read the company dataset from a CSV file and add a sequential ``id`` column.

    Args:
        path: Location of the CSV file. Defaults to ``'company_data.csv'``,
            the file the original hard-coded.

    Returns:
        A DataFrame with the CSV contents plus an ``id`` column numbered
        ``0 .. len(data) - 1`` in row order.
    """
    data = pd.read_csv(path)
    # Sequential ids in row order; with the default index produced by
    # read_csv they coincide with the row labels.
    data["id"] = range(len(data))
    return data
18
+
19
# Generate embeddings and upload to Pinecone
def prepare_and_upload_data(data, batch_size=500):
    """Embed every company description and upsert the vectors into Pinecone.

    Adds a ``description_vector`` column to ``data`` in place, then uploads one
    ``(id, vector, {"region": ...})`` tuple per row to the ``index_name`` index.

    Args:
        data: DataFrame with ``id``, ``description`` and ``region`` columns.
        batch_size: Number of vectors sent per upsert request (default 500).
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight model for embeddings
    print("Encoding company descriptions...")
    # Hand the encoder a plain list so it never indexes a pandas Series by label.
    vectors = model.encode(data['description'].tolist()).tolist()
    # Build the column on data's own index: a bare pd.Series(vectors) is
    # aligned by label and silently produces NaN rows when the frame is not
    # indexed 0..n-1 (e.g. after filtering).
    data['description_vector'] = pd.Series(vectors, index=data.index)

    print("Uploading items to Pinecone...")
    items_to_upload = [
        (str(item_id), vector, {"region": region})
        for item_id, vector, region in zip(data['id'], vectors, data['region'])
    ]
    index = pc.Index(index_name)  # resolve the index handle once, not per batch
    for start in range(0, len(items_to_upload), batch_size):
        index.upsert(vectors=items_to_upload[start:start + batch_size])
30
+
31
# Query Pinecone for top 5 matching companies in a specific region
def get_top_companies(description, region, top_k=5, data=None):
    """Return the companies whose stored descriptions best match ``description``.

    Args:
        description: Free-text description to embed and search with.
        region: Region used as a Pinecone metadata filter. A falsy value
            (e.g. ``None`` when nothing is selected in the UI) searches
            without a region filter.
        top_k: Maximum number of matches to return.
        data: DataFrame the matches are looked up in; each match id is
            converted to ``int`` and used as a row label. Loaded with
            ``load_data()`` when omitted.

    Returns:
        A DataFrame with ``id``, ``score``, ``name``, ``description`` and
        ``region`` columns, one row per match.

    Raises:
        KeyError: If a returned id has no matching row label in ``data``.
    """
    if data is None:
        # Previously this function read a global ``data`` that is never
        # defined at module level, so every query raised NameError.
        data = load_data()

    model = SentenceTransformer('all-MiniLM-L6-v2')
    # Send a plain list of floats rather than a numpy array to the client.
    query_vector = model.encode(description).tolist()

    # Query Pinecone, adding the region filter only when a region was chosen
    query_args = {"vector": query_vector, "top_k": top_k}
    if region:
        query_args["filter"] = {"region": region}
    res = pc.Index(index_name).query(**query_args)

    # Extract results
    matches = res.matches
    ids = [match.id for match in matches]
    rows = data.loc[[int(_id) for _id in ids]]
    return pd.DataFrame({
        'id': ids,
        'score': [match.score for match in matches],
        'name': rows['name'].tolist(),
        'description': rows['description'].tolist(),
        'region': rows['region'].tolist(),
    })


# Per-process cache so the dataset is embedded and uploaded once instead of on
# every request. Restart the app to pick up a changed dataset file.
_DATASET_CACHE = {}


def _get_indexed_dataset():
    """Load the dataset and upload its embeddings on first use; reuse it afterwards."""
    if "data" not in _DATASET_CACHE:
        data = load_data()
        prepare_and_upload_data(data)
        _DATASET_CACHE["data"] = data
    return _DATASET_CACHE["data"]


# Gradio Interface
def gradio_interface(description, region):
    """Gradio callback: return the top matching companies as a DataFrame.

    Args:
        description: Text entered by the user.
        region: Region selected by the user.

    Returns:
        The DataFrame produced by ``get_top_companies``.
    """
    return get_top_companies(description, region, data=_get_indexed_dataset())
61
+
62
# Launch Gradio App
# Build the input and output components first, then wire them into the interface.
description_box = gr.Textbox(label="Enter your company services description")
region_dropdown = gr.Dropdown(
    ["North America", "Europe", "Asia", "South America", "Africa", "Australia"],
    label="Select Region",
)
results_table = gr.Dataframe(label="Top 5 Matching Companies")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[description_box, region_dropdown],
    outputs=results_table,
    title="Company Recommendation Engine",
    description="Enter your company services description and select a region to find the top 5 matching companies.",
)

# Start the web server as soon as the module is executed.
iface.launch()