Spaces:

jaothan
/

crunchbase_test1

Sleeping

App Files Files Community

jaothan committed on Jan 31, 2025

Commit

06f7804

verified ·

1 Parent(s): b864a31

Upload app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import os
+
+import gradio as gr
+import numpy as np
+import pandas as pd
+import pinecone
+from sentence_transformers import SentenceTransformer
+# Initialize Pinecone
+PINECONE_API_KEY = "your-pinecone-api-key"  # Replace with your Pinecone API key
+pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
+index_name = 'company-recommendations'
+# Load the dataset (replace with your dataset)
+def load_data():
+    # Example dataset with company descriptions and regions
+    data = pd.read_csv('company_data.csv')  # Replace with your dataset
+    data["id"] = range(len(data))
+    return data
+# Generate embeddings and upload to Pinecone
+def prepare_and_upload_data(data):
+    model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight model for embeddings
+    print("Encoding company descriptions...")
+    encoded_descriptions = model.encode(data['description'])
+    data['description_vector'] = pd.Series(encoded_descriptions.tolist())
+    print("Uploading items to Pinecone...")
+    items_to_upload = [(str(row.id), row.description_vector, {"region": row.region}) for _, row in data.iterrows()]
+    for i in range(0, len(items_to_upload), 500):  # Batch size of 500
+        pc.Index(index_name).upsert(vectors=items_to_upload[i:i+500])
+# Query Pinecone for top 5 matching companies in a specific region
+def get_top_companies(description, region, top_k=5):
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+    query_vector = model.encode(description)
+    # Query Pinecone with region filter
+    res = pc.Index(index_name).query(
+        vector=query_vector,
+        top_k=top_k,
+        filter={"region": region}
+    )
+    # Extract results
+    ids = [match.id for match in res.matches]
+    scores = [match.score for match in res.matches]
+    df = pd.DataFrame({
+        'id': ids,
+        'score': scores,
+        'name': [data.loc[int(_id), 'name'] for _id in ids],
+        'description': [data.loc[int(_id), 'description'] for _id in ids],
+        'region': [data.loc[int(_id), 'region'] for _id in ids]
+    })
+    return df
+# Gradio Interface
+def gradio_interface(description, region):
+    data = load_data()
+    prepare_and_upload_data(data)
+    top_companies = get_top_companies(description, region)
+    return top_companies
+# Launch Gradio App
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        gr.Textbox(label="Enter your company services description"),
+        gr.Dropdown(["North America", "Europe", "Asia", "South America", "Africa", "Australia"], label="Select Region")
+    ],
+    outputs=gr.Dataframe(label="Top 5 Matching Companies"),
+    title="Company Recommendation Engine",
+    description="Enter your company services description and select a region to find the top 5 matching companies."
+)
+iface.launch()