# Spaces: Sleeping
import os

import gradio as gr
import numpy as np
import pandas as pd
import pinecone
from sentence_transformers import SentenceTransformer
# Initialize Pinecone
# The key is read from the PINECONE_API_KEY environment variable so the real
# secret never has to be committed to the source file. The original
# placeholder is kept as the fallback so existing behaviour is unchanged when
# the variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "your-pinecone-api-key")
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
# Name of the Pinecone index that every upsert and query below targets.
index_name = 'company-recommendations'
# Load the dataset (replace with your dataset)
def load_data(path='company_data.csv'):
    """Read the company dataset from a CSV file and attach a sequential id.

    Args:
        path: Location of the CSV file. Defaults to ``'company_data.csv'``,
            the file name that used to be hard-coded here.

    Returns:
        The DataFrame parsed from ``path`` with an extra ``id`` column holding
        ``0 .. len(data) - 1`` in row order. An existing ``id`` column is
        overwritten.
    """
    # Example dataset with company descriptions and regions
    data = pd.read_csv(path)
    data["id"] = range(len(data))
    return data
# Generate embeddings and upload to Pinecone
def prepare_and_upload_data(data, batch_size=500):
    """Embed every company description and upsert the vectors into Pinecone.

    Side effect: a ``description_vector`` column (one list of floats per row)
    is added to ``data`` in place.

    Args:
        data: DataFrame providing ``id``, ``description`` and ``region``
            columns.
        batch_size: Number of vectors sent per upsert request. Defaults to
            500, the value that used to be hard-coded.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight model for embeddings
    print("Encoding company descriptions...")
    # Hand encode() a plain list rather than a Series: a list is indexed by
    # position, whereas integer indexing on a Series goes through its labels
    # and would pick the wrong rows (or fail) on a non-default index.
    vectors = model.encode(data['description'].tolist()).tolist()
    # Align explicitly on data's own index; a fresh default-indexed Series
    # would be matched by label and misplace vectors on a non-default index.
    data['description_vector'] = pd.Series(vectors, index=data.index)
    print("Uploading items to Pinecone...")
    # Each item is (vector id as string, embedding, metadata used for filtering).
    items_to_upload = [
        (str(company_id), vector, {"region": company_region})
        for company_id, vector, company_region in zip(data['id'], vectors, data['region'])
    ]
    # The index handle does not change between batches, so create it once.
    index = pc.Index(index_name)
    for start in range(0, len(items_to_upload), batch_size):
        index.upsert(vectors=items_to_upload[start:start + batch_size])
# Query Pinecone for top 5 matching companies in a specific region
_QUERY_MODEL = None  # encoder shared by all queries, created on first use


def _get_query_model():
    """Return the shared sentence encoder, loading it the first time it is needed."""
    global _QUERY_MODEL
    if _QUERY_MODEL is None:
        _QUERY_MODEL = SentenceTransformer('all-MiniLM-L6-v2')
    return _QUERY_MODEL


def get_top_companies(description, region, top_k=5, data=None):
    """Return the companies in ``region`` whose descriptions best match ``description``.

    Args:
        description: Free-text description to embed and search with.
        region: Value the ``region`` metadata of a match must equal.
        top_k: Maximum number of matches to request from Pinecone.
        data: DataFrame with ``id``, ``name``, ``description`` and ``region``
            columns used to look up the details of each match. When omitted
            the dataset is read with ``load_data()``; pass an already loaded
            frame to avoid re-reading the CSV on every call.

    Returns:
        DataFrame with columns ``id``, ``score``, ``name``, ``description``
        and ``region``, one row per match in the order Pinecone returned them.

    Raises:
        KeyError: if a returned match id is not present in ``data``.
    """
    # Previously this function read an undefined global named ``data`` and
    # failed with NameError; the dataset is now an explicit, optional input.
    if data is None:
        data = load_data()

    # Convert to a plain list, matching how vectors are uploaded (.tolist()).
    query_vector = _get_query_model().encode(description).tolist()

    # Query Pinecone with region filter
    res = pc.Index(index_name).query(
        vector=query_vector,
        top_k=top_k,
        filter={"region": region}
    )

    # Extract results
    matches = res.matches
    ids = [match.id for match in matches]
    # Vector ids are str(row id), so look rows up by the ``id`` column rather
    # than relying on the frame's index happening to equal it.
    companies = data.set_index("id").loc[[int(_id) for _id in ids]]
    return pd.DataFrame({
        'id': ids,
        'score': [match.score for match in matches],
        'name': companies['name'].tolist(),
        'description': companies['description'].tolist(),
        'region': companies['region'].tolist(),
    })
# Gradio Interface
# Dataset that has already been embedded and uploaded. It lives at module
# level under the name ``data`` so that code which looks the dataset up as a
# global can see it.
data = None


def gradio_interface(description, region):
    """Handle one UI request: return the best-matching companies as a DataFrame.

    The dataset is loaded, embedded and uploaded to Pinecone only on the first
    request; later requests reuse it instead of re-encoding and re-uploading
    every row each time.

    Args:
        description: Text entered in the description box.
        region: Region selected in the dropdown.

    Returns:
        The DataFrame produced by ``get_top_companies(description, region)``.
    """
    global data
    if data is None:
        loaded = load_data()
        prepare_and_upload_data(loaded)
        # Publish only after the upload finished, so a failed first attempt is
        # retried on the next request instead of being skipped.
        data = loaded
    top_companies = get_top_companies(description, region)
    return top_companies
# Launch Gradio App
# Build the widgets first, then wire them into the Interface and start it.
_description_box = gr.Textbox(label="Enter your company services description")
_region_dropdown = gr.Dropdown(
    ["North America", "Europe", "Asia", "South America", "Africa", "Australia"],
    label="Select Region",
)
_results_table = gr.Dataframe(label="Top 5 Matching Companies")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_description_box, _region_dropdown],
    outputs=_results_table,
    title="Company Recommendation Engine",
    description="Enter your company services description and select a region to find the top 5 matching companies.",
)
iface.launch()