"""Company recommendation demo.

Embeds company descriptions with a sentence-transformers model, stores the
vectors in a Pinecone index together with each company's region, and exposes a
Gradio form that returns the closest companies in a selected region.
"""

import os
from functools import lru_cache

import gradio as gr
import numpy as np
import pandas as pd
import pinecone
from sentence_transformers import SentenceTransformer

# Initialize Pinecone.  The key is taken from the PINECONE_API_KEY environment
# variable so a real secret never has to live in the source; the original
# placeholder is kept as the fallback value.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "your-pinecone-api-key")
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
# NOTE(review): nothing in this file creates the index; it is assumed to exist
# already with a dimension matching the embedding model -- confirm.
index_name = 'company-recommendations'

# Lightweight model used for both the stored vectors and the query vectors.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'


@lru_cache(maxsize=1)
def _get_model():
    """Return the shared embedding model, constructing it on first use only."""
    return SentenceTransformer(EMBEDDING_MODEL_NAME)


def load_data(path='company_data.csv'):
    """Load the company dataset and give every row a sequential integer id.

    Args:
        path: CSV file to read.  The rest of this module reads the columns
            ``name``, ``description`` and ``region`` from it.

    Returns:
        A DataFrame with the CSV contents plus an ``id`` column holding
        ``0 .. len(data) - 1``.
    """
    data = pd.read_csv(path)  # Replace with your dataset
    data["id"] = range(len(data))
    return data


@lru_cache(maxsize=1)
def _get_data():
    """Return the dataset, reading the CSV only once per process."""
    return load_data()


def prepare_and_upload_data(data, batch_size=500):
    """Embed every company description and upsert the vectors into Pinecone.

    Each vector is stored under the stringified ``id`` of its row with the
    row's ``region`` as metadata.  As a side effect a ``description_vector``
    column is added to ``data``.

    Args:
        data: DataFrame with ``id``, ``description`` and ``region`` columns.
        batch_size: Number of vectors sent per upsert request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    print("Encoding company descriptions...")
    # Hand the model a plain list of strings: a pandas Series is indexed by
    # label rather than position, and missing descriptions would be NaN floats.
    descriptions = data['description'].fillna("").astype(str).tolist()
    vectors = _get_model().encode(descriptions).tolist() if descriptions else []
    # Build the Series on data's own index so the vectors stay attached to
    # the right rows even when that index is not 0..n-1.
    data['description_vector'] = pd.Series(vectors, index=data.index, dtype=object)

    print("Uploading items to Pinecone...")
    items_to_upload = [
        (str(company_id), vector, {"region": region})
        for company_id, vector, region in zip(data['id'], vectors, data['region'])
    ]
    index = pc.Index(index_name)  # resolve the index handle once, not per batch
    for start in range(0, len(items_to_upload), batch_size):
        index.upsert(vectors=items_to_upload[start:start + batch_size])


@lru_cache(maxsize=1)
def _ensure_index_populated():
    """Upload the dataset to Pinecone once per process.

    ``lru_cache`` does not cache a call that raised, so a failed upload is
    retried on the next request.
    """
    prepare_and_upload_data(_get_data())
    return True


def get_top_companies(description, region, top_k=5, data=None):
    """Query Pinecone for the companies closest to ``description`` in a region.

    Args:
        description: Free-text description to embed and search with.
        region: Region value used as the Pinecone metadata filter.
        top_k: Maximum number of matches to request.
        data: DataFrame used to look up ``name``, ``description`` and
            ``region`` for each match.  Defaults to the dataset returned by
            ``load_data()``.

    Returns:
        A DataFrame with the columns ``id``, ``score``, ``name``,
        ``description`` and ``region``, one row per match, in the order the
        matches were returned.
    """
    if data is None:
        data = _get_data()

    # Send the query vector as a plain list of floats rather than a NumPy array.
    query_vector = _get_model().encode(description).tolist()

    # Query Pinecone with region filter
    res = pc.Index(index_name).query(
        vector=query_vector,
        top_k=top_k,
        filter={"region": region},
    )

    # Extract results
    matches = res.matches
    ids = [match.id for match in matches]
    # Vector ids are the stringified ``id`` column; fall back to the frame's
    # own index when the caller's frame has no such column.
    lookup = data.set_index("id") if "id" in data.columns else data
    rows = lookup.loc[[int(_id) for _id in ids]]
    return pd.DataFrame({
        'id': ids,
        'score': [match.score for match in matches],
        'name': rows['name'].tolist(),
        'description': rows['description'].tolist(),
        'region': rows['region'].tolist(),
    })


def gradio_interface(description, region):
    """Gradio callback: return the top matching companies for the form inputs.

    Raises:
        gr.Error: If the description is blank or no region was selected.
    """
    if not description or not description.strip():
        raise gr.Error("Please enter a company services description.")
    if not region:
        raise gr.Error("Please select a region.")

    # Loading, embedding and uploading the dataset happens once per process
    # instead of on every request.
    _ensure_index_populated()
    return get_top_companies(description, region, data=_get_data())


# Gradio App
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Enter your company services description"),
        gr.Dropdown(
            ["North America", "Europe", "Asia", "South America", "Africa", "Australia"],
            label="Select Region",
        ),
    ],
    outputs=gr.Dataframe(label="Top 5 Matching Companies"),
    title="Company Recommendation Engine",
    description="Enter your company services description and select a region to find the top 5 matching companies.",
)

# Launch only when run as a script so importing this module has no side effect
# beyond building the interface object.
if __name__ == "__main__":
    iface.launch()