heymenn commited on
Commit
d2f7eb8
·
verified ·
1 Parent(s): 53aa5e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -244
app.py CHANGED
@@ -1,247 +1,9 @@
1
- import pandas as pd
2
- import gradio as gr
3
- from fastapi import FastAPI, HTTPException
4
- from pydantic import BaseModel
5
- from sentence_transformers import SentenceTransformer
6
- from sklearn.metrics.pairwise import cosine_similarity
7
- import numpy as np
8
  import os
9
- from typing import List, Tuple
10
 
11
# --- Configuration ---
# Spreadsheet of technologies; expected to sit next to app.py.
EXCEL_FILE_PATH = 'technologies.xlsx'
# Compact general-purpose sentence-embedding model.
MODEL_NAME = 'all-MiniLM-L6-v2'

# --- Data Loading and Preprocessing ---
# Module-level state, populated once by load_data_and_model() at startup.
technologies_df = None  # DataFrame with 'technology' / 'description' columns
embeddings = None       # per-description embedding matrix, row-aligned with the DataFrame
model = None            # the SentenceTransformer instance
20
def load_data_and_model():
    """Load the Excel catalogue and the sentence-transformer model.

    Populates the module-level ``technologies_df``, ``embeddings`` and
    ``model`` globals so the predict functions can use them.

    Raises:
        gr.Error: if the Excel file is missing, lacks the required columns,
            or any other failure occurs during loading.
    """
    global technologies_df, embeddings, model
    try:
        # Fail fast with a clear message if the data file is absent.
        if not os.path.exists(EXCEL_FILE_PATH):
            raise FileNotFoundError(f"Error: The file '{EXCEL_FILE_PATH}' was not found.")

        technologies_df = pd.read_excel(EXCEL_FILE_PATH)

        # Both columns are required downstream; validate before any heavy work.
        if 'technology' not in technologies_df.columns or 'description' not in technologies_df.columns:
            raise ValueError("Excel file must contain 'technology' and 'description' columns.")

        # Missing descriptions become empty strings so every row gets an embedding.
        technologies_df['description'] = technologies_df['description'].fillna('')
        descriptions = technologies_df['description'].tolist()

        print(f"Loading sentence transformer model: {MODEL_NAME}...")
        model = SentenceTransformer(MODEL_NAME)
        print("Model loaded.")

        # Pre-compute one embedding per technology description.
        print("Generating embeddings for technology descriptions...")
        embeddings = model.encode(descriptions, show_progress_bar=False)  # no progress bar on Spaces
        print("Embeddings generated.")

    except FileNotFoundError as e:
        print(e)
        # Chain the original exception so the traceback keeps the root cause.
        raise gr.Error(f"Error loading data: {e}") from e
    except ValueError as e:
        print(f"Data validation error: {e}")
        raise gr.Error(f"Data validation error: {e}") from e
    except Exception as e:
        print(f"An unexpected error occurred during data loading: {e}")
        raise gr.Error(f"An unexpected error occurred during data loading: {e}") from e
58
-
59
- # --- Helper Function ---
60
def get_top_10_tech(problem_description: str, top_k: int = 10) -> List[Tuple[int, float]]:
    """Rank technologies by cosine similarity to a problem description.

    Args:
        problem_description: The technical problem described by the user.
        top_k: How many of the best matches to return. Defaults to 10,
            preserving the original behaviour for existing callers.

    Returns:
        A list of ``(row_index, similarity_score)`` tuples indexing into
        ``technologies_df``, sorted by score descending.

    Raises:
        gr.Error: If the model/embeddings have not been loaded yet.
    """
    if model is None or embeddings is None or technologies_df is None:
        raise gr.Error("Server not ready, embeddings not loaded.")

    # encode() expects a batch, so wrap the single query in a list.
    problem_embedding = model.encode([problem_description])

    # One similarity per technology; [0] unwraps the single query row.
    similarities = cosine_similarity(problem_embedding, embeddings)[0]

    # argsort is ascending, so take the last top_k indices and reverse
    # them to put the highest similarity first. (Safe even when fewer
    # than top_k technologies exist — the slice just returns them all.)
    top_indices = np.argsort(similarities)[-top_k:][::-1]

    return [(idx, similarities[idx]) for idx in top_indices]
92
-
93
- # --- Gradio Interface Functions ---
94
def predict(problem_description: str):
    """Gradio handler: Markdown for the 2 technologies closest to the query."""
    try:
        ranked = get_top_10_tech(problem_description)
        if not ranked:
            return "No matching technologies found."
        best_two = [idx for idx, _ in ranked[:2]]
        rendered = [
            f"**Technology:** {row['technology']}\n**Description:** {row['description']}"
            for _, row in technologies_df.iloc[best_two].iterrows()
        ]
        return "\n\n".join(rendered)
    except gr.Error as err:
        return str(err)
    except Exception as err:
        print(f"Error in prediction: {err}")
        return "An error occurred while processing your request."
111
-
112
def predict_worst(problem_description: str):
    """Gradio handler: the 2 weakest matches among the top-10 candidates."""
    try:
        ranked = get_top_10_tech(problem_description)
        if len(ranked) < 2:
            return "Not enough matching technologies to find the least similar."
        tail = [idx for idx, _ in ranked[-2:]]
        rendered = [
            f"**Technology:** {row['technology']}\n**Description:** {row['description']}"
            for _, row in technologies_df.iloc[tail].iterrows()
        ]
        return "\n\n".join(rendered)
    except gr.Error as err:
        return str(err)
    except Exception as err:
        print(f"Error in predict_worst: {err}")
        return "An error occurred while processing your request."
129
-
130
def predict_most_similar_pairs(problem_description: str):
    """Gradio handler: the 2 most similar technology pairs within the top 10."""
    try:
        ranked = get_top_10_tech(problem_description)
        if len(ranked) < 2:
            return "Not enough matching technologies to form pairs."

        indices = [idx for idx, _ in ranked]
        vectors = embeddings[indices]
        subset = technologies_df.iloc[indices].reset_index(drop=True)

        sim_matrix = cosine_similarity(vectors)
        count = len(subset)
        # Every unordered pair (a, b) with a < b, tagged with its similarity.
        candidates = [
            ((a, subset['technology'][a], subset['description'][a]),
             (b, subset['technology'][b], subset['description'][b]),
             sim_matrix[a, b])
            for a in range(count)
            for b in range(a + 1, count)
        ]

        candidates.sort(key=lambda item: item[2], reverse=True)
        sections = []
        for i, ((_, tech1, desc1), (_, tech2, desc2), score) in enumerate(candidates[:2]):
            sections.append(f"**Pair {i+1}:**\n"
                            f"**Technology 1:** {tech1}\nDescription: {desc1}\n"
                            f"**Technology 2:** {tech2}\nDescription: {desc2}\n"
                            f"**Similarity Score:** {score:.4f}\n\n")
        return "\n".join(sections) if sections else "No similar pairs found."
    except gr.Error as err:
        return str(err)
    except Exception as err:
        print(f"Error in predict_most_similar_pairs: {err}")
        return "An error occurred while processing the request for similar pairs."
165
-
166
def predict_least_similar_pairs(problem_description: str):
    """Gradio handler: the 2 least similar technology pairs within the top 10."""
    try:
        ranked = get_top_10_tech(problem_description)
        if len(ranked) < 2:
            return "Not enough matching technologies to form pairs."

        indices = [idx for idx, _ in ranked]
        vectors = embeddings[indices]
        subset = technologies_df.iloc[indices].reset_index(drop=True)

        sim_matrix = cosine_similarity(vectors)
        count = len(subset)
        # Every unordered pair (a, b) with a < b, tagged with its similarity.
        candidates = [
            ((a, subset['technology'][a], subset['description'][a]),
             (b, subset['technology'][b], subset['description'][b]),
             sim_matrix[a, b])
            for a in range(count)
            for b in range(a + 1, count)
        ]

        # Ascending sort: the least similar pairs come first.
        candidates.sort(key=lambda item: item[2])
        sections = []
        for i, ((_, tech1, desc1), (_, tech2, desc2), score) in enumerate(candidates[:2]):
            sections.append(f"**Pair {i+1}:**\n"
                            f"**Technology 1:** {tech1}\nDescription: {desc1}\n"
                            f"**Technology 2:** {tech2}\nDescription: {desc2}\n"
                            f"**Similarity Score:** {score:.4f}\n\n")
        return "\n".join(sections) if sections else "No pairs found."
    except gr.Error as err:
        return str(err)
    except Exception as err:
        print(f"Error in predict_least_similar_pairs: {err}")
        return "An error occurred while processing the request for least similar pairs."
201
-
202
- # --- Gradio Interface ---
203
# --- Gradio Interface ---
def _build_interface(fn, output_label, title, description, examples):
    """Factory for one tab: all four tools share the same input widget."""
    return gr.Interface(
        fn=fn,
        inputs=gr.Textbox(label="Enter a technical problem description"),
        outputs=gr.Textbox(label=output_label),
        title=title,
        description=description,
        examples=examples,
    )

iface = _build_interface(
    predict,
    "Top 2 Most Similar Technologies",
    "Technology Recommender",
    "Enter a description of a technical problem to find the top 2 most relevant technologies.",
    ["Troubleshooting network connectivity issues", "Need a database for a small web application"],
)

iface_worst = _build_interface(
    predict_worst,
    "Bottom 2 Least Similar Technologies (from Top 10)",
    "Find Least Similar Technologies",
    "Enter a description of a technical problem to find the bottom 2 least relevant technologies from the top 10 matches.",
    ["Scaling a microservices architecture", "Implementing a new UI framework"],
)

iface_mixing_max = _build_interface(
    predict_most_similar_pairs,
    "Top 2 Most Similar Pairs of Technologies (from Top 10)",
    "Find Most Similar Technology Pairs",
    "Enter a description of a technical problem to find the top 2 most similar pairs of technologies among the top 10 matches.",
    ["Data analysis pipeline", "Machine learning model deployment"],
)

iface_mixing_min = _build_interface(
    predict_least_similar_pairs,
    "Top 2 Least Similar Pairs of Technologies (from Top 10)",
    "Find Least Similar Technology Pairs",
    "Enter a description of a technical problem to find the top 2 least similar pairs of technologies among the top 10 matches.",
    ["Frontend development", "Backend database design"],
)

# Combine the four tools into a single tabbed app.
tabbed_interface = gr.TabbedInterface(
    [iface, iface_worst, iface_mixing_max, iface_mixing_min],
    ["Find Most Similar", "Find Least Similar", "Most Similar Pairs", "Least Similar Pairs"],
)

# Load data and model on startup, then serve.
load_data_and_model()
tabbed_interface.launch()
 
1
import subprocess
import sys
import os

# Install dependencies at startup (runtime workaround: the real app lives in
# main.py and its requirements are installed here before launching).
if os.path.exists("requirements.txt"):
    # Use the running interpreter's pip (sys.executable -m pip) so packages
    # land in the correct environment; check=True aborts if install fails
    # instead of silently launching with missing dependencies.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        check=True,
    )

# Run FastAPI with Uvicorn on port 80; check=True surfaces a non-zero exit
# from the server process as an error here.
subprocess.run(
    [sys.executable, "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"],
    check=True,
)