Spaces:

heymenn
/

technical-problem-analyzer

Runtime error

App Files Files Community

heymenn committed on Apr 8, 2025

Commit

dcf3971

verified ·

1 Parent(s): deda0a8

Create app.py

Browse files

Files changed (1) hide show

app.py +218 -0

app.py ADDED Viewed

	@@ -0,0 +1,218 @@

+import gradio as gr
+import pandas as pd
+import json
+from sentence_transformers import SentenceTransformer, util
+import torch
+from duckduckgo_search import DDGS
+import re
# --- Configuration ---
CATEGORY_JSON_PATH = "categories.json"
TECHNOLOGY_EXCEL_PATH = "technologies.xlsx"
MODEL_NAME = 'all-MiniLM-L6-v2'  # A good general-purpose sentence transformer
SIMILARITY_THRESHOLD = 0.3  # Minimum cosine similarity for a category match; adjust as needed
MAX_SEARCH_RESULTS_PER_TECH = 3  # Passed as max_results to each web search
# --- Load Data and Model (Load once at startup) ---
# Everything below runs at import time so that request handlers can reuse the
# loaded data, the model and the pre-computed category embeddings.
print("Loading data and model...")
try:
    # Load Categories.
    # The encoding is given explicitly: without it open() uses the platform's
    # locale encoding, which can mis-decode non-ASCII text in the JSON file.
    with open(CATEGORY_JSON_PATH, 'r', encoding='utf-8') as f:
        categories_data = json.load(f)["Category"]
    # Prepare category texts for embedding (Category Name + Keywords).
    # Each value under "Category" is joined with ', ', so it is expected to be
    # an iterable of keyword strings.
    category_names = list(categories_data.keys())
    category_texts = [f"{name}: {', '.join(keywords)}" for name, keywords in categories_data.items()]
    print(f"Loaded {len(category_names)} categories.")
    # Load Technologies
    technologies_df = pd.read_excel(TECHNOLOGY_EXCEL_PATH)
    # Clean the technology category column - handle potential NaN and ensure string type
    technologies_df['category'] = technologies_df['category'].fillna('').astype(str)
    print(f"Loaded {len(technologies_df)} technologies.")
    # Load Sentence Transformer Model
    model = SentenceTransformer(MODEL_NAME)
    print(f"Loaded Sentence Transformer model: {MODEL_NAME}")
    # Pre-compute category embeddings (same order as category_names)
    print("Computing category embeddings...")
    category_embeddings = model.encode(category_texts, convert_to_tensor=True)
    print("Category embeddings computed.")
except FileNotFoundError as e:
    print(f"ERROR: File not found - {e}. Please ensure '{CATEGORY_JSON_PATH}' and '{TECHNOLOGY_EXCEL_PATH}' exist.")
    # The app cannot work without its data files: re-raise the active
    # exception unchanged so startup fails with the original traceback.
    raise
except Exception as e:
    print(f"ERROR loading data or model: {e}")
    raise
+# --- Helper Functions ---
def find_best_category(problem_description):
    """
    Find the category most semantically similar to the problem description.

    The description is embedded with the module-level ``model`` and compared,
    by cosine similarity, against the pre-computed ``category_embeddings``.

    Args:
        problem_description: Free-text description of the problem.

    Returns:
        A ``(category_name, score)`` tuple. Both elements are ``None`` when the
        description is empty, no categories are loaded, the best score is
        below ``SIMILARITY_THRESHOLD``, or embedding/scoring raises (the error
        is printed, not propagated).
    """
    # Always return a 2-tuple: callers unpack the result into two names, so a
    # bare ``None`` here would raise TypeError at the call site.
    if not problem_description or not category_names:
        return None, None
    try:
        problem_embedding = model.encode(problem_description, convert_to_tensor=True)
        # Cosine similarity of the problem against every category; row 0 is
        # the single problem embedding.
        cosine_scores = util.pytorch_cos_sim(problem_embedding, category_embeddings)[0]
        # Highest score and the index of the category that produced it
        best_score, best_idx = torch.max(cosine_scores, dim=0)
        score = best_score.item()
        # Written as ">=" so that a NaN score counts as "no match".
        if score >= SIMILARITY_THRESHOLD:
            return category_names[best_idx.item()], score
        return None, None  # No category met the threshold
    except Exception as e:
        # Best effort: a model failure is reported as "no category".
        print(f"Error during category finding: {e}")
        return None, None
def find_relevant_technologies(category_name, technologies=None):
    """
    Select the technologies listed under the identified category.

    A technology's ``category`` cell may name several categories separated by
    commas (e.g. "Cat1, Cat2"); a row matches when ``category_name`` equals
    one of those entries exactly, after surrounding whitespace is stripped.

    Args:
        category_name: Category to look for; ``None`` or empty means "no
            category".
        technologies: DataFrame with a ``category`` column. Defaults to the
            module-level ``technologies_df``.

    Returns:
        A DataFrame holding the matching rows, with their original index
        labels and column dtypes, or an empty ``pd.DataFrame()`` when there is
        no category, no data, or no match.
    """
    if technologies is None:
        technologies = technologies_df
    if not category_name or technologies.empty:
        return pd.DataFrame()  # Return empty DataFrame if no category or data

    def _lists_category(raw_categories):
        # str() guards against non-string cells (e.g. NaN).
        return category_name in (part.strip() for part in str(raw_categories).split(','))

    # Filtering with a boolean mask keeps each column's dtype; rebuilding the
    # frame from iterrows() Series does not preserve dtypes across rows.
    mask = technologies['category'].map(_lists_category).astype(bool)
    if not mask.any():
        return pd.DataFrame()  # Return empty if no matches
    return technologies[mask].copy()
def search_solutions(problem_description, technologies):
    """
    Search DuckDuckGo for resources combining the problem with each technology.

    One query is issued per unique, non-empty technology name, each limited to
    ``MAX_SEARCH_RESULTS_PER_TECH`` results.

    Args:
        problem_description: Free-text problem description. Only its first 100
            characters, after whitespace normalisation, go into the query.
        technologies: DataFrame with a ``technology`` column.

    Returns:
        A Markdown string listing the links found per technology; a plain
        message when ``technologies`` is empty; or an error message when the
        search client itself cannot be used.
    """
    if technologies.empty:
        return "No relevant technologies found to search for solutions."

    # The description is embedded in a double-quoted phrase: collapse newlines
    # and runs of whitespace, and drop embedded double quotes that would end
    # the phrase early. Truncate to limit query length.
    phrase = " ".join(str(problem_description or "").replace('"', ' ').split())[:100]

    results = {}  # clean technology name -> list of result dicts, or None if the search failed
    try:
        with DDGS() as ddgs:
            # dropna(): a missing name would otherwise be searched as "nan".
            for tech_name in technologies['technology'].dropna().unique():
                # Clean up tech_name if it has extra info (like title prefixes)
                clean_tech_name = re.sub(r'^- Title\s*:\s*', '', str(tech_name)).strip()
                if not clean_tech_name:
                    continue  # Skip if name is empty after cleaning
                query = f'"{phrase}" using "{clean_tech_name}" solution OR tutorial OR implementation'
                print(f"Searching for: {query}")
                try:
                    # Each result is a dict {'title': ..., 'href': ..., 'body': ...}
                    hits = ddgs.text(query, max_results=MAX_SEARCH_RESULTS_PER_TECH)
                    results[clean_tech_name] = list(hits or [])
                except Exception as e:
                    # One failed query must not discard the results already
                    # collected for the other technologies.
                    print(f"Error during web search for '{clean_tech_name}': {e}")
                    results[clean_tech_name] = None
    except Exception as e:
        print(f"Error during web search: {e}")
        return f"An error occurred during the search: {e}"

    # Format results for display
    parts = ["### Potential Solutions & Resources:\n\n"]
    if not results:
        parts.append("No search results found.")
        return "".join(parts)
    for tech, links in results.items():
        parts.append(f"**For Technology: {tech}**\n")
        if links is None:
            parts.append("- *Search failed for this technology.*\n")
        elif links:
            for link in links:
                href = link.get('href', '')
                if not href:
                    continue  # Nothing to link to
                # Escape brackets so a title cannot break the Markdown link.
                title = str(link.get('title') or href).replace('[', '\\[').replace(']', '\\]')
                parts.append(f"- [{title}]({href})\n")
        else:
            parts.append("- *No specific results found for this technology combination.*\n")
        parts.append("\n")
    return "".join(parts)
+# --- Main Processing Function ---
def process_problem(problem_description):
    """
    Main function called by the Gradio interface.

    Orchestrates three steps: categorise the problem, look up the technologies
    listed under that category, and search the web for solutions using them.

    Args:
        problem_description: Text entered by the user.

    Returns:
        A single Markdown string. The interface declares exactly one Markdown
        output, so every path, including the empty-input one, returns one
        string.
    """
    if not problem_description or not problem_description.strip():
        return "Please enter a problem description."

    # 1. Categorize Problem
    # Tolerate a bare None from the categoriser as well as a (None, None) pair.
    match = find_best_category(problem_description)
    category_name, score = match if match else (None, None)
    if category_name:
        category_output = f"**Identified Category:** {category_name} (Similarity Score: {score:.2f})"
    else:
        # No early return: the later steps produce their own
        # "nothing found" messages when given no category.
        category_output = "**Could not confidently identify a relevant category.**"

    # 2. Find Relevant Technologies (category_name is None if nothing matched)
    relevant_technologies_df = find_relevant_technologies(category_name)
    if not relevant_technologies_df.empty:
        sections = ["### Relevant Technologies:\n\n"]
        for _, row in relevant_technologies_df.iterrows():
            # Descriptions may use <br> as a line separator; show one
            # non-empty, stripped line per segment.
            desc_lines = str(row['description']).split('<br>')
            cleaned_desc = "\n".join(line.strip() for line in desc_lines if line.strip())
            sections.append(
                f"**Technology:** {row['technology']}\n**Description:**\n{cleaned_desc}\n\n---\n"
            )
        tech_output = "".join(sections)
    elif category_name:
        tech_output = f"No specific technologies found listed under the '{category_name}' category in the provided data."
    else:
        tech_output = "No relevant technologies could be identified as no category was matched."

    # 3. Search for Solutions
    solution_output = search_solutions(problem_description, relevant_technologies_df)

    # 4. Combine everything into one Markdown document for the single output
    return f"## Analysis Results\n\n{category_output}\n\n{tech_output}\n\n{solution_output}"
# --- Build the Gradio UI ---
print("Setting up Gradio interface...")
iface = gr.Interface(
    title="Technical Problem Analyzer",
    description=(
        "Enter a technical problem. The application will attempt to categorize it, "
        "find relevant technologies from a predefined list, and search for potential "
        "online solutions using those technologies."
    ),
    fn=process_problem,
    # One multi-line text box in, one Markdown document out: process_problem
    # returns the whole report as a single Markdown string.
    inputs=gr.Textbox(
        lines=5,
        label="Enter Technical Problem Description",
        placeholder="Describe your technical challenge or requirement here...",
    ),
    outputs=gr.Markdown(label="Analysis and Potential Solutions"),
    # Sample problems offered to the user
    examples=[
        ["How can I establish reliable communication between low-orbit satellites for continuous global monitoring?"],
        ["Need a system to automatically detect anomalies in sensor data from industrial machinery using machine learning."],
        ["Develop a secure authentication method for a distributed IoT network without a central server."],
    ],
    allow_flagging='never',  # Flagging is switched off
)
# --- Start the app when run as a script ---
if __name__ == "__main__":
    print("Launching Gradio app...")
    iface.launch()