Spaces:
Sleeping
Sleeping
| from src.services.utils import * | |
| from src.services.processor import * | |
# Load the technology table and its embeddings once, at import time.
(global_tech, global_tech_embeddings) = load_technologies()
def process_input(data, global_tech, global_tech_embeddings):
    """Select the best technologies for the problem carried by ``data``.

    The problem statement is turned into a prompt and constraints are
    retrieved from that prompt. Everything after that (stemming, similarity
    scoring, combination search, selection) is delegated to
    ``process_input_from_constraints`` so both entry points share a single
    implementation instead of two copies that can drift apart.

    Args:
        data: Object exposing the problem statement as ``data.problem``.
        global_tech: Technology table handed through to the pipeline.
        global_tech_embeddings: Embeddings handed through to the similarity
            step.

    Returns:
        The value returned by ``process_input_from_constraints`` for the
        retrieved constraints.
    """
    prompt = set_prompt(data.problem)
    constraints = retrieve_constraints(prompt)
    return process_input_from_constraints(
        constraints, global_tech, global_tech_embeddings
    )
def process_input_from_constraints(constraints, global_tech, global_tech_embeddings):
    """Select the best technologies for an already-extracted constraint list.

    Args:
        constraints: Constraints to stem and match against the technologies.
        global_tech: Technology table used for scoring and for the final lookup.
        global_tech_embeddings: Embeddings handed to the similarity step.

    Returns:
        The value returned by ``get_technologies_by_id`` for the selected ids.
    """
    stemmed = stem(constraints, "constraints")

    # Persist both inputs of the similarity step before computing it.
    save_dataframe(stemmed, "constraints_stemmed.xlsx")
    save_dataframe(global_tech, "global_tech.xlsx")

    similarities, similarity_matrix = get_contrastive_similarities(
        stemmed,
        global_tech,
        global_tech_embeddings,
    )
    save_to_pickle(similarities)
    print(f"Matrix : {similarity_matrix} \n Constraints : {stemmed} \n Gloabl tech : {global_tech}")

    combinations = find_best_list_combinations(stemmed, global_tech, similarity_matrix)
    chosen_ids = select_technologies(combinations)
    return get_technologies_by_id(chosen_ids, global_tech)
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import random | |
| import json | |
| # --- Dummy Implementations for src.services.utils and src.services.processor --- | |
| # These functions simulate the behavior of your actual services for the Gradio interface. | |
def load_technologies():
    """Build the sample technology table and a stand-in embedding matrix.

    Returns:
        A ``(DataFrame, ndarray)`` pair: the table has the columns ``id``,
        ``name`` and ``description``; the array holds one random
        128-dimensional row per technology.
    """
    catalogue = [
        (1, 'Machine Learning', 'Algorithms for learning from data.'),
        (2, 'Cloud Computing', 'On-demand computing resources.'),
        (3, 'Blockchain', 'Decentralized ledger technology.'),
        (4, 'Cybersecurity', 'Protecting systems from threats.'),
        (5, 'Data Analytics', 'Analyzing large datasets.'),
        (6, 'Artificial Intelligence', 'Simulating human intelligence.'),
        (7, 'DevOps', 'Software development and operations.'),
        (8, 'Quantum Computing', 'Utilizing quantum mechanics.'),
        (9, 'Edge Computing', 'Processing data near the source.'),
        (10, 'Robotics', 'Automated machines.'),
    ]
    technologies = pd.DataFrame(catalogue, columns=['id', 'name', 'description'])

    # Random vectors stand in for real embeddings.
    embeddings = np.random.rand(len(technologies), 128)
    return technologies, embeddings
def set_prompt(problem_description: str) -> str:
    """Wrap the problem description in the fixed question template."""
    question = "what are the key technical challenges and requirements?"
    return f"Based on the problem: '{problem_description}', {question}"
def retrieve_constraints(prompt: str) -> list[str]:
    """Return canned constraints chosen by keywords found in the prompt.

    The keyword groups are tried in order (security, then performance, then
    data) and the first group with a case-insensitive hit wins. A generic
    list is returned when nothing matches.
    """
    lowered = prompt.lower()
    keyword_rules = (
        (("security", "secure"), ["high security", "data privacy", "authentication"]),
        (("performance", "speed"), ["low latency", "high throughput", "scalability"]),
        (("data", "analyze"), ["data integration", "real-time analytics", "data storage"]),
    )
    for keywords, canned_constraints in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return canned_constraints
    return ["cost-efficiency", "ease of integration", "maintainability", "scalability"]
def stem(text_list: list[str], type_of_text: str) -> list[str]:
    """Lower-case every word and strip one trailing 'ing', 'es' or 's'.

    Each text is split on whitespace and re-joined with single spaces.
    ``type_of_text`` is accepted but not used by this stand-in.
    """
    def _strip_suffix(token: str) -> str:
        token = token.lower()
        # First matching suffix wins, so 'es' is tried before the bare 's'.
        for suffix in ("ing", "es", "s"):
            if token.endswith(suffix):
                return token[:-len(suffix)]
        return token

    return [
        " ".join(_strip_suffix(token) for token in text.split())
        for text in text_list
    ]
def save_dataframe(df: pd.DataFrame, filename: str):
    """Announce a DataFrame save without writing anything to disk."""
    # A real implementation could write the frame out, e.g.
    # df.to_excel(filename, index=False).
    message = f"Simulating saving DataFrame to {filename}"
    print(message)
def save_to_pickle(data):
    """Announce a pickle save, reporting only the type of ``data``."""
    data_type = type(data)
    print(f"Simulating saving data to pickle: {data_type}")
def get_contrastive_similarities(constraints_stemmed: list[str], global_tech_df: pd.DataFrame, global_tech_embeddings: np.ndarray):
    """Produce a random constraint-by-technology score matrix and its top hits.

    Args:
        constraints_stemmed: One matrix row is generated per entry.
        global_tech_df: One matrix column is generated per row; its ``name``
            column labels the hits.
        global_tech_embeddings: Accepted but not used by this stand-in.

    Returns:
        ``(result_similarities, matrix)``: a dict mapping each constraint to
        up to three ``(technology name, score)`` pairs, best first, and the
        score matrix rounded to three decimals.
    """
    shape = (len(constraints_stemmed), len(global_tech_df))
    # Rounded so the matrix stays readable when displayed.
    matrix = np.round(np.random.rand(*shape), 3)

    result_similarities = {}
    for scores, constraint in zip(matrix, constraints_stemmed):
        # Column positions of the three highest scores, highest first.
        best_columns = np.argsort(scores)[::-1][:3]
        result_similarities[constraint] = [
            (global_tech_df.iloc[column]['name'], scores[column])
            for column in best_columns
        ]
    return result_similarities, matrix
def find_best_list_combinations(constraints_stemmed: list[str], global_tech_df: pd.DataFrame, matrix: np.ndarray) -> list[dict]:
    """Build up to three random technology combinations.

    Args:
        constraints_stemmed: Constraints to draw the "covered" subsets from;
            at most three combinations are produced, one per constraint.
        global_tech_df: Technology table with ``id`` and ``name`` columns.
        matrix: Accepted but not used by this stand-in.

    Returns:
        A list of dicts with the keys ``technologies`` (list of
        ``{"id", "name"}`` dicts), ``score`` (float rounded to two decimals,
        drawn from [0.7, 0.95]) and ``covered_constraints`` (non-empty random
        subset of ``constraints_stemmed``).
    """
    if not constraints_stemmed:
        return []

    available_ids = global_tech_df['id'].tolist()

    # Resolve names once instead of filtering the DataFrame for every id.
    # setdefault keeps the first row for a repeated id, like a filter + iloc[0].
    name_by_id = {}
    for tech_id, tech_name in zip(available_ids, global_tech_df['name'].tolist()):
        name_by_id.setdefault(tech_id, tech_name)

    # random.sample raises ValueError when asked for more items than exist,
    # so the 2..4 combination size is clamped to the technologies available.
    smallest_combo = min(2, len(available_ids))
    largest_combo = min(4, len(available_ids))

    best_combinations = []
    for _ in range(min(3, len(constraints_stemmed))):
        # The draws stay in a fixed order (score, technologies, constraints)
        # so seeded runs are reproducible.
        score = round(random.uniform(0.7, 0.95), 2)
        selected_tech_ids = random.sample(
            available_ids, random.randint(smallest_combo, largest_combo)
        )
        covered_constraints = random.sample(
            constraints_stemmed, random.randint(1, len(constraints_stemmed))
        )
        best_combinations.append({
            "technologies": [
                {"id": tech_id, "name": name_by_id[tech_id]}
                for tech_id in selected_tech_ids
            ],
            "score": score,
            "covered_constraints": covered_constraints,
        })
    return best_combinations
def select_technologies(best_combinations: list[dict]) -> list[int]:
    """Collect the distinct technology ids used across all combinations.

    The ids come out of a set, so their order in the returned list is not
    defined.
    """
    unique_ids = {
        technology["id"]
        for combination in best_combinations
        for technology in combination["technologies"]
    }
    return list(unique_ids)
def get_technologies_by_id(tech_ids: list[int], global_tech_df: pd.DataFrame) -> list[dict]:
    """Return one record dict per requested id that exists in the table.

    Records follow the order of ``tech_ids``. Ids with no matching row are
    skipped, and the first matching row is used when an id occurs more than
    once in the table.
    """
    records = []
    for wanted_id in tech_ids:
        matches = global_tech_df.loc[global_tech_df['id'] == wanted_id]
        if matches.empty:
            continue
        first_match = matches.iloc[0]
        records.append(first_match.to_dict())
    return records
| # --- Core Logic (Modified for Gradio Interface) --- | |
# Technologies and their embeddings are loaded a single time, at app start-up.
(global_tech_df, global_tech_embeddings_array) = load_technologies()
def process_input_gradio(problem_description: str):
    """Run the selection pipeline and return every intermediate result.

    The module-level ``global_tech_df`` and ``global_tech_embeddings_array``
    are used as the technology table and embeddings.

    Args:
        problem_description: Free-text problem statement entered by the user.

    Returns:
        A 9-tuple: the generated prompt, the comma-joined constraints, the
        comma-joined stemmed constraints, a status message, the per-constraint
        top matches rendered as a string, the similarity matrix as a
        DataFrame (constraint labels in the first column), the best
        combinations as a JSON string, the comma-joined selected ids, and the
        selected technologies as a JSON string.
    """
    # Steps 1-3: prompt -> constraints -> stemmed constraints.
    prompt = set_prompt(problem_description)
    constraints = retrieve_constraints(prompt)
    constraints_stemmed = stem(constraints, "constraints")
    save_dataframe(pd.DataFrame({"stemmed_constraints": constraints_stemmed}), "constraints_stemmed.xlsx")

    # Step 4 (technology loading) already happened at import time.

    # Step 5: score every constraint against every technology.
    result_similarities, matrix = get_contrastive_similarities(
        constraints_stemmed, global_tech_df, global_tech_embeddings_array
    )
    save_to_pickle(result_similarities)

    # Steps 6-8: combinations -> selected ids -> technology records.
    best_combinations = find_best_list_combinations(constraints_stemmed, global_tech_df, matrix)
    best_technologies_id = select_technologies(best_combinations)
    best_technologies = get_technologies_by_id(best_technologies_id, global_tech_df)

    # One readable "name (score)" line per constraint.
    result_similarities_display = {
        constraint: ", ".join(f"{name} ({score:.3f})" for name, score in matches)
        for constraint, matches in result_similarities.items()
    }

    # gr.Dataframe renders only the columns of a pandas DataFrame and drops
    # its index, so the constraint labels go into a real leading column.
    # allow_duplicates keeps this safe if a technology shares the label name.
    matrix_df = pd.DataFrame(matrix.tolist(), columns=global_tech_df['name'].tolist())
    matrix_df.insert(0, "Constraint", constraints_stemmed, allow_duplicates=True)

    return (
        prompt,
        ", ".join(constraints),
        ", ".join(constraints_stemmed),
        "Global technologies loaded and ready.",
        str(result_similarities_display),
        matrix_df,
        json.dumps(best_combinations, indent=2),
        ", ".join(map(str, best_technologies_id)),
        json.dumps(best_technologies, indent=2),
    )
# --- Gradio Interface Setup ---

# Single free-text input holding the problem statement.
input_problem = gr.Textbox(
    placeholder="e.g., Develop a secure and scalable e-commerce platform with real-time analytics.",
    label="Enter Problem Description",
)

# One read-only component per value returned by process_input_gradio,
# declared in the same order as that function's return tuple.
output_prompt = gr.Textbox(interactive=False, label="1. Generated Prompt")
output_constraints = gr.Textbox(interactive=False, label="2. Retrieved Constraints")
output_stemmed_constraints = gr.Textbox(interactive=False, label="3. Stemmed Constraints")
output_tech_loaded = gr.Textbox(interactive=False, label="4. Global Technologies Status")
output_similarities = gr.Textbox(interactive=False, label="5. Result Similarities (Constraint -> Top Technologies)")
output_matrix = gr.Dataframe(interactive=False, label="6. Similarity Matrix (Constraints vs. Technologies)")
output_best_combinations = gr.JSON(interactive=False, label="7. Best Technology Combinations Found")
output_selected_ids = gr.Textbox(interactive=False, label="8. Selected Technology IDs")
output_final_technologies = gr.JSON(interactive=False, label="9. Final Best Technologies")

# Assemble the interface, then start serving it.
demo = gr.Interface(
    fn=process_input_gradio,
    inputs=input_problem,
    outputs=[
        output_prompt,
        output_constraints,
        output_stemmed_constraints,
        output_tech_loaded,
        output_similarities,
        output_matrix,
        output_best_combinations,
        output_selected_ids,
        output_final_technologies,
    ],
    title="Insight Finder: Step-by-Step Technology Selection",
    description="Enter a problem description to see how relevant technologies are identified through various processing steps.",
    allow_flagging="never",
)
demo.launch()