Spaces:
Sleeping
Sleeping
| import string | |
| def link_schema(user_query, raw_schema): | |
| """ | |
| Scans the user's question and tags database columns that match exactly. | |
| """ | |
| # 1. CLEAN THE QUERY | |
| # Convert to lowercase: "Show me the Budget!" -> "show me the budget!" | |
| query_lower = user_query.lower() | |
| # Remove punctuation using Python's string library | |
| # "show me the budget!" -> "show me the budget" | |
| for punctuation_mark in string.punctuation: | |
| query_lower = query_lower.replace(punctuation_mark, "") | |
| # Split the clean sentence into an array of individual words | |
| # ["show", "me", "the", "budget"] | |
| query_words = set(query_lower.split()) | |
| # 2. PREPARE THE OUTPUT STORAGE | |
| # This array will hold the final, formatted strings for each table | |
| linked_schema_lines = [] | |
| # 3. ITERATE THROUGH THE SCHEMA | |
| # raw_schema is a dictionary where the key is the table name, | |
| # and the value is a list of column names. | |
| for table_name, column_list in raw_schema.items(): | |
| tagged_columns = [] | |
| for col in column_list: | |
| # We convert the column to lowercase just in case | |
| col_clean = col.lower() | |
| # 4. THE MATCHING LOGIC | |
| # If the exact column name exists in the array of user words | |
| if col_clean in query_words: | |
| # Append the tag so the AI knows this is important | |
| tagged_columns.append(f'{col} (Exact Match: "{col}")') | |
| else: | |
| # Otherwise, just keep the column name as normal | |
| tagged_columns.append(col) | |
| # 5. FORMAT THE FINAL STRING | |
| # Glue the tagged columns together with commas | |
| formatted_cols = ", ".join(tagged_columns) | |
| # Build the final string for this specific table | |
| table_string = f"Table: {table_name} | Cols: {formatted_cols}" | |
| # Add it to our output storage | |
| linked_schema_lines.append(table_string) | |
| # 6. RETURN THE RESULT | |
| # Join all the individual table strings together with line breaks | |
| return " \n".join(linked_schema_lines) | |
| # --- TESTING BLOCK --- | |
| if __name__ == "__main__": | |
| # Simulate what the frontend UI will hand to your function | |
| test_question = "What is the location and budget for the marketing department?" | |
| # Simulate a messy database schema | |
| test_schema = { | |
| "employees": ["id", "name", "department_id", "salary"], | |
| "departments": ["id", "name", "location", "budget", "industry"] | |
| } | |
| print("--- INPUTS ---") | |
| print(f"Question: {test_question}") | |
| print("\n--- YOUR OUTPUT ---") | |
| final_result = link_schema(test_question, test_schema) | |
| print(final_result) |