File size: 2,842 Bytes
5e468f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import string

def link_schema(user_query, raw_schema):
    """

    Scans the user's question and tags database columns that match exactly.

    """
    # 1. CLEAN THE QUERY
    # Convert to lowercase: "Show me the Budget!" -> "show me the budget!"
    query_lower = user_query.lower()
    
    # Remove punctuation using Python's string library
    # "show me the budget!" -> "show me the budget"
    for punctuation_mark in string.punctuation:
        query_lower = query_lower.replace(punctuation_mark, "")
        
    # Split the clean sentence into an array of individual words
    # ["show", "me", "the", "budget"]
    query_words = set(query_lower.split())
    
    # 2. PREPARE THE OUTPUT STORAGE
    # This array will hold the final, formatted strings for each table
    linked_schema_lines = []
    
    # 3. ITERATE THROUGH THE SCHEMA
    # raw_schema is a dictionary where the key is the table name, 
    # and the value is a list of column names.
    for table_name, column_list in raw_schema.items():
        
        tagged_columns = []
        
        for col in column_list:
            # We convert the column to lowercase just in case
            col_clean = col.lower()
            
            # 4. THE MATCHING LOGIC
            # If the exact column name exists in the array of user words
            if col_clean in query_words:
                # Append the tag so the AI knows this is important
                tagged_columns.append(f'{col} (Exact Match: "{col}")')
            else:
                # Otherwise, just keep the column name as normal
                tagged_columns.append(col)
                
        # 5. FORMAT THE FINAL STRING
        # Glue the tagged columns together with commas
        formatted_cols = ", ".join(tagged_columns)
        
        # Build the final string for this specific table
        table_string = f"Table: {table_name} | Cols: {formatted_cols}"
        
        # Add it to our output storage
        linked_schema_lines.append(table_string)
        
    # 6. RETURN THE RESULT
    # Join all the individual table strings together with line breaks
    return " \n".join(linked_schema_lines)


# --- TESTING BLOCK ---
if __name__ == "__main__":
    
    # Simulate what the frontend UI will hand to your function
    test_question = "What is the location and budget for the marketing department?"
    
    # Simulate a messy database schema
    test_schema = {
        "employees": ["id", "name", "department_id", "salary"],
        "departments": ["id", "name", "location", "budget", "industry"]
    }
    
    print("--- INPUTS ---")
    print(f"Question: {test_question}")
    
    print("\n--- YOUR OUTPUT ---")
    final_result = link_schema(test_question, test_schema)
    print(final_result)