Yatheshr committed on
Commit
dd4821b
·
verified ·
1 Parent(s): 8e85918

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -93
app.py CHANGED
@@ -1,93 +1,93 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from sklearn.ensemble import RandomForestClassifier
4
- from sklearn.model_selection import train_test_split
5
-
6
- # 1. Load Real SQL Query Logs
7
- # Load query logs from CSV (use your own CSV file here)
8
- @st.cache_data
9
- def load_data():
10
- # Make sure the CSV file is located correctly in the Hugging Face Space
11
- return pd.read_csv("data/sql_query_logs.csv") # Adjust the path if necessary
12
-
13
- # 2. Preprocess Data and Train the Model
14
- def preprocess_and_train_model(df):
15
- # Define 'slow' query threshold (avg_exec_time_ms > 1000 ms)
16
- df['is_slow'] = df['avg_exec_time_ms'] > 1000
17
- features = ['query_length', 'num_joins', 'has_subquery', 'uses_index']
18
- X = df[features]
19
- y = df['is_slow'].astype(int)
20
-
21
- # Train a RandomForest model
22
- model = RandomForestClassifier()
23
- model.fit(X, y)
24
- return model
25
-
26
- # 3. Add a Recommendation Engine
27
- def recommend_tips(query):
28
- tips = []
29
- if query['query_length'] > 800:
30
- tips.append("πŸ” Query is long β€” consider breaking it into smaller chunks.")
31
- if query['num_joins'] > 3:
32
- tips.append("πŸͺ’ Too many JOINs β€” simplify joins or add proper indexing.")
33
- if query['has_subquery']:
34
- tips.append("🧠 Subquery detected β€” flatten subqueries if possible.")
35
- if not query['uses_index']:
36
- tips.append("⚑ Index not used β€” create indexes on filter/join columns.")
37
-
38
- if not tips:
39
- tips.append("βœ… Query structure looks optimized.")
40
- return tips
41
-
42
- # 4. Streamlit App Interface
43
- def main():
44
- st.title("SQL Query Performance Predictor")
45
-
46
- # Step 1: Load the Data
47
- df = load_data()
48
-
49
- # Display a preview of the data
50
- st.subheader("Query Logs Preview")
51
- st.write(df.head())
52
-
53
- # Step 2: Train the Model
54
- model = preprocess_and_train_model(df)
55
-
56
- # Step 3: User Input for Query Analysis
57
- st.subheader("Enter Your SQL Query")
58
- query_text = st.text_area("SQL Query", height=150)
59
-
60
- if query_text:
61
- # Process the query to extract features
62
- query_length = len(query_text)
63
- num_joins = (query_text.lower().count('join') // 4) # Approximation
64
- has_subquery = 1 if 'select' in query_text.lower() and 'from' in query_text.lower() and 'select' in query_text.lower() else 0
65
-
66
- # Dummy logic to determine if an index is used — you can extend this logic with actual parsing
67
- uses_index = 1 if "index" in query_text.lower() else 0
68
-
69
- query_features = pd.DataFrame({
70
- 'query_length': [query_length],
71
- 'num_joins': [num_joins],
72
- 'has_subquery': [has_subquery],
73
- 'uses_index': [uses_index]
74
- })
75
-
76
- # Step 4: Prediction
77
- prediction = model.predict(query_features)[0]
78
-
79
- # Show result
80
- if prediction == 1:
81
- st.error("πŸ›‘ This query is likely to be **Slow**.")
82
- else:
83
- st.success("βœ… This query is likely to be **Fast**.")
84
-
85
- # Show optimization recommendations
86
- st.subheader("πŸ› οΈ Optimization Tips")
87
- recommendations = recommend_tips(query_features.iloc[0])
88
- for tip in recommendations:
89
- st.write(tip)
90
-
91
- # Run the Streamlit app
92
- if __name__ == '__main__':
93
- main()
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.ensemble import RandomForestClassifier
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ # 1. Load Real SQL Query Logs
7
+ # Load query logs from CSV (use your own CSV file here)
8
+ @st.cache_data
9
+ def load_data():
10
+ # Make sure the CSV file is located correctly in the Hugging Face Space
11
+ return pd.read_csv("Query-Performance-Predictor/data/sql_query_logs.csv") # Adjust the path if necessary
12
+
13
+ # 2. Preprocess Data and Train the Model
14
+ def preprocess_and_train_model(df):
15
+ # Define 'slow' query threshold (avg_exec_time_ms > 1000 ms)
16
+ df['is_slow'] = df['avg_exec_time_ms'] > 1000
17
+ features = ['query_length', 'num_joins', 'has_subquery', 'uses_index']
18
+ X = df[features]
19
+ y = df['is_slow'].astype(int)
20
+
21
+ # Train a RandomForest model
22
+ model = RandomForestClassifier()
23
+ model.fit(X, y)
24
+ return model
25
+
26
+ # 3. Add a Recommendation Engine
27
+ def recommend_tips(query):
28
+ tips = []
29
+ if query['query_length'] > 800:
30
+ tips.append("πŸ” Query is long β€” consider breaking it into smaller chunks.")
31
+ if query['num_joins'] > 3:
32
+ tips.append("πŸͺ’ Too many JOINs β€” simplify joins or add proper indexing.")
33
+ if query['has_subquery']:
34
+ tips.append("🧠 Subquery detected β€” flatten subqueries if possible.")
35
+ if not query['uses_index']:
36
+ tips.append("⚑ Index not used β€” create indexes on filter/join columns.")
37
+
38
+ if not tips:
39
+ tips.append("βœ… Query structure looks optimized.")
40
+ return tips
41
+
42
+ # 4. Streamlit App Interface
43
+ def main():
44
+ st.title("SQL Query Performance Predictor")
45
+
46
+ # Step 1: Load the Data
47
+ df = load_data()
48
+
49
+ # Display a preview of the data
50
+ st.subheader("Query Logs Preview")
51
+ st.write(df.head())
52
+
53
+ # Step 2: Train the Model
54
+ model = preprocess_and_train_model(df)
55
+
56
+ # Step 3: User Input for Query Analysis
57
+ st.subheader("Enter Your SQL Query")
58
+ query_text = st.text_area("SQL Query", height=150)
59
+
60
+ if query_text:
61
+ # Process the query to extract features
62
+ query_length = len(query_text)
63
+ num_joins = (query_text.lower().count('join') // 4) # Approximation
64
+ has_subquery = 1 if 'select' in query_text.lower() and 'from' in query_text.lower() and 'select' in query_text.lower() else 0
65
+
66
+ # Dummy logic to determine if an index is used — you can extend this logic with actual parsing
67
+ uses_index = 1 if "index" in query_text.lower() else 0
68
+
69
+ query_features = pd.DataFrame({
70
+ 'query_length': [query_length],
71
+ 'num_joins': [num_joins],
72
+ 'has_subquery': [has_subquery],
73
+ 'uses_index': [uses_index]
74
+ })
75
+
76
+ # Step 4: Prediction
77
+ prediction = model.predict(query_features)[0]
78
+
79
+ # Show result
80
+ if prediction == 1:
81
+ st.error("πŸ›‘ This query is likely to be **Slow**.")
82
+ else:
83
+ st.success("βœ… This query is likely to be **Fast**.")
84
+
85
+ # Show optimization recommendations
86
+ st.subheader("πŸ› οΈ Optimization Tips")
87
+ recommendations = recommend_tips(query_features.iloc[0])
88
+ for tip in recommendations:
89
+ st.write(tip)
90
+
91
+ # Run the Streamlit app
92
+ if __name__ == '__main__':
93
+ main()