Yatheshr committed on
Commit
8e85918
·
verified ·
1 Parent(s): 67efad1

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +93 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.ensemble import RandomForestClassifier
4
+ from sklearn.model_selection import train_test_split
5
+
6
+ # 1. Load Real SQL Query Logs
7
+ # Load query logs from CSV (use your own CSV file here)
8
@st.cache_data
def load_data():
    """Read the SQL query log CSV into a DataFrame.

    The path is relative, so the file is expected under ``data/`` in the
    Hugging Face Space; adjust it if the layout differs. The function is
    wrapped in ``st.cache_data``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the query-log file.
    """
    csv_path = "data/sql_query_logs.csv"
    query_logs = pd.read_csv(csv_path)
    return query_logs
12
+
13
+ # 2. Preprocess Data and Train the Model
14
def preprocess_and_train_model(df, slow_threshold_ms=1000, random_state=42):
    """Label each logged query as slow or fast and fit a random forest.

    Args:
        df: Query-log DataFrame. Must contain ``avg_exec_time_ms`` plus the
            feature columns ``query_length``, ``num_joins``,
            ``has_subquery`` and ``uses_index``. It is not modified.
        slow_threshold_ms: A query whose ``avg_exec_time_ms`` is strictly
            greater than this value is labelled slow (class 1); all other
            queries are labelled fast (class 0).
        random_state: Seed handed to the classifier so that refitting on
            the same data produces the same model.

    Returns:
        A fitted ``RandomForestClassifier``.

    Raises:
        KeyError: If any required column is missing from ``df``.
    """
    features = ['query_length', 'num_joins', 'has_subquery', 'uses_index']

    # Fail early with one message naming every absent column.
    missing = [
        column
        for column in features + ['avg_exec_time_ms']
        if column not in df.columns
    ]
    if missing:
        raise KeyError(f"Query log is missing required columns: {missing}")

    X = df[features]
    # Build the label as a standalone Series rather than writing an
    # 'is_slow' column back into the caller's DataFrame.
    y = (df['avg_exec_time_ms'] > slow_threshold_ms).astype(int)

    model = RandomForestClassifier(random_state=random_state)
    model.fit(X, y)
    return model
25
+
26
+ # 3. Add a Recommendation Engine
27
def recommend_tips(query):
    """Return optimization tips for one query's feature record.

    Args:
        query: Any object supporting ``query[key]`` lookups (a dict or a
            pandas Series row) for ``query_length``, ``num_joins``,
            ``has_subquery`` and ``uses_index``.

    Returns:
        A list of tip strings, one per rule that fired, in rule order. When
        no rule fires the list holds a single "looks optimized" message.
    """
    # Each rule pairs a check on the feature record with the tip to show
    # when that check succeeds; order here is the order of the output.
    rules = (
        (
            lambda q: q['query_length'] > 800,
            "πŸ” Query is long β€” consider breaking it into smaller chunks.",
        ),
        (
            lambda q: q['num_joins'] > 3,
            "πŸͺ’ Too many JOINs β€” simplify joins or add proper indexing.",
        ),
        (
            lambda q: q['has_subquery'],
            "🧠 Subquery detected β€” flatten subqueries if possible.",
        ),
        (
            lambda q: not q['uses_index'],
            "⚑ Index not used β€” create indexes on filter/join columns.",
        ),
    )

    advice = [message for fires, message in rules if fires(query)]
    if advice:
        return advice
    return ["βœ… Query structure looks optimized."]
41
+
42
+ # 4. Streamlit App Interface
43
def _extract_query_features(query_text):
    """Derive the model's four input features from raw SQL text.

    These are lexical heuristics, not a real SQL parse, so keywords inside
    string literals or comments are counted too.

    Args:
        query_text: The SQL statement as typed by the user.

    Returns:
        A dict with ``query_length``, ``num_joins``, ``has_subquery`` and
        ``uses_index``, in the column order the model was trained on.
    """
    import re  # local import keeps this block self-contained

    lowered = query_text.lower()
    return {
        'query_length': len(query_text),
        # One whole-word JOIN keyword is one join.
        'num_joins': len(re.findall(r'\bjoin\b', lowered)),
        # A SELECT directly after an opening parenthesis is a nested query.
        'has_subquery': int(re.search(r'\(\s*select\b', lowered) is not None),
        # Placeholder: only notices the literal word "index" (e.g. an index
        # hint); real index usage would need the query plan.
        'uses_index': int('index' in lowered),
    }


def main():
    """Render the Streamlit page: log preview, model training, prediction.

    Loads the query logs, shows the first rows, trains the classifier, and,
    once the user has entered a non-blank SQL query, displays the predicted
    slow/fast verdict followed by optimization tips.
    """
    st.title("SQL Query Performance Predictor")

    # Step 1: load the data and show a preview.
    df = load_data()
    st.subheader("Query Logs Preview")
    st.write(df.head())

    # Step 2: train the model.
    model = preprocess_and_train_model(df)

    # Step 3: collect the query to analyse.
    st.subheader("Enter Your SQL Query")
    query_text = st.text_area("SQL Query", height=150)

    # Nothing to analyse until the user has typed something non-blank.
    if not query_text or not query_text.strip():
        return

    query_features = pd.DataFrame([_extract_query_features(query_text)])

    # Step 4: predict and report.
    prediction = model.predict(query_features)[0]
    if prediction == 1:
        st.error("πŸ›‘ This query is likely to be **Slow**.")
    else:
        st.success("βœ… This query is likely to be **Fast**.")

    # Step 5: show optimization recommendations.
    st.subheader("πŸ› οΈ Optimization Tips")
    for tip in recommend_tips(query_features.iloc[0]):
        st.write(tip)
91
# Entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ scikit-learn