Spaces:

Yatheshr
/

Query-Performance-Predictor

Sleeping

App Files Files Community

Yatheshr commited on Apr 25, 2025

Commit

8e85918

verified ·

1 Parent(s): 67efad1

Upload 2 files

Browse files

Files changed (2) hide show

app.py +93 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import streamlit as st
+import pandas as pd
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+# 1. Load Real SQL Query Logs
+# Load query logs from CSV (use your own CSV file here)
+@st.cache_data
+def load_data():
+    # Make sure the CSV file is located correctly in the Hugging Face Space
+    return pd.read_csv("data/sql_query_logs.csv")  # Adjust the path if necessary
+# 2. Preprocess Data and Train the Model
+def preprocess_and_train_model(df):
+    # Define 'slow' query threshold (avg_exec_time_ms > 1000 ms)
+    df['is_slow'] = df['avg_exec_time_ms'] > 1000
+    features = ['query_length', 'num_joins', 'has_subquery', 'uses_index']
+    X = df[features]
+    y = df['is_slow'].astype(int)
+    # Train a RandomForest model
+    model = RandomForestClassifier()
+    model.fit(X, y)
+    return model
+# 3. Add a Recommendation Engine
+def recommend_tips(query):
+    tips = []
+    if query['query_length'] > 800:
+        tips.append("🔍 Query is long — consider breaking it into smaller chunks.")
+    if query['num_joins'] > 3:
+        tips.append("🪢 Too many JOINs — simplify joins or add proper indexing.")
+    if query['has_subquery']:
+        tips.append("🧠 Subquery detected — flatten subqueries if possible.")
+    if not query['uses_index']:
+        tips.append("⚡ Index not used — create indexes on filter/join columns.")
+    if not tips:
+        tips.append("✅ Query structure looks optimized.")
+    return tips
+# 4. Streamlit App Interface
+def main():
+    st.title("SQL Query Performance Predictor")
+    # Step 1: Load the Data
+    df = load_data()
+    # Display a preview of the data
+    st.subheader("Query Logs Preview")
+    st.write(df.head())
+    # Step 2: Train the Model
+    model = preprocess_and_train_model(df)
+    # Step 3: User Input for Query Analysis
+    st.subheader("Enter Your SQL Query")
+    query_text = st.text_area("SQL Query", height=150)
+    if query_text:
+        # Process the query to extract features
+        query_length = len(query_text)
+        num_joins = (query_text.lower().count('join') // 4)  # Approximation
+        has_subquery = 1 if 'select' in query_text.lower() and 'from' in query_text.lower() and 'select' in query_text.lower() else 0
+        # Dummy logic to determine if an index is used — you can extend this logic with actual parsing
+        uses_index = 1 if "index" in query_text.lower() else 0
+        query_features = pd.DataFrame({
+            'query_length': [query_length],
+            'num_joins': [num_joins],
+            'has_subquery': [has_subquery],
+            'uses_index': [uses_index]
+        })
+        # Step 4: Prediction
+        prediction = model.predict(query_features)[0]
+        # Show result
+        if prediction == 1:
+            st.error("🛑 This query is likely to be **Slow**.")
+        else:
+            st.success("✅ This query is likely to be **Fast**.")
+        # Show optimization recommendations
+        st.subheader("🛠️ Optimization Tips")
+        recommendations = recommend_tips(query_features.iloc[0])
+        for tip in recommendations:
+            st.write(tip)
+# Run the Streamlit app
+if __name__ == '__main__':
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+pandas
+scikit-learn