Delete sql_queries.py
Browse files- sql_queries.py +0 -149
sql_queries.py
DELETED
|
@@ -1,149 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""SQL_Queries.ipynb
|
| 3 |
-
|
| 4 |
-
Automatically generated by Colab.
|
| 5 |
-
|
| 6 |
-
Original file is located at
|
| 7 |
-
https://colab.research.google.com/drive/1JMMq3yCv2xWTEZbs9S28qx4lXdF88M5d
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
# Mapping of natural-language question -> SQL text. Every query reads from
# ved_test.synthetic_data. The SQL appears to target BigQuery Standard SQL
# (backtick-quoted identifiers, FLOAT64, EXTRACT(DAYOFWEEK ...)) -- confirm
# before running against another engine.
#
# The keys are looked up verbatim, so they must not be edited.
sql_queries = {
    # --- Simple aggregates -------------------------------------------------
    'Count the number of records in the dataset': 'SELECT COUNT(*) FROM ved_test.synthetic_data;',
    'Get the average Weighted_Avg_PRB_Util_UL': 'SELECT AVG(Weighted_Avg_PRB_Util_UL) FROM ved_test.synthetic_data;',
    'Get the minimum UE_Pwr_Restricted_Pct value': 'SELECT MIN(UE_Pwr_Restricted_Pct) FROM ved_test.synthetic_data;',
    'Calculate the total UE_Pwr_Unrestricted_Pct_Num': 'SELECT SUM(UE_Pwr_Unrestricted_Pct_Num) FROM ved_test.synthetic_data;',
    'Get the distinct values of Network_Engineer': 'SELECT DISTINCT Network_Engineer FROM ved_test.synthetic_data;',

    # --- Single-level GROUP BY / ORDER BY / LIMIT --------------------------
    'Count the number of records for each Network_Engineer': 'SELECT Network_Engineer, COUNT(*) FROM ved_test.synthetic_data GROUP BY Network_Engineer;',
    'Calculate the average RTT for each Network_Engineer': 'SELECT Network_Engineer, AVG(RTT) AS avg_rtt FROM ved_test.synthetic_data GROUP BY Network_Engineer;',
    'Calculate the average Jitter for weekends and weekdays': 'SELECT WEEKEND, AVG(Jitter) AS avg_jitter FROM ved_test.synthetic_data GROUP BY WEEKEND;',
    'Get the average Jitter and Packet Loss for each hour': 'SELECT Hour, AVG(Jitter) AS avg_jitter, AVG(`DL Packet Loss Pct`) AS avg_packet_loss FROM ved_test.synthetic_data GROUP BY Hour ORDER BY Hour;',
    'Find the top 5 records with the highest UPTP_Mbps': 'SELECT * FROM ved_test.synthetic_data ORDER BY UPTP_Mbps DESC LIMIT 5;',
    'Calculate the average HARQ_BLER_Pct for each 5G_Reliability_Category': 'SELECT `5G Reliability Category`, AVG(HARQ_BLER_Pct) AS avg_harq_bler_pct FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`;',
    'Find the average RTT and Jitter for each combination of Market and Hour': 'SELECT Market, Hour, AVG(RTT) AS avg_rtt, AVG(Jitter) AS avg_jitter FROM ved_test.synthetic_data GROUP BY Market, Hour;',
    'Get the top 3 Network_Engineers with the highest average 5G_Reliability_Score': 'SELECT Network_Engineer, AVG(`5G Reliability Score`) AS avg_reliability_score FROM ved_test.synthetic_data GROUP BY Network_Engineer ORDER BY avg_reliability_score DESC LIMIT 3;',
    'Calculate the average Weighted_Avg_PRB_Util_UL and DL_MAC_Vol_Scell_Pct for each day of the week': 'SELECT EXTRACT(DAYOFWEEK FROM Timestamp) AS day_of_week, AVG(Weighted_Avg_PRB_Util_UL) AS avg_prb_util, AVG(`DL MAC Vol Scell Pct`) AS avg_dl_mac_vol FROM ved_test.synthetic_data GROUP BY day_of_week;',
    'Get the average HARQ_BLER_Pct for each Market on weekends': 'SELECT Market, AVG(HARQ_BLER_Pct) AS avg_harq_bler_pct FROM ved_test.synthetic_data WHERE WEEKEND = 1 GROUP BY Market;',
    'Calculate the total Bearer Releases for records where Bearer_Setup_Failure_Pct is greater than 80% and group by Market': 'SELECT Market, SUM(`Bearer Releases`) AS total_bearer_release FROM ved_test.synthetic_data WHERE Bearer_Setup_Failure_Pct > 0.8 GROUP BY Market;',
    'Find the Market with the highest total UE_Pwr_Unrestricted_Pct_Num and the total value': 'SELECT Market, SUM(UE_Pwr_Unrestricted_Pct_Num) AS total_unrestricted_pwr FROM ved_test.synthetic_data GROUP BY Market ORDER BY total_unrestricted_pwr DESC LIMIT 1;',
    'Get the average UPTP_Mbps and RRC_Reestab_Attempts for each Market for weekdays and weekends': 'SELECT Market, WEEKEND, AVG(UPTP_Mbps) AS avg_uptp_mbps, AVG(RRC_Reestab_Attempts) AS avg_rrc_attempts FROM ved_test.synthetic_data GROUP BY Market, WEEKEND;',
    'Find the correlation between RTT and Jitter for each Market': 'SELECT Market, CORR(RTT, Jitter) AS rtt_jitter_correlation FROM ved_test.synthetic_data GROUP BY Market;',
    'Calculate the average Weighted_Avg_PRB_Util_UL for each day of the week': 'SELECT EXTRACT(DAYOFWEEK FROM Timestamp) AS day_of_week, AVG(Weighted_Avg_PRB_Util_UL) AS avg_prb_util FROM ved_test.synthetic_data GROUP BY day_of_week;',
    'Calculate the average UPTP_Mbps for each 5G_Reliability_Category during weekdays and weekends': 'SELECT `5G Reliability Category`, WEEKEND, AVG(UPTP_Mbps) AS avg_uptp_mbps FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`, WEEKEND ORDER BY `5G Reliability Category`, WEEKEND;',
    'Identify the hour with the highest average HARQ_BLER_Pct': 'SELECT Hour, AVG(HARQ_BLER_Pct) AS avg_harq_bler_pct FROM ved_test.synthetic_data GROUP BY Hour ORDER BY avg_harq_bler_pct DESC LIMIT 1;',
    'Calculate the standard deviation of 5G_Reliability_Score for each Network_Engineer': 'SELECT Network_Engineer, STDDEV(`5G Reliability Score`) AS stddev_reliability_score FROM ved_test.synthetic_data GROUP BY Network_Engineer;',
    'Find the top 3 hours with the highest total UE_Pwr_Unrestricted_Pct_Num during weekends': 'SELECT Hour, SUM(UE_Pwr_Unrestricted_Pct_Num) AS total_unrestricted_pwr FROM ved_test.synthetic_data WHERE WEEKEND = 1 GROUP BY Hour ORDER BY total_unrestricted_pwr DESC LIMIT 3;',
    'Determine the Network_Engineer with the highest average DL MAC Vol Scell Pct and the average value': 'SELECT Network_Engineer, AVG(`DL MAC Vol Scell Pct`) AS avg_dl_mac_vol FROM ved_test.synthetic_data GROUP BY Network_Engineer ORDER BY avg_dl_mac_vol DESC LIMIT 1;',
    'Find the variance in Jitter for each 5G_Reliability_Category': 'SELECT `5G Reliability Category`, VARIANCE(Jitter) AS variance_jitter FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`;',
    'Calculate the correlation between RTT and 5G Reliability Value for each Reliability Category': 'SELECT `5G Reliability Category`, CORR(RTT, `5G Reliability Value`) AS rtt_reliability_correlation FROM ved_test.synthetic_data GROUP BY `5G Reliability Category`;',

    # --- Subqueries, window functions and CTEs -----------------------------

    # The median is computed per engineer: rows are ranked inside each
    # Network_Engineer partition and only the middle row (odd count) or the
    # two middle rows (even count) contribute to the conditional AVG. The
    # previous version used an uncorrelated scalar subquery, which produced
    # the same "median" for every engineer.
    'Calculate the average and median MTTR for each Network Engineer filtered by 5G Reliability Value being above the overall average score': '''WITH filtered AS (
    SELECT Network_Engineer, MTTR,
        ROW_NUMBER() OVER (PARTITION BY Network_Engineer ORDER BY MTTR) AS rnk,
        COUNT(*) OVER (PARTITION BY Network_Engineer) AS cnt
    FROM ved_test.synthetic_data
    WHERE `5G Reliability Value` > (SELECT AVG(`5G Reliability Value`) FROM ved_test.synthetic_data)
)
SELECT Network_Engineer, AVG(MTTR) AS avg_mttr,
    AVG(CASE WHEN rnk IN (DIV(cnt + 1, 2), DIV(cnt + 2, 2)) THEN MTTR END) AS median_mttr
FROM filtered
GROUP BY Network_Engineer;''',
    'Find the count of records per 5G_Reliability_Category where 5G_Reliability_Value is below the average for the category': '''SELECT `5G Reliability Category`, COUNT(*) as count
FROM ved_test.synthetic_data AS s1
WHERE `5G Reliability Value` < (
SELECT AVG(`5G Reliability Value`)
FROM ved_test.synthetic_data AS s2
WHERE s2.`5G Reliability Category` = s1.`5G Reliability Category`
)
GROUP BY `5G Reliability Category`;''',

    # Filter on the row number directly. The previous version joined the
    # ranked rows back to the table on (Network_Engineer, `HO Attempts`),
    # which duplicated rows and returned more than five per engineer
    # whenever `HO Attempts` values were tied.
    'Find the top 5 records with the highest HO Attempts for each Network_Engineer': '''SELECT * EXCEPT (rn)
FROM (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY Network_Engineer ORDER BY `HO Attempts` DESC) AS rn
    FROM ved_test.synthetic_data
) ranked
WHERE rn <= 5;''',

    # NOTE(review): this query does not compute a real exponential moving
    # average. `ema_value` in the CTE is always NULL, so for every row after
    # the first in a partition the ELSE branch evaluates to NULL. A correct
    # version needs a recursive CTE or a closed-form window sum; which one is
    # viable depends on partition sizes, so the SQL is left unchanged here.
    'Calculate the exponential moving average of 5G_Reliability_Value for each Network_Engineer with a smoothing factor of 0.1': '''WITH ema AS (
SELECT Timestamp, Network_Engineer, `5G Reliability Value`, CAST(NULL AS FLOAT64) AS ema_value,
ROW_NUMBER() OVER (PARTITION BY Network_Engineer ORDER BY Timestamp) AS row_num
FROM ved_test.synthetic_data
)
SELECT a.Timestamp, a.Network_Engineer, a.`5G Reliability Value`,
CASE
WHEN a.row_num = 1 THEN a.`5G Reliability Value`
ELSE (0.1 * a.`5G Reliability Value` + 0.9 * b.ema_value)
END AS ema_value
FROM ema a
LEFT JOIN

ema b
ON a.Network_Engineer = b.Network_Engineer AND a.row_num = b.row_num + 1;''',

    # Joining on the per-category maximum deliberately keeps every row that
    # ties for the highest value.
    "Find the records with the highest HARQ_BLER_Pct for each 5G_Reliability_Category": '''SELECT s1.*
FROM ved_test.synthetic_data s1
JOIN (
SELECT `5G Reliability Category`, MAX(HARQ_BLER_Pct) AS max_harq_bler_pct
FROM ved_test.synthetic_data
GROUP BY `5G Reliability Category`
) s2
ON s1.`5G Reliability Category` = s2.`5G Reliability Category` AND s1.HARQ_BLER_Pct = s2.max_harq_bler_pct;''',

    "Identify the top 3 hours with the highest average UPTP_Mbps for each Market, including the variance in Jitter during these hours": '''WITH avg_uptp AS (
SELECT Market, Hour, AVG(UPTP_Mbps) AS avg_uptp_mbps, VARIANCE(Jitter) AS jitter_variance,
ROW_NUMBER() OVER (PARTITION BY Market ORDER BY AVG(UPTP_Mbps) DESC) AS rn
FROM ved_test.synthetic_data
GROUP BY Market, Hour
)
SELECT Market, Hour, avg_uptp_mbps, jitter_variance
FROM avg_uptp
WHERE rn <= 3;''',

    "Determine the Sector with the highest variance in 5G Reliability Value and its corresponding average Context Drop Percent": '''WITH variance_scores AS (
SELECT Sector, VARIANCE(`5G Reliability Value`) AS score_variance, AVG(Context_Drop_Pct) AS avg_context_drop
FROM ved_test.synthetic_data
GROUP BY Sector
)
SELECT Sector, score_variance, avg_context_drop
FROM variance_scores
ORDER BY score_variance DESC
LIMIT 1;''',

    "Find hours where the average UPTP_Mbps is significantly different than the daily average (more than 2 standard deviations away from the mean)": '''WITH daily_stats AS (
SELECT DATE(Timestamp) AS day, AVG(UPTP_Mbps) AS daily_avg_uptp, STDDEV(UPTP_Mbps) AS daily_stddev_uptp
FROM ved_test.synthetic_data
GROUP BY day
),
hourly_stats AS (
SELECT DATE(Timestamp) AS day, EXTRACT(HOUR FROM Timestamp) AS hour, AVG(UPTP_Mbps) AS hourly_avg_uptp
FROM ved_test.synthetic_data
GROUP BY day, hour
)
SELECT
hs.day, hs.hour, hs.hourly_avg_uptp, ds.daily_avg_uptp, ds.daily_stddev_uptp
FROM hourly_stats hs
JOIN daily_stats ds ON hs.day = ds.day
WHERE hs.hourly_avg_uptp > (ds.daily_avg_uptp + 2 * ds.daily_stddev_uptp)
OR hs.hourly_avg_uptp < (ds.daily_avg_uptp - 2 * ds.daily_stddev_uptp)
ORDER BY hs.day, hs.hour;''',

    "Identify Days where more than 10% of the records have RRC Setup Failure above 25% for each region": '''WITH daily_rrc_failures AS (
SELECT
Region, DATE(Timestamp) AS day, COUNT(*) AS total_records,
SUM(CASE WHEN `RRC Setup Failure% 5G`> 0.25 THEN 1 ELSE 0 END) AS high_failure_count
FROM ved_test.synthetic_data
GROUP BY Region, day
),
daily_rrc_failure_ratio AS (
SELECT
Region, day, total_records, high_failure_count, (high_failure_count / total_records) * 100 AS failure_ratio
FROM daily_rrc_failures
)
SELECT
Region, day, total_records, high_failure_count, failure_ratio
FROM daily_rrc_failure_ratio
WHERE failure_ratio > 10
ORDER BY Region, day;''',
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|