| from fastapi import FastAPI, UploadFile, File |
| from fastapi.responses import HTMLResponse |
| import pandas as pd |
| import io |
| import time |
|
|
# Application instance; the route decorators below register handlers on it.
app = FastAPI()
|
|
@app.get("/", response_class=HTMLResponse)
async def analyze_logs():
    """Serve the landing page: a bare HTML form that posts a file to /upload/.

    Returns:
        The page markup as a string; the route's ``response_class`` makes
        FastAPI send it as HTML.
    """
    # The field is named "file" to match the parameter of the /upload/ handler.
    upload_form_page = """
<html>
<body>
<form action="/upload/" enctype="multipart/form-data" method="post">
<input name="file" type="file">
<input type="submit">
</form>
</body>
</html>
"""
    return upload_form_page
|
|
@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """Summarise an uploaded Parquet log file as a styled HTML report.

    The upload is read fully into memory and parsed with ``pd.read_parquet``.
    Rows are counted per ``(method, client)`` pair, sorted by descending
    count, and the first 1000 pairs are rendered as a colour-graded table
    below three headline figures.

    Args:
        file: The uploaded file. The code reads the ``datetime``, ``method``
            and ``client`` columns; ``datetime`` is parsed with the format
            ``%d/%m/%Y:%H:%M:%S``.

    Returns:
        An ``HTMLResponse`` holding the headline paragraphs and the table.

    Raises:
        Nothing is caught here: errors from ``pd.read_parquet``,
        ``pd.to_datetime`` or a missing column propagate to the framework.
    """
    contents = await file.read()
    logs_df = pd.read_parquet(io.BytesIO(contents))

    # Parsing with an explicit format also validates the column: values that
    # do not match make ``to_datetime`` raise.
    logs_df['datetime'] = pd.to_datetime(logs_df['datetime'], format='%d/%m/%Y:%H:%M:%S')
    # Vectorised accessors instead of a per-row Python lambda.
    # NOTE(review): these three columns are not used by the report below.
    logs_df['day'] = logs_df['datetime'].dt.day
    logs_df['hour'] = logs_df['datetime'].dt.hour
    logs_df['minute'] = logs_df['datetime'].dt.minute

    # One row per (method, client) pair with its request count, busiest first.
    ip_address_count_df = (
        logs_df.groupby(['method', 'client'], as_index=False)
        .size()
        .rename(columns={'size': 'count'})
        .sort_values('count', ascending=False)
    )

    # Share of all requests per pair, plus the running total of that share.
    ip_address_count_df = ip_address_count_df.assign(
        perc=ip_address_count_df['count'].div(ip_address_count_df['count'].sum()),
        cum_perc=lambda df: df['perc'].cumsum(),
    )

    # NOTE(review): 'method' and 'client' values come from the uploaded file
    # and are emitted into the page as-is; consider passing escape="html" to
    # ``.format`` (pandas >= 1.3) if uploads are untrusted -- confirm.
    dataframe_styled = (
        ip_address_count_df.head(1000)
        .style.background_gradient(subset=['count', 'perc', 'cum_perc'], cmap='cividis')
        .format({'count': '{:,}', 'perc': '{:.1%}', 'cum_perc': '{:.1%}'})
    )

    # ``Styler.render`` was removed in pandas 2.0; prefer ``to_html`` and only
    # fall back to ``render`` on pandas versions that predate it.
    render_styled = getattr(dataframe_styled, 'to_html', None) or dataframe_styled.render
    styled_html = render_styled()

    total_requests = logs_df.shape[0]
    # NOTE(review): this is the number of distinct (method, client) pairs,
    # reported under the "Redundant" label -- confirm that is the intent.
    redundant_requests = ip_address_count_df.shape[0]
    # An empty upload has zero rows; report 0% instead of dividing by zero.
    if total_requests:
        redundant_percentage = (redundant_requests / total_requests) * 100
    else:
        redundant_percentage = 0.0

    result = (
        "<h1>Redundant IP Requests....</h1>"
        "<p>The Total API Requests from the sample logs are : {total_requests}</p>"
        "<p>The Redundant API Requests from the sample logs are : {redundant_requests}</p>"
        "<p>The percentage of Redundant API Requests from the sample logs is : {redundant_percentage:.2f}%</p>"
        "{dataframe_html}"
    ).format(
        total_requests=total_requests,
        redundant_requests=redundant_requests,
        redundant_percentage=redundant_percentage,
        dataframe_html=styled_html,
    )

    return HTMLResponse(content=result)
|
|