MFF212 committed on
Commit
02848dc
·
1 Parent(s): 97bb996

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +60 -82
main.py CHANGED
@@ -1,87 +1,65 @@
1
- from fastapi import FastAPI, UploadFile, File
2
- from fastapi.responses import HTMLResponse
3
- from fastapi.responses import HTMLResponse, FileResponse
4
- import pandas as pd
5
- import io
6
- import requests
7
- import time
8
 
9
  app = FastAPI()
10
 
11
- @app.get("/", response_class=HTMLResponse)
12
- async def analyze_logs():
13
- return """
14
- <html>
15
- <body>
16
- <form action="/upload/" enctype="multipart/form-data" method="post">
17
- <input name="file" type="file">
18
- <input type="submit">
19
- </form>
20
- </body>
21
- </html>
22
- """
23
 
24
  @app.post("/upload/")
25
- async def upload_file(file: UploadFile = File(...)):
26
- contents = await file.read()
27
- logs_df = pd.read_parquet(io.BytesIO(contents))
28
-
29
- processing_message = "\n\n Processing files...\n\n"
30
- time.sleep(3) # Simulate processing time (3 seconds)
31
-
32
- logs_df['datetime'] = pd.to_datetime(logs_df['datetime'], format='%d/%m/%Y:%H:%M:%S')
33
- logs_df['day'] = logs_df['datetime'].apply(lambda x: x.day)
34
- logs_df['hour'] = logs_df['datetime'].apply(lambda x: x.hour)
35
- logs_df['minute'] = logs_df['datetime'].apply(lambda x: x.minute)
36
-
37
- ip_address_count_df = (
38
- logs_df.groupby(['method', 'client'], as_index=False)
39
- .size()
40
- .rename(columns={'size': 'count'})
41
- .sort_values('count', ascending=False)
42
- )
43
-
44
- ip_address_count_df = ip_address_count_df.assign(
45
- perc=ip_address_count_df['count'].div(ip_address_count_df['count'].sum()),
46
- cum_perc=lambda df: df['perc'].cumsum(),
47
- )
48
-
49
- result = (
50
- "<h1>Redundant IP Requests....</h1>"
51
- "<p>The Total API Requests from the sample logs are : {total_requests}</p>"
52
- "<p>The Redundant API Requests from the sample logs are : {redundant_requests}</p>"
53
- "<p>The percentage of Redundant API Requests from the sample logs is : {redundant_percentage:.2f}%</p>"
54
- "{dataframe_html}"
55
- ).format(
56
- total_requests=logs_df.shape[0],
57
- redundant_requests=ip_address_count_df.shape[0],
58
- redundant_percentage=(ip_address_count_df.shape[0] / logs_df.shape[0]) * 100,
59
- dataframe_html=ip_address_count_df.head(1000)
60
- .style.background_gradient(subset=['count', 'perc', 'cum_perc'], cmap='cividis')
61
- .format({'count': '{:,}', 'perc': '{:.1%}', 'cum_perc': '{:.1%}'})
62
- .to_html(), # Corrected here
63
- )
64
- # Save the HTML content locally
65
- file_location = "result.html"
66
- with open(file_location, "w") as html_file:
67
- html_file.write(result)
68
-
69
- # Print the location of the saved file
70
- print(f"HTML result saved to: {file_location}")
71
-
72
- return HTMLResponse(content=result)
73
-
74
- @app.get("/view-result/", response_class=HTMLResponse)
75
- async def view_result():
76
- try:
77
- with open("result.html", "r") as html_file:
78
- content = html_file.read()
79
- return HTMLResponse(content=content)
80
- except FileNotFoundError:
81
- return HTMLResponse(content="Result HTML not found")
82
-
83
-
84
-
85
-
86
-
87
-
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from fastapi.responses import HTMLResponse # Import HTMLResponse class
3
+ import csv
4
+ from typing import List
5
+ from io import StringIO # Import StringIO to create an in-memory CSV file
 
 
6
 
7
app = FastAPI()

# Dictionaries to store URLs, API counts, and associated names
# NOTE(review): these are module-level mutable dicts shared by every request
# this process handles — the upload handler mutates them and nothing ever
# resets them, so counts accumulate across uploads (and differ per worker).
# Confirm this cross-request accumulation is actually intended.
url_counts = {}
api_counts = {}
url_names = {}
api_names = {}
 
 
 
 
 
 
 
14
 
15
@app.post("/upload/")
async def upload_csv(file: UploadFile):
    """Analyze an uploaded CSV access log and report redundant GET requests.

    Reads the whole CSV in memory, counts how often each URL (column index 3)
    appears, and returns an HTML table of per-URL repetition counts plus
    summary statistics. Returns a JSON error object when the upload is not
    declared as ``text/csv``.

    Fixes vs. the previous revision:
      * counts live in per-request local dicts instead of module-level
        globals, so one upload's counts no longer leak into the next request;
      * rows with fewer than 4 columns are skipped instead of raising
        ``IndexError`` (previously any row shorter than 7 columns crashed);
      * the API-column bookkeeping (``row[4]``/``row[6]``), which was never
        reflected in the response, was dropped as dead work.
    """
    # Guard clause: reject non-CSV uploads early (same response as before).
    if file.content_type != "text/csv":
        return {"error": "Invalid file format. Please upload a CSV file."}

    raw = await file.read()
    lines = raw.decode("utf-8").splitlines()

    # Per-request state — locals prevent cross-request accumulation.
    url_counts: dict[str, int] = {}
    url_names: dict[str, str] = {}  # first associated name seen per URL (col 2)
    total_records = 0

    reader = csv.reader(lines)
    next(reader, None)  # skip header row; None default tolerates an empty file
    for row in reader:
        if len(row) < 4:
            continue  # malformed/short row — skip rather than IndexError
        url = row[3]  # assumes the URL is in the 4th column — TODO confirm schema
        total_records += 1
        url_counts[url] = url_counts.get(url, 0) + 1
        url_names.setdefault(url, row[2])  # keep the first name seen

    # URLs requested more than once.
    redundant_urls = [url for url, count in url_counts.items() if count > 1]

    # NOTE(review): this metric is (distinct redundant URLs / total rows),
    # kept byte-identical to the original formula — confirm it is the
    # intended ratio (vs. redundant *requests* / total rows).
    percentage_redundant_urls = (
        (len(redundant_urls) / total_records) * 100 if total_records > 0 else 0
    )

    # Build the results table. NOTE(review): values are interpolated into
    # HTML unescaped — fine for trusted logs, an XSS vector otherwise.
    rows_html = "".join(
        f"<tr><td>{url}</td><td>{count}</td></tr>"
        for url, count in url_counts.items()
    )
    html_table = (
        "<table>"
        "<tr><th>GET Request</th><th>Count of Repetition</th></tr>"
        f"{rows_html}"
        "</table>"
    )

    return HTMLResponse(
        content=f"<h2>Analysis Results:</h2>{html_table}<br>"
        f"<b>Total GET Records:</b> {total_records}<br>"
        f"<b>% of Redundant GET Requests:</b> {round(percentage_redundant_urls, 2)}%",
        status_code=200,
    )