MFF212 committed on
Commit
fdcbb94
·
1 Parent(s): cfce23d

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockefile +11 -0
  2. app.py +68 -0
  3. requirements.txt +2 -0
Dockefile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the FastAPI log-analysis app, served by uvicorn on port 7860.
FROM python:3.9

# Hugging Face Docker Spaces run the container as UID 1000, not root.
# Create that user up front and hand it the working directory so the app can
# write its output (result.html) into /code at runtime.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /code \
    && chown user:user /code

USER user

# pip installs into ~/.local for a non-root user; expose its scripts (uvicorn).
ENV PATH="/home/user/.local/bin:${PATH}"

WORKDIR /code

# Copy the requirements first so the dependency layer is cached across code changes.
COPY --chown=user:user ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY --chown=user:user . .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ from fastapi.responses import HTMLResponse
3
+ import pandas as pd
4
+ import io
5
+ import requests
6
+ import time
7
+
8
+ app = FastAPI()
9
+
10
@app.get("/", response_class=HTMLResponse)
async def analyze_logs():
    """Serve the landing page: a minimal HTML form that posts a file to /upload/."""
    # The form uses multipart/form-data and names its field "file".
    upload_form_page = """
    <html>
    <body>
    <form action="/upload/" enctype="multipart/form-data" method="post">
    <input name="file" type="file">
    <input type="submit">
    </form>
    </body>
    </html>
    """
    return upload_form_page
22
+
23
@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """Summarise an uploaded Parquet access log and save the report as HTML.

    The upload is read fully into memory and parsed with ``pd.read_parquet``.
    Rows are grouped by ``method`` and ``client``; each group's request count,
    share of all requests and cumulative share are rendered as a styled HTML
    table (top 1000 groups) and written to ``result.html`` in the current
    working directory.

    Args:
        file: Uploaded Parquet file. It must provide the columns ``datetime``
            (strings in ``%d/%m/%Y:%H:%M:%S`` format), ``method`` and
            ``client``.

    Returns:
        A confirmation string naming the file the report was written to.
    """
    # Local import keeps this handler self-contained; the module only imports `time`.
    import asyncio

    contents = await file.read()
    logs_df = pd.read_parquet(io.BytesIO(contents))

    # Simulated processing delay (3 seconds). Awaiting asyncio.sleep instead of
    # calling time.sleep keeps the event loop free to serve other requests.
    await asyncio.sleep(3)

    logs_df['datetime'] = pd.to_datetime(logs_df['datetime'], format='%d/%m/%Y:%H:%M:%S')
    # Vectorised datetime accessors instead of a Python-level apply per row.
    logs_df['day'] = logs_df['datetime'].dt.day
    logs_df['hour'] = logs_df['datetime'].dt.hour
    logs_df['minute'] = logs_df['datetime'].dt.minute

    # One row per (method, client) pair with its request count, busiest first.
    ip_address_count_df = (
        logs_df.groupby(['method', 'client'], as_index=False)
        .size()
        .rename(columns={'size': 'count'})
        .sort_values('count', ascending=False)
    )

    ip_address_count_df = ip_address_count_df.assign(
        perc=ip_address_count_df['count'].div(ip_address_count_df['count'].sum()),
        cum_perc=lambda df: df['perc'].cumsum(),
    )

    total_requests = logs_df.shape[0]
    # NOTE(review): the "redundant" figure is the number of distinct
    # (method, client) groups, exactly as in the original code - confirm that
    # this is the intended definition.
    redundant_requests = ip_address_count_df.shape[0]
    # Guard against an empty log, which would otherwise divide by zero.
    redundant_percentage = (
        (redundant_requests / total_requests) * 100 if total_requests else 0.0
    )

    # Styler.render() was removed in pandas 2.0; to_html() is its replacement.
    dataframe_html = (
        ip_address_count_df.head(1000)
        .style.background_gradient(subset=['count', 'perc', 'cum_perc'], cmap='cividis')
        .format({'count': '{:,}', 'perc': '{:.1%}', 'cum_perc': '{:.1%}'})
        .to_html()
    )

    result = (
        "<h1>Redundant IP Requests....</h1>"
        "<p>The Total API Requests from the sample logs are : {total_requests}</p>"
        "<p>The Redundant API Requests from the sample logs are : {redundant_requests}</p>"
        "<p>The percentage of Redundant API Requests from the sample logs is : {redundant_percentage:.2f}%</p>"
        "{dataframe_html}"
    ).format(
        total_requests=total_requests,
        redundant_requests=redundant_requests,
        redundant_percentage=redundant_percentage,
        dataframe_html=dataframe_html,
    )

    # Save result in a new HTML file; explicit encoding avoids platform defaults.
    with open("result.html", "w", encoding="utf-8") as f:
        f.write(result)

    return "Result saved in 'result.html'"
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Web framework and the ASGI server launched by the container's CMD.
fastapi
uvicorn
# Required by FastAPI to parse multipart/form-data uploads (UploadFile / File).
python-multipart
# Data processing; pyarrow is the Parquet engine behind pandas.read_parquet.
pandas
pyarrow
# Needed by pandas' DataFrame.style: jinja2 for rendering, matplotlib for background_gradient.
jinja2
matplotlib
# Imported at the top of app.py.
requests