MFF212 committed on
Commit
02848dc
·
1 Parent(s): 97bb996

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +60 -82
main.py CHANGED
@@ -1,87 +1,65 @@
1
- from fastapi import FastAPI, UploadFile, File
2
- from fastapi.responses import HTMLResponse
3
- from fastapi.responses import HTMLResponse, FileResponse
4
- import pandas as pd
5
- import io
6
- import requests
7
- import time
8
 
9
  app = FastAPI()
10
 
11
- @app.get("/", response_class=HTMLResponse)
12
- async def analyze_logs():
13
- return """
14
- <html>
15
- <body>
16
- <form action="/upload/" enctype="multipart/form-data" method="post">
17
- <input name="file" type="file">
18
- <input type="submit">
19
- </form>
20
- </body>
21
- </html>
22
- """
23
 
24
  @app.post("/upload/")
25
- async def upload_file(file: UploadFile = File(...)):
26
- contents = await file.read()
27
- logs_df = pd.read_parquet(io.BytesIO(contents))
28
-
29
- processing_message = "\n\n Processing files...\n\n"
30
- time.sleep(3) # Simulate processing time (3 seconds)
31
-
32
- logs_df['datetime'] = pd.to_datetime(logs_df['datetime'], format='%d/%m/%Y:%H:%M:%S')
33
- logs_df['day'] = logs_df['datetime'].apply(lambda x: x.day)
34
- logs_df['hour'] = logs_df['datetime'].apply(lambda x: x.hour)
35
- logs_df['minute'] = logs_df['datetime'].apply(lambda x: x.minute)
36
-
37
- ip_address_count_df = (
38
- logs_df.groupby(['method', 'client'], as_index=False)
39
- .size()
40
- .rename(columns={'size': 'count'})
41
- .sort_values('count', ascending=False)
42
- )
43
-
44
- ip_address_count_df = ip_address_count_df.assign(
45
- perc=ip_address_count_df['count'].div(ip_address_count_df['count'].sum()),
46
- cum_perc=lambda df: df['perc'].cumsum(),
47
- )
48
-
49
- result = (
50
- "<h1>Redundant IP Requests....</h1>"
51
- "<p>The Total API Requests from the sample logs are : {total_requests}</p>"
52
- "<p>The Redundant API Requests from the sample logs are : {redundant_requests}</p>"
53
- "<p>The percentage of Redundant API Requests from the sample logs is : {redundant_percentage:.2f}%</p>"
54
- "{dataframe_html}"
55
- ).format(
56
- total_requests=logs_df.shape[0],
57
- redundant_requests=ip_address_count_df.shape[0],
58
- redundant_percentage=(ip_address_count_df.shape[0] / logs_df.shape[0]) * 100,
59
- dataframe_html=ip_address_count_df.head(1000)
60
- .style.background_gradient(subset=['count', 'perc', 'cum_perc'], cmap='cividis')
61
- .format({'count': '{:,}', 'perc': '{:.1%}', 'cum_perc': '{:.1%}'})
62
- .to_html(), # Corrected here
63
- )
64
- # Save the HTML content locally
65
- file_location = "result.html"
66
- with open(file_location, "w") as html_file:
67
- html_file.write(result)
68
-
69
- # Print the location of the saved file
70
- print(f"HTML result saved to: {file_location}")
71
-
72
- return HTMLResponse(content=result)
73
-
74
- @app.get("/view-result/", response_class=HTMLResponse)
75
- async def view_result():
76
- try:
77
- with open("result.html", "r") as html_file:
78
- content = html_file.read()
79
- return HTMLResponse(content=content)
80
- except FileNotFoundError:
81
- return HTMLResponse(content="Result HTML not found")
82
-
83
-
84
-
85
-
86
-
87
-
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from fastapi.responses import HTMLResponse # Import HTMLResponse class
3
+ import csv
4
+ from typing import List
5
+ from io import StringIO # Import StringIO to create an in-memory CSV file
 
 
6
 
7
app = FastAPI()

# Dictionaries to store URLs, API counts, and associated names
# NOTE(review): these are module-level mutable dicts shared by every request
# this process handles — the upload handler mutates them and nothing ever
# resets them, so counts accumulate across uploads (and differ per worker).
# Confirm this cross-request accumulation is actually intended.
url_counts = {}
api_counts = {}
url_names = {}
api_names = {}
 
 
 
 
 
 
 
14
 
15
@app.post("/upload/")
async def upload_csv(file: UploadFile):
    """Analyze an uploaded CSV access log and report redundant GET requests.

    Reads the whole CSV in memory, counts how often each URL (column index 3)
    appears, and returns an HTML table of per-URL repetition counts plus
    summary statistics. Returns a JSON error object when the upload is not
    declared as ``text/csv``.

    Fixes vs. the previous revision:
      * counts live in per-request local dicts instead of module-level
        globals, so one upload's counts no longer leak into the next request;
      * rows with fewer than 4 columns are skipped instead of raising
        ``IndexError`` (previously any row shorter than 7 columns crashed);
      * the API-column bookkeeping (``row[4]``/``row[6]``), which was never
        reflected in the response, was dropped as dead work.
    """
    # Guard clause: reject non-CSV uploads early (same response as before).
    if file.content_type != "text/csv":
        return {"error": "Invalid file format. Please upload a CSV file."}

    raw = await file.read()
    lines = raw.decode("utf-8").splitlines()

    # Per-request state — locals prevent cross-request accumulation.
    url_counts: dict[str, int] = {}
    url_names: dict[str, str] = {}  # first associated name seen per URL (col 2)
    total_records = 0

    reader = csv.reader(lines)
    next(reader, None)  # skip header row; None default tolerates an empty file
    for row in reader:
        if len(row) < 4:
            continue  # malformed/short row — skip rather than IndexError
        url = row[3]  # assumes the URL is in the 4th column — TODO confirm schema
        total_records += 1
        url_counts[url] = url_counts.get(url, 0) + 1
        url_names.setdefault(url, row[2])  # keep the first name seen

    # URLs requested more than once.
    redundant_urls = [url for url, count in url_counts.items() if count > 1]

    # NOTE(review): this metric is (distinct redundant URLs / total rows),
    # kept byte-identical to the original formula — confirm it is the
    # intended ratio (vs. redundant *requests* / total rows).
    percentage_redundant_urls = (
        (len(redundant_urls) / total_records) * 100 if total_records > 0 else 0
    )

    # Build the results table. NOTE(review): values are interpolated into
    # HTML unescaped — fine for trusted logs, an XSS vector otherwise.
    rows_html = "".join(
        f"<tr><td>{url}</td><td>{count}</td></tr>"
        for url, count in url_counts.items()
    )
    html_table = (
        "<table>"
        "<tr><th>GET Request</th><th>Count of Repetition</th></tr>"
        f"{rows_html}"
        "</table>"
    )

    return HTMLResponse(
        content=f"<h2>Analysis Results:</h2>{html_table}<br>"
        f"<b>Total GET Records:</b> {total_records}<br>"
        f"<b>% of Redundant GET Requests:</b> {round(percentage_redundant_urls, 2)}%",
        status_code=200,
    )