berangerthomas committed on
Commit
f972805
·
1 Parent(s): 31fea3a

Add pandas to sqlite conversion

Browse files
Files changed (2) hide show
  1. app.py +102 -24
  2. utils/pandas2sql.py +88 -0
app.py CHANGED
@@ -1,36 +1,114 @@
 
 
 
 
1
  import tempfile
 
2
 
3
  import streamlit as st
4
 
5
  from config.log_definitions import log_definitions
6
  from utils.log2pandas import LogParser
 
7
 
8
- st.title("Log Analyzer")
 
 
9
 
10
- # Upload area by drag and drop
11
- uploaded_file = st.file_uploader("Drop your log file here")
12
 
13
- # Dropdown menu to choose the log type
 
14
 
15
- # Extract log types from the configuration file
16
  log_types = list(log_definitions.keys())
 
 
 
17
 
18
- log_type = st.selectbox("Select log type", options=log_types)
19
-
20
- # Analyze button
21
- if st.button("Analyze"):
22
- if uploaded_file is not None:
23
- # Temporarily save the uploaded file
24
- with tempfile.NamedTemporaryFile(delete=False, suffix=".log") as tmp_file:
25
- tmp_file.write(uploaded_file.getbuffer())
26
- tmp_file_path = tmp_file.name
27
-
28
- # Create an instance of LogParser with the temporary path and log type
29
- parser = LogParser(tmp_file_path, log_type)
30
- # Parse the file and get the DataFrame
31
- parsed_df = parser.parse_file()
32
- # Display the first rows of the resulting DataFrame
33
- st.write("Resulting DataFrame:")
34
- st.dataframe(parsed_df)
35
- else:
36
- st.error("Please upload a log file.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #####################################################
2
+ #### Imports ####
3
+ #####################################################
4
+ import os
5
  import tempfile
6
+ from datetime import datetime
7
 
8
  import streamlit as st
9
 
10
  from config.log_definitions import log_definitions
11
  from utils.log2pandas import LogParser
12
+ from utils.pandas2sql import Pandas2SQL
13
 
14
+ #####################################################
15
+ #### Interface Setup ####
16
+ #####################################################
17
 
18
+ st.title("ShadowLog - Log File Analyzer")
19
+ st.write("Upload a log file to analyze and/or convert it to SQLite")
20
 
21
+ # File upload widget
22
+ uploaded_file = st.file_uploader("Choose a log file")
23
 
24
+ # Get available log types from log_definitions
25
  log_types = list(log_definitions.keys())
26
+ # Set default log type if not already in session state
27
+ if "log_type" not in st.session_state:
28
+ st.session_state.log_type = log_types[0] # Default to first log type
29
 
30
+ st.session_state.log_type = st.selectbox(
31
+ "Select log type", log_types, index=log_types.index(st.session_state.log_type)
32
+ )
33
+
34
+ # Store the parsed dataframe in the session state
35
+ if "parsed_df" not in st.session_state:
36
+ st.session_state.parsed_df = None
37
+
38
+ if uploaded_file is not None:
39
+ # Create two columns for the buttons
40
+ col1, col2 = st.columns(2)
41
+
42
+ with col1:
43
+ # Button to parse the log file
44
+ if st.button("Parse the log file"):
45
+ with st.spinner("Analyzing the file..."):
46
+ # Create a temporary file
47
+ with tempfile.NamedTemporaryFile(
48
+ delete=False, suffix=".log"
49
+ ) as tmp_file:
50
+ tmp_file.write(uploaded_file.getvalue())
51
+ tmp_path = tmp_file.name
52
+
53
+ try:
54
+ # Parse the log file
55
+ parser = LogParser(tmp_path, st.session_state.log_type)
56
+ st.session_state.parsed_df = parser.parse_file()
57
+
58
+ # Display a success message and the dataframe
59
+ st.success("Log file successfully analyzed!")
60
+ # st.dataframe(st.session_state.parsed_df)
61
+ except Exception as e:
62
+ st.error(f"Error analyzing the file: {e}")
63
+ finally:
64
+ # Clean up the temporary file
65
+ os.unlink(tmp_path)
66
+
67
+ with col2:
68
+ # Button to convert to SQLite and download
69
+ if st.button("Convert to SQLite"):
70
+ if st.session_state.parsed_df is not None:
71
+ with st.spinner("Converting to SQLite..."):
72
+ try:
73
+ # Create a temporary SQLite file
74
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
75
+ sqlite_path = os.path.join(
76
+ tempfile.gettempdir(), f"log_data_{timestamp}.sqlite"
77
+ )
78
+
79
+ # Create the SQL converter
80
+ sql_converter = Pandas2SQL(sqlite_path)
81
+
82
+ # Convert the dataframe to SQLite
83
+ sql_converter.create_table(
84
+ st.session_state.parsed_df, st.session_state.log_type
85
+ )
86
+
87
+ # Read the SQLite file for download
88
+ with open(sqlite_path, "rb") as file:
89
+ sqlite_data = file.read()
90
+
91
+ # Success message and immediate download
92
+ st.success("SQLite file created successfully!")
93
+
94
+ # Download button
95
+ st.download_button(
96
+ label="Download SQLite file",
97
+ data=sqlite_data,
98
+ file_name=f"log_file_{st.session_state.log_type}_{timestamp}.sqlite",
99
+ mime="application/octet-stream",
100
+ key="auto_download",
101
+ )
102
+ except Exception as e:
103
+ st.error(f"Error converting to SQLite: {e}")
104
+ finally:
105
+ # Clean up the temporary file
106
+ if os.path.exists(sqlite_path):
107
+ os.unlink(sqlite_path)
108
+ else:
109
+ st.warning("Please parse the log file first.")
110
+
111
+ # Display the dataframe if available
112
+ if st.session_state.parsed_df is not None:
113
+ st.subheader("Analyzed log data")
114
+ st.dataframe(st.session_state.parsed_df)
utils/pandas2sql.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+
3
+ import pandas as pd
4
+
5
+
6
+ class Pandas2SQL:
7
+ """
8
+ Classe pour convertir un DataFrame pandas en table SQLite
9
+ avec détection automatique des types de colonnes.
10
+ """
11
+
12
+ def __init__(self, db_path=":memory:"):
13
+ """
14
+ Initialise la connexion à la base de données SQLite
15
+
16
+ Args:
17
+ db_path (str): Chemin vers le fichier de base de données SQLite
18
+ (par défaut utilise une base de données en mémoire)
19
+ """
20
+ self.db_path = db_path
21
+
22
+ def _get_sqlite_type(self, pandas_dtype):
23
+ """
24
+ Convertit un type pandas en type SQLite approprié
25
+
26
+ Args:
27
+ pandas_dtype: Type pandas
28
+
29
+ Returns:
30
+ str: Type SQLite correspondant
31
+ """
32
+ if pd.api.types.is_integer_dtype(pandas_dtype):
33
+ return "INTEGER"
34
+ elif pd.api.types.is_float_dtype(pandas_dtype):
35
+ return "REAL"
36
+ elif pd.api.types.is_bool_dtype(pandas_dtype):
37
+ return "INTEGER" # SQLite n'a pas de type booléen, utilise INTEGER (0/1)
38
+ elif pd.api.types.is_datetime64_dtype(pandas_dtype):
39
+ return "TIMESTAMP"
40
+ else:
41
+ return "TEXT" # Pour les types object, string, category, etc.
42
+
43
+ def create_table(self, df, table_name, if_exists="replace", primary_key=None):
44
+ """
45
+ Crée une table SQLite basée sur un DataFrame pandas
46
+
47
+ Args:
48
+ df (pandas.DataFrame): DataFrame à convertir
49
+ table_name (str): Nom de la table à créer
50
+ if_exists (str): Action si la table existe ('fail', 'replace', 'append')
51
+ primary_key (str): Nom de la colonne à définir comme clé primaire (optionnel)
52
+ """
53
+ # Création du schéma de table basé sur les types de colonnes
54
+ columns = []
55
+ for col_name, dtype in df.dtypes.items():
56
+ sqlite_type = self._get_sqlite_type(dtype)
57
+ col_def = f'"{col_name}" {sqlite_type}'
58
+ if primary_key and col_name == primary_key:
59
+ col_def += " PRIMARY KEY"
60
+ columns.append(col_def)
61
+
62
+ # Création de la requête SQL
63
+ create_query = f'CREATE TABLE "{table_name}" ({", ".join(columns)})'
64
+
65
+ # Connexion et création de la table
66
+ conn = sqlite3.connect(self.db_path)
67
+ cursor = conn.cursor()
68
+
69
+ try:
70
+ if if_exists == "replace":
71
+ cursor.execute(f'DROP TABLE IF EXISTS "{table_name}"')
72
+ elif if_exists == "fail":
73
+ cursor.execute(
74
+ f'SELECT name FROM sqlite_master WHERE type="table" AND name="{table_name}"'
75
+ )
76
+ if cursor.fetchone():
77
+ raise ValueError(f"La table '{table_name}' existe déjà.")
78
+
79
+ cursor.execute(create_query)
80
+
81
+ # Insertion des données
82
+ df.to_sql(table_name, conn, if_exists="append", index=False)
83
+ conn.commit()
84
+ except Exception as e:
85
+ conn.rollback()
86
+ raise e
87
+ finally:
88
+ conn.close()