| | import pandas as pd
|
| | import sqlite3
|
| | import csv
|
| |
|
| | def get_delimiter(file_path, bytes = 4096):
|
| | sniffer = csv.Sniffer()
|
| | data = open(file_path, "r").read(bytes)
|
| | delimiter = sniffer.sniff(data).delimiter
|
| | return delimiter
|
| |
|
| | def read_file(file):
|
| | if file.endswith(('.csv', '.tsv', '.txt')) :
|
| | df = pd.read_csv(file, sep=get_delimiter(file))
|
| | elif file.endswith('.json'):
|
| | df = pd.read_json(file)
|
| | elif file.endswith('.xml'):
|
| | df = pd.read_xml(file)
|
| | elif file.endswith(('.xls','xlsx')):
|
| | df = pd.read_excel(file)
|
| | else:
|
| | raise ValueError(f'Unsupported filetype: {file}')
|
| | return df
|
| |
|
| | def process_data_upload(data_file, session_hash):
|
| | df = read_file(data_file)
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | df.columns = df.columns.str.replace(' ', '_')
|
| | df.columns = df.columns.str.replace('/', '_')
|
| |
|
| | for column in df.columns:
|
| | if "date" in column.lower() or "time" in column.lower():
|
| | df[column] = pd.to_datetime(df[column])
|
| |
|
| | connection = sqlite3.connect(f'data_source_{session_hash}.db')
|
| | print("Opened database successfully");
|
| | print(df.columns)
|
| |
|
| | df.to_sql('data_source', connection, if_exists='replace', index = False)
|
| |
|
| | connection.commit()
|
| | connection.close() |