Spaces:
Sleeping
Sleeping
| import json | |
| import pandas as pd | |
| import numpy as np | |
| from sqlalchemy import create_engine | |
| from scripts.literature import literature_search | |
| from scripts.uniprot import get_protein_location | |
| from scripts.utils import * | |
# Tissue columns compared against each other. Order matters: a later entry
# overrides an earlier one if both ever qualify for the same row.
_REGION_COLUMNS = ('Hippocampus', 'VCN', 'Cortex')


def _classify_region(df, threshold):
    """Return one region label per row of *df*.

    A row is labelled with a region when that region's value divided by each
    of the other two regions' values is strictly greater than *threshold*;
    otherwise it is labelled 'inconclusive'.
    """
    labels = np.full(len(df), 'inconclusive', dtype=object)
    for region in _REGION_COLUMNS:
        others = [col for col in _REGION_COLUMNS if col != region]
        # NOTE(review): a NaN ratio (e.g. 0/0 or a missing value) compares
        # False, so such rows are never assigned to `region` by this test.
        enriched = np.logical_and.reduce(
            [(df[region] / df[other] > threshold).to_numpy() for other in others]
        )
        labels = np.where(enriched, region, labels)
    return labels


def create_database(db_uri: str, *, threshold: float = 10) -> None:
    """Build the ``proteins`` table from the bundled data files.

    Reads ``data/uniprot/mouse.json`` and ``data/test_table.csv``, drops rows
    without an ``Accession``, adds the ``locations``, ``nucleus``,
    ``transcription_factor`` and ``region`` columns, and writes the result to
    the ``proteins`` table of the database at *db_uri*, replacing any
    existing table of that name.

    Args:
        db_uri: Database URL passed to ``sqlalchemy.create_engine``.
        threshold: Minimum ratio by which one region's value must exceed each
            of the other two for the row to be assigned to that region.
            Defaults to 10, the value previously hard-coded here.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open('data/uniprot/mouse.json', encoding='utf-8') as f:
        proteins = json.load(f)['results']

    df = pd.read_csv('data/test_table.csv')
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a filtered view of the CSV data.
    df = df[df['Accession'].notna()].copy()

    # The three helpers below are defined outside this function
    # (presumably provided by `from scripts.utils import *` -- TODO confirm).
    df['locations'] = df['Accession'].apply(
        lambda x: get_location_from_acession(x, proteins)
    )
    df['nucleus'] = df['locations'].apply(is_in_nucleus)
    df['transcription_factor'] = df['Accession'].apply(
        lambda x: is_transcription_factor(x, proteins)
    )

    df['region'] = _classify_region(df, threshold)

    engine = create_engine(db_uri, echo=False)
    try:
        df.to_sql(name='proteins', con=engine, if_exists='replace')
    finally:
        # Release pooled connections even if the write fails.
        engine.dispose()