# BOTeome / scripts / db.py
# juan
# Revert "file upload"
# 6dea890
import json
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from scripts.literature import literature_search
from scripts.uniprot import get_protein_location
from scripts.utils import *
def _classify_region(df, threshold):
    """Return one region label per row of *df*, based on pairwise ratios.

    A row is labelled with a region name when that region's column divided by
    each of the other two region columns is greater than *threshold*. Rows
    that satisfy no region are labelled ``'inconclusive'``.
    """
    regions = ('Hippocampus', 'VCN', 'Cortex')
    labels = np.full(len(df), 'inconclusive', dtype=object)
    # Regions are applied in order, so a later region overwrites an earlier
    # one if both conditions ever hold for the same row.
    for region in regions:
        enriched = np.ones(len(df), dtype=bool)
        for other in regions:
            if other == region:
                continue
            # NOTE(review): a zero denominator gives inf (greater than any
            # finite threshold) or NaN (compares False) rather than raising;
            # confirm this is the intended handling of zero abundances.
            enriched &= (df[region] / df[other] > threshold).to_numpy()
        labels = np.where(enriched, region, labels)
    return labels


def create_database(db_uri, *, threshold=10):
    """Build the ``proteins`` table from the local data files.

    Loads ``data/uniprot/mouse.json`` and ``data/test_table.csv``, drops CSV
    rows without an ``Accession``, adds the derived columns ``locations``,
    ``nucleus``, ``transcription_factor`` and ``region``, and writes the
    result to the ``proteins`` table, replacing it if it already exists.

    Args:
        db_uri: Database URL passed to ``sqlalchemy.create_engine``.
        threshold: Minimum ratio a region's value must exceed over both other
            regions for the row to be assigned to that region. Defaults to 10.
    """
    with open('data/uniprot/mouse.json', encoding='utf-8') as f:
        proteins = json.load(f)['results']

    df = pd.read_csv('data/test_table.csv')
    # Take an explicit copy: the filtered frame is otherwise a slice of the
    # CSV frame, and adding columns to it triggers SettingWithCopyWarning.
    df = df[df['Accession'].notna()].copy()

    # The three helpers below come from scripts.utils; their exact semantics
    # are not visible here (presumably lookups into the UniProt entries).
    df['locations'] = df['Accession'].apply(
        lambda accession: get_location_from_acession(accession, proteins)
    )
    df['nucleus'] = df['locations'].apply(is_in_nucleus)
    df['transcription_factor'] = df['Accession'].apply(
        lambda accession: is_transcription_factor(accession, proteins)
    )
    df['region'] = _classify_region(df, threshold)

    engine = create_engine(db_uri, echo=False)
    try:
        df.to_sql(name='proteins', con=engine, if_exists='replace')
    finally:
        # Release pooled connections; the engine is local to this call.
        engine.dispose()