| | import numpy as np
|
| | import pandas as pd
|
| | import matplotlib.pyplot as plt
|
| | from sympy import symbols, Eq, solve
|
| | from scipy.optimize import minimize, Bounds
|
| | from scipy.optimize import minimize_scalar
|
| | from scipy import special
|
| | import sympy as sp
|
| | import math
|
| |
|
| |
|
| |
|
| |
|
# Temperature (K) at which l_b_20 is quoted, and the working temperature (K).
# NOTE(review): 293 K / 298 K presumably mean 20 C / 25 C -- confirm.
general_T = 293
T = 298

# Chain length parameters. They are combined with densities expressed per
# cubic Angstrom (see mM_to_A), so they are presumably in Angstrom -- TODO confirm.
l_k = 8
b = 3.8

# l_b_20 is rescaled from general_T to T by the ratio general_T / T.
l_b_20 = 7.12
l_b = l_b_20 * (general_T / T)
|
| |
|
| |
|
| |
|
| |
|
| |
|
def mM_to_A(mM):
    """Convert a millimolar concentration to a number density per cubic Angstrom.

    The factors are applied in the same left-to-right order as a single
    chained product so the floating-point result is unchanged:
    mM -> mol/L -> particles/L -> particles/cm^3 -> particles/A^3.
    """
    mol_per_litre = mM * 10**(-3)
    particles_per_litre = mol_per_litre * 6.022 * 10**(23)
    particles_per_cm3 = particles_per_litre * (1/1000)
    # 1 cm = 10**8 Angstrom, hence the cubed divisor.
    return particles_per_cm3 * 1 / (10**8)**3
|
| |
|
def kl_func(salt):
    """Return ``sqrt(8 * pi * l_b * c) * b`` for a salt concentration in mM.

    ``c`` is ``salt`` converted by ``mM_to_A``; ``l_b`` and ``b`` are the
    module-level constants.
    """
    density = mM_to_A(salt)
    root = np.sqrt(8 * math.pi * l_b * density)
    return root * b
|
| |
|
| |
|
# Coefficient that multiplies B_term in free_energy and solve_for_w2_eq.
w3 = 0.2
|
| |
|
| |
|
| |
|
# Default charge per one-letter residue code. Entries whose code also appears
# in pKa_values are overwritten in place by adjust_pH().
# "B" and "Z" are the codes process_seq() substitutes for a charged residue at
# the N- and C-terminus respectively.
amino_acid_data = {
    "A": 0, "R": 1, "N": 0, "D": -1,
    "C": 0, "E": -1, "Q": 0, "G": 0,
    "H": 0.5, "I": 0, "L": 0, "K": 1,
    "M": 0, "F": 0, "P": 0, "S": 0,
    "T": 0, "W": 0, "Y": 0, "V": 0,
    "B": 2, "Z": -2,
}
|
| |
|
# pKa used by convert_charge_pH() for each titratable residue code.
pKa_values = dict(
    R=12.3,
    D=3.5,
    C=6.8,
    E=4.2,
    H=6.6,
    K=10.5,
    Y=10.3,
    B=7.7,
    Z=3.3,
)
|
| |
|
# Sign factor ``s`` passed to convert_charge_pH() for each titratable code:
# +1 yields a charge in (0, 1), -1 yields a charge in (-1, 0).
s_values = dict(
    R=1,
    D=-1,
    C=-1,
    E=-1,
    H=1,
    K=1,
    Y=-1,
    B=1,
    Z=-1,
)
|
| |
|
| |
|
def adjust_pH(pH):
    """Recompute the charge of every titratable residue for the given pH.

    Mutates the module-level ``amino_acid_data`` in place: each code present
    in both ``pKa_values`` and ``amino_acid_data`` is replaced by
    ``convert_charge_pH(pH, code, s_values[code])``. Returns ``None``.
    """
    titratable = (code for code in pKa_values if code in amino_acid_data)
    for code in titratable:
        amino_acid_data[code] = convert_charge_pH(pH, code, s_values[code])
|
| |
|
def convert_charge_pH(pH, key, s):
    """Return ``s / (1 + 10**(s * (pH - pKa)))`` using the pKa stored for ``key``.

    Raises ``KeyError`` when ``key`` has no entry in ``pKa_values``.
    """
    offset = pH - pKa_values[key]
    return s / (1 + 10**(s * offset))
|
| |
|
def get_x(Ree, N):
    """Return ``Ree**2 / (N * b * l_k)``.

    No unit rescaling is applied here (unlike ``get_Ree``, which multiplies
    its result by 0.1), so ``Ree`` must already be in the units of ``b`` and
    ``l_k``.
    """
    denominator = N * b * l_k
    return Ree**2 / denominator
|
| |
|
| |
|
def get_Ree(x, N):
    """Return ``sqrt(x * N * b * l_k) * 0.1``.

    NOTE(review): the 0.1 factor presumably converts Angstrom to nm -- confirm.
    """
    squared = x * (N * b * l_k)
    return np.sqrt(squared) * .1
|
| |
|
| |
|
def get_Rg(x, N):
    """Return ``sqrt(x * N * b * l_k / 6) * 0.1``.

    NOTE(review): the 0.1 factor presumably converts Angstrom to nm -- confirm.
    """
    squared = x * N * b * l_k / 6
    return np.sqrt(squared) * .1
|
| |
|
def process_seq(seq, n, c, idp):
    """Return ``seq`` with its terminal residues swapped for terminus codes.

    Parameters
    ----------
    seq : str
        One-letter residue sequence.
    n, c : truthy/falsy
        Whether to modify the first / last residue.
    idp : truthy/falsy
        When truthy, both ends are modified regardless of ``n`` and ``c``.

    First residue (when ``n or idp``): R/K -> "B", D/E -> "A", other -> "R".
    Last residue (when ``c or idp``):  R/K -> "A", D/E -> "Z", other -> "D".

    Both decisions are based on the residues of the *input* sequence. For a
    one-residue sequence with both ends enabled, the last-residue replacement
    overwrites the first-residue replacement.

    An empty sequence is returned unchanged instead of raising ``IndexError``.
    """
    if seq == "":
        # Nothing to modify; indexing seq[0] below would fail.
        return seq

    first, last = seq[0], seq[-1]
    result = seq

    if n or idp:
        if first in ("R", "K"):
            head = "B"
        elif first in ("D", "E"):
            head = "A"
        else:
            head = "R"
        result = head + result[1:]

    if c or idp:
        if last in ("R", "K"):
            tail = "A"
        elif last in ("D", "E"):
            tail = "Z"
        else:
            tail = "D"
        result = result[:-1] + tail

    return result
|
| |
|
def get_charge(m, n, seq):
    """Return the product of the charges of residues ``m`` and ``n`` (1-based).

    Charges are looked up in ``amino_acid_data`` with ``dict.get``, so a
    residue code missing from that table yields ``None`` and the
    multiplication then raises ``TypeError``.
    """
    charge_m, charge_n = (
        amino_acid_data.get(residue) for residue in (seq[m - 1], seq[n - 1])
    )
    return charge_m * charge_n
|
| |
|
def set_vars(data, i):
    """Read row ``i`` of ``data`` and return ``(N, w2, seq, x, name)``.

    Columns are addressed by position through ``data.iloc``:
    0 name, 2 raw sequence, 3 N, 4/5/6 the three flags forwarded to
    ``process_seq``, 9 x, 10 w2. ``w2`` is converted to ``float`` unless it is
    the literal string ``"None"``, which is passed through unchanged.
    """
    def cell(column):
        return data.iloc[i, column]

    name = cell(0)

    raw_seq = cell(2)
    N = int(cell(3))
    flags = (cell(4), cell(5), cell(6))

    x = cell(9)
    w2 = cell(10)
    if w2 != "None":
        w2 = float(w2)

    seq = process_seq(raw_seq, *flags)

    return N, w2, seq, x, name
|
| |
|
def calc_x_w_load(N, w2, seq, seed, O_term, B_term, salt, pH):
    """Minimise the free energy over ``x`` and return ``(x, Ree, Rg)``.

    Side effects: calls ``adjust_pH(pH)``, which rewrites the module-level
    charge table, and prints the salt value together with ``kl_func(salt)``.

    The minimisation uses Nelder-Mead starting from ``seed`` with ``x``
    bounded to [0.01, 10]. ``Ree`` and ``Rg`` come from ``get_Ree`` and
    ``get_Rg`` evaluated at the minimiser.
    """
    adjust_pH(pH)
    print("salt:", salt, "kappa:", kl_func(salt))

    # Charge-pair weights depend only on the sequence, so build them once.
    pair_weights = mnArray_Q_prime(N, seq)

    fit = minimize(
        function_to_solve,
        seed,
        args=(N, w2, seq, pair_weights, O_term, B_term, salt),
        method="Nelder-Mead",
        bounds=Bounds(.01, 10, keep_feasible=False),
    )
    x = fit.x[0]
    return x, get_Ree(x, N), get_Rg(x, N)
|
| |
|
| |
|
def Omega(N, w2):
    """Return ``(1/N) * sum_{1 <= n < m <= N} w2 * (m - n)**(-1/2)``.

    The summand depends only on the separation ``d = m - n``, and there are
    ``N - d`` pairs with that separation, so the double sum is evaluated as a
    single O(N) sum over ``d``. Results agree with the pairwise double loop up
    to floating-point rounding.

    Returns 0.0 for ``N == 1`` (no pairs); raises ``ZeroDivisionError`` for
    ``N == 0``.
    """
    weighted = sum(
        ((N - d) * (w2 * d ** (-0.5)) for d in range(1, N)),
        0.0,
    )
    return 1/N * weighted
|
| |
|
def mn_Omega(N):
    """Return ``(1/N) * sum_{1 <= n < m <= N} (m - n)**(-1/2)``.

    This is ``Omega`` without the ``w2`` factor. The summand depends only on
    the separation ``d = m - n``, which occurs ``N - d`` times, so the double
    sum is evaluated as a single O(N) sum over ``d``. Results agree with the
    pairwise double loop up to floating-point rounding.

    Returns 0.0 for ``N == 1``; raises ``ZeroDivisionError`` for ``N == 0``.
    """
    total = sum(((N - d) * d ** (-0.5) for d in range(1, N)), 0.0)
    return 1/N * total
|
| |
|
def B(N):
    """Return ``(1/N) * sum_{1 <= n < m < p <= N} (p - n) / ((p - m) * (m - n))**(3/2)``.

    The summand depends only on the two gaps ``p - m`` and ``m - n``. For a
    total span ``s = p - n`` there are ``N - s`` placements of the triple
    along the chain, so the triple sum reduces to an O(N^2) sum over the span
    and one gap instead of an O(N^3) loop over all triples. Results agree
    with the direct triple loop up to floating-point rounding.

    Returns 0.0 for ``N`` of 1 or 2 (no triples); raises
    ``ZeroDivisionError`` for ``N == 0``.
    """
    total = 0.0
    for span in range(2, N):
        # Sum over every split of the span into two positive gaps.
        gap_sum = 0.0
        for gap in range(1, span):
            gap_sum += 1 / (gap * (span - gap)) ** (3/2)
        total += (N - span) * span * gap_sum
    return 1/N * total
|
| |
|
| |
|
def Q(N, seq):
    """Return ``(1/N) * sum_{1 <= n < m <= N} q_m * q_n * (m - n)**(1/2)``.

    ``q_m * q_n`` is obtained from ``get_charge(m, n, seq)``.
    """
    total = 0.0
    for upper in range(2, N + 1):
        for lower in range(1, upper):
            separation = upper - lower
            total += get_charge(upper, lower, seq) * (separation ** (0.5))
    return 1/N * total
|
| |
|
def Q_prime(N, seq, x, salt=None):
    """Return ``(1/N) * sum_{n < m} q_m * q_n * (m - n)**2 * A_prime(m, n, x, salt)``.

    Parameters
    ----------
    N : int
        Number of residues; pairs run over ``1 <= n < m <= N``.
    seq : str
        Sequence passed to ``get_charge``.
    x : float
        Value at which ``A_prime`` is evaluated.
    salt : optional
        Salt concentration forwarded to ``A_prime``. ``A_prime`` requires it,
        so it must be supplied whenever there is at least one pair
        (``N >= 2``).

    Raises
    ------
    TypeError
        If ``salt`` is omitted while ``N >= 2``. Without this parameter the
        call into ``A_prime`` always failed with ``TypeError`` for a missing
        argument, so the exception type is unchanged for existing callers.
    """
    if salt is None and N >= 2:
        raise TypeError("Q_prime() requires 'salt' because A_prime() needs it")

    total = 0.0
    for m in range(2, N + 1):
        for n in range(1, m):
            total += get_charge(m, n, seq) * ((m - n) ** 2) * A_prime(m, n, x, salt)
    return 1/N * total
|
| |
|
def mn_Q_prime(N, seq, x, mn_array, salt):
    """Return ``(1/N) * sum_k mn_array[k] * A_prime(m_k, n_k, x, salt)``.

    Pairs ``(m, n)`` are visited with ``m`` from 2 to ``N`` and ``n`` from 1
    to ``m - 1``; ``mn_array`` must list its weights in that same order (the
    order produced by ``mnArray_Q_prime``). ``seq`` is accepted but not used.
    """
    pairs = ((m, n) for m in range(2, N + 1) for n in range(1, m))
    total = 0.0
    for position, (m, n) in enumerate(pairs):
        total += mn_array[position] * A_prime(m, n, x, salt)
    return 1/N * total
|
| |
|
def mnArray_Q_prime(N, seq):
    """Return the list of ``q_m * q_n * (m - n)**2`` for every pair ``n < m``.

    Entries are ordered with ``m`` from 2 to ``N`` and, within each ``m``,
    ``n`` from 1 to ``m - 1``.
    """
    return [
        get_charge(m, n, seq) * ((m - n) ** 2)
        for m in range(2, N + 1)
        for n in range(1, m)
    ]
|
| |
|
def A_prime(m, n, x, salt):
    """Return the pair kernel for residues ``m`` and ``n`` at value ``x``.

    With ``d = m - n`` and ``k = kl_func(salt)`` the result is::

        (1/2) * sqrt(6*pi/x) / d**(3/2)
            - k * (pi/2) / d * erfcx(sqrt(k**2 * x * d / 6))
    """
    separation = m - n
    kl = kl_func(salt)

    leading = 1/2 * (6*math.pi/x)**(1/2) * (1/separation**(3/2))
    prefactor = kl * (math.pi/2) * (1/separation)
    scaled_erfc = special.erfcx(np.sqrt(kl**2 * x * separation / 6))
    return leading - prefactor * scaled_erfc
|
| |
|
def free_energy(x, N, w2, seq, mn_array, O_term, B_term, salt):
    """Return the free energy at ``x`` as the sum of four contributions.

    * ``3/2 * (x - ln x)``
    * ``(3/(2*pi))**(3/2) * O_term / x**(3/2)``
    * ``w3 * (3/(2*pi))**3 / 2 * B_term / x**3``
    * ``(l_b / b) * (2/pi) * mn_Q_prime(N, seq, x, mn_array, salt)``

    ``w2`` is accepted but not used directly here.
    """
    Q_term = mn_Q_prime(N, seq, x, mn_array, salt)

    chain_part = 3/2 * (x - np.log(x))
    pair_part = (3/(2*math.pi))**(3/2) * O_term * (1/(x**(3/2)))
    triple_part = (w3*(3/(2*math.pi))**(3))/2 * B_term * (1/(x**3))
    charge_part = (l_b / b) * 2/math.pi * Q_term

    return chain_part + pair_part + triple_part + charge_part
|
| |
|
def function_to_solve(argument, N, w2, seq, mn_array, O_term, B_term, salt):
    """Objective for the minimiser: ``free_energy`` evaluated at ``argument``."""
    return free_energy(argument, N, w2, seq, mn_array, O_term, B_term, salt)
|
| |
|
| |
|
def Q_prime_derv(N, seq, x, salt=None):
    """Return ``(1/N) * sum_{n < m} q_m * q_n * (m - n)**2 * A_prime_derv(m, n, x, salt)``.

    Parameters
    ----------
    N : int
        Number of residues; pairs run over ``1 <= n < m <= N``.
    seq : str
        Sequence passed to ``get_charge``.
    x : float
        Value at which the kernel derivative is evaluated.
    salt : optional
        Salt concentration forwarded to ``A_prime_derv``. When omitted,
        ``A_prime_derv`` falls back to a module-level ``kl``.
    """
    total = 0.0
    for m in range(2, N + 1):
        for n in range(1, m):
            total += get_charge(m, n, seq) * ((m - n) ** 2) * A_prime_derv(m, n, x, salt)
    return 1/N * total


def A_prime_derv(m, n, x, salt=None):
    """Return the derivative with respect to ``x`` of the ``A_prime`` kernel.

    With ``d = m - n`` and screening value ``k`` the result is::

        (-1/6) * ( (sqrt(pi)/4) * (6/x)**(3/2) / d**(3/2)
                   - k**2 * (sqrt(pi)/2) * (6/x)**(1/2) / d**(1/2)
                   + k**3 * (pi/2) * erfcx(sqrt(k**2 * x * d / 6)) )

    ``k`` is ``kl_func(salt)`` when ``salt`` is given, matching ``A_prime``.
    When ``salt`` is ``None`` the legacy module-level name ``kl`` is used;
    if no such global exists this raises ``NameError``.
    """
    screening = kl if salt is None else kl_func(salt)
    separation = m - n

    term1 = (np.sqrt(math.pi)/4) * (6/x)**(3/2) * (1/separation**(3/2))
    term2 = screening**2 * (np.sqrt(math.pi)/2) * (6/x)**(1/2) * (1/separation**(1/2))
    term3 = screening**3 * (math.pi/2) * special.erfcx(
        np.sqrt(screening**2 * x * separation / 6)
    )
    return (-1/6) * (term1 - term2 + term3)
|
| |
|
def solve_for_w2_eq(w2, N, x, seq, mn_O_term, B_term, Q_term):
    """Return the expression whose root in ``w2`` is sought by ``solve_for_w2``.

    The expression is the sum of::

        3/2 * (x - 1) / x
        -9*sqrt(3/2) / (4*pi**(3/2)) * (mn_O_term * w2) / x**(5/2)
        -81*w3 / (16*pi**3) * B_term / x**4
        (l_b / b) * (2/pi) * Q_term

    ``w2`` may be a number or a symbolic variable. ``N`` and ``seq`` are
    accepted but not used.
    """
    O_term = mn_O_term * w2

    chain_part = 3/2 * (x - 1)/x
    pair_part = (-9 * np.sqrt(3/2)/(4*(math.pi)**(3/2))) * O_term * (1/(x**(5/2)))
    triple_part = (-w3 * 81) / (16 * (math.pi)**(3)) * B_term * (1/(x**4))
    charge_part = (l_b / b) * 2/math.pi * Q_term

    return chain_part + pair_part + triple_part + charge_part
|
| |
|
def solve_for_w2(N, seq, x):
    """Solve ``solve_for_w2_eq(...) == 0`` for ``w2`` and return the first root.

    Parameters
    ----------
    N : int
        Number of residues.
    seq : str
        Sequence used for the charge-dependent term.
    x : float
        Value at which the equation is evaluated.

    Returns
    -------
    The first solution reported by ``sympy.solve``. Raises ``IndexError`` if
    the solver returns no solution.
    """
    # B depends only on N; passing ``seq`` as well raised TypeError.
    B_term = B(N)
    Q_term = Q_prime_derv(N, seq, x)
    mn_O_term = mn_Omega(N)

    # Keep w2 symbolic so the equation can be solved for it.
    w2 = sp.Symbol('w2')
    equation = solve_for_w2_eq(w2, N, x, seq, mn_O_term, B_term, Q_term)

    solutions = sp.solve(equation, w2)
    return solutions[0]
|
| |
|