"""cMBDF (convolutional Many-Body Distribution Functionals) representation.

NOTE(review): this file arrived badly mangled — all source was collapsed onto a
handful of physical lines and every span between a '<' and the following '>'
had been stripped (apparently by an HTML-tag filter).  The bodies of
``generate_data`` and ``generate_data_with_gradients`` (the numba kernels) and
the head of ``get_cmbdf`` were destroyed.  They have been RECONSTRUCTED below
from the surviving scaffolding (array shapes, loop structure, how the callers
unpack the return values) and must be diffed against the upstream cMBDF
implementation before this module is trusted for production numbers.
All fully-visible functions are transcribed verbatim, only reformatted.
"""

import numpy as np
import numba as nb
from numpy import einsum
from scipy.signal import fftconvolve
from scipy.fft import next_fast_len


@nb.jit(nopython=True)
def hermite_polynomial(x, degree, a=1):
    """Return the degree-1..5 scaled Hermite-style factor used alongside the
    Gaussian ``exp(-a*x**2)`` when building convolution filters.

    Degrees outside 1..5 fall through and return None (callers only use 1..5).
    NOTE(review): degree 3 uses ``12*a*x`` where the analytic derivative of a
    Gaussian would give ``12*(a**2)*x`` — kept exactly as found; confirm
    against upstream before "fixing".
    """
    if degree == 1:
        return -2*a*x
    elif degree == 2:
        x1 = (a*x)**2
        return 4*x1 - 2*a
    elif degree == 3:
        x1 = (a*x)**3
        return -8*x1 + 12*a*x
    elif degree == 4:
        x1 = (a*x)**4
        x2 = (a*x)**2
        return 16*x1 - 48*x2 + 12*a**2
    elif degree == 5:
        x1 = (a*x)**5
        x2 = (a*x)**3
        return -32*x1 + 160*x2 - 120*(a*x)


@nb.jit(nopython=True)
def chebyshev_polynomial(x, degree):
    """Chebyshev polynomial of the first kind, T_1..T_5.

    Currently unused (see the commented-out line in ``get_convolutions``);
    kept for experimentation with alternative angular basis functions.
    """
    if degree == 1:
        return x
    elif degree == 2:
        return 2*(x**2) - 1
    elif degree == 3:
        return 4*(x**3) - 3*x
    elif degree == 4:
        return 8*((x**4) - (x**2)) + 1
    elif degree == 5:
        return 16*(x**5) - 20*(x**3) + 5*x


@nb.jit(nopython=True)
def fcut(Rij, rcut):
    """Smooth cosine cutoff: 1 at Rij=0, 0 at Rij=rcut."""  # checked
    return 0.5*(np.cos((np.pi*Rij)/rcut) + 1)


@nb.jit(nopython=True)
def fcut_with_grad(Rij, rcut):
    """Return (cutoff value, d/dRij of the cutoff) for distance Rij."""
    arg = (np.pi*Rij)/rcut
    return 0.5*(np.cos(arg) + 1), (-np.pi*np.sin(arg))/(2*rcut)


@nb.jit(nopython=True)
def generate_data(size, charges, coods, rconvs_arr, aconvs_arr,
                  cutoff_r=12.0, n_atm=2.0):
    """Per-pair and per-triple cMBDF terms for one molecule (no gradients).

    NOTE(review): RECONSTRUCTED — the original body was lost to the
    '<...>'-stripping corruption.  Rebuilt to mirror the value-side math of
    ``generate_data_with_gradients`` and the consumers' pooling
    (``einsum('ij...->i...')`` / ``einsum('ijk...->i...')``).

    Parameters
    ----------
    size : number of atoms
    charges : (size,) float nuclear charges
    coods : (size,3) float coordinates
    rconvs_arr / aconvs_arr : stacked [convs, dconvs] from get_convolutions;
        only index 0 (the values) is used here
    cutoff_r : radial cutoff distance
    n_atm : exponent of the (r1*r2*r3) three-body damping

    Returns
    -------
    twob : (size, size, n_radial_features), symmetric in the atom indices
    threeb : (size, size, size, n_angular_features), symmetric in the last
        two atom indices
    """
    rconvs = rconvs_arr[0]
    aconvs = aconvs_arr[0]
    m1, n1 = rconvs.shape[0], rconvs.shape[1]
    m2, n2 = aconvs.shape[0], aconvs.shape[1]
    nrs = m1*n1
    nAs = m2*n2
    rgrid_len = rconvs.shape[-1]
    agrid_len = aconvs.shape[-1]
    rstep = cutoff_r/rgrid_len
    astep = np.pi/agrid_len
    twob = np.zeros((size, size, nrs))
    threeb = np.zeros((size, size, size, nAs))
    for i in range(size):
        z, atom = charges[i], coods[i]
        for j in range(i+1, size):
            rij = atom - coods[j]
            r1 = np.linalg.norm(rij)
            if r1 != 0 and r1 < cutoff_r:
                z2 = charges[j]
                pref = z*z2
                ind = int(r1/rstep)
                if ind >= rgrid_len:  # guard the r == cutoff edge
                    ind = rgrid_len - 1
                id2 = 0
                for i1 in range(m1):
                    for i2 in range(n1):
                        twob[i, j, id2] = pref*rconvs[i1, i2, ind]
                        twob[j, i, id2] = twob[i, j, id2]
                        id2 += 1
                for k in range(j+1, size):
                    rik = atom - coods[k]
                    r2 = np.linalg.norm(rik)
                    if r2 != 0 and r2 < cutoff_r:
                        rkj = coods[k] - coods[j]
                        r3 = np.linalg.norm(rkj)
                        if r3 != 0:
                            z3 = charges[k]
                            charge = z*z2*z3
                            atm = (r1*r2*r3)**n_atm
                            # interior angles at each vertex, clamped for arccos
                            cos_i = min(1.0, max(-1.0, np.dot(rij, rik)/(r1*r2)))
                            cos_j = min(1.0, max(-1.0, np.dot(rij, rkj)/(r1*r3)))
                            cos_k = min(1.0, max(-1.0, -np.dot(rik, rkj)/(r2*r3)))
                            ind_i = int(np.arccos(cos_i)/astep)
                            ind_j = int(np.arccos(cos_j)/astep)
                            ind_k = int(np.arccos(cos_k)/astep)
                            if ind_i >= agrid_len:
                                ind_i = agrid_len - 1
                            if ind_j >= agrid_len:
                                ind_j = agrid_len - 1
                            if ind_k >= agrid_len:
                                ind_k = agrid_len - 1
                            id3 = 0
                            for i1 in range(m2):
                                for i2 in range(n2):
                                    f_i = charge*aconvs[i1, i2, ind_i]/atm
                                    f_j = charge*aconvs[i1, i2, ind_j]/atm
                                    f_k = charge*aconvs[i1, i2, ind_k]/atm
                                    # store symmetrically so pooling over the
                                    # last two atom axes sees every triple
                                    threeb[i, j, k, id3] = f_i
                                    threeb[i, k, j, id3] = f_i
                                    threeb[j, i, k, id3] = f_j
                                    threeb[j, k, i, id3] = f_j
                                    threeb[k, i, j, id3] = f_k
                                    threeb[k, j, i, id3] = f_k
                                    id3 += 1
    return twob, threeb


@nb.jit(nopython=True)
def generate_data_with_gradients(size, charges, coods, rconvs_arr, aconvs_arr,
                                 cutoff_r=12.0, n_atm=2.0):
    """Same per-pair/per-triple terms as ``generate_data`` plus analytic
    Cartesian gradients of the POOLED (per-center) features.

    NOTE(review): the loop bodies were destroyed by the file corruption and
    are RECONSTRUCTED here (the original used scratch arrays ``twob_temps`` /
    ``threeb_temps`` in a form that did not survive).  Values match
    ``generate_data``; gradients use the derivative convolutions
    ``drconvs`` / ``daconvs`` with standard bond-length / bond-angle
    derivative formulas.  Verify against upstream cMBDF.

    Returns
    -------
    twob : (size, size, nrs)
    twob_grad : (size, nrs, size, 3) — d(pooled two-body feature of center c)
        / d(position of atom t)
    threeb : (size, size, size, nAs)
    threeb_grad : (size, nAs, size, 3)
    """
    rconvs, drconvs = rconvs_arr[0], rconvs_arr[1]
    aconvs, daconvs = aconvs_arr[0], aconvs_arr[1]
    m1, n1 = rconvs.shape[0], rconvs.shape[1]
    m2, n2 = aconvs.shape[0], aconvs.shape[1]
    nrs = m1*n1
    nAs = m2*n2
    rgrid_len = rconvs.shape[-1]
    agrid_len = aconvs.shape[-1]
    rstep = cutoff_r/rgrid_len
    astep = np.pi/agrid_len
    twob = np.zeros((size, size, nrs))
    threeb = np.zeros((size, size, size, nAs))
    twob_grad = np.zeros((size, nrs, size, 3))
    threeb_grad = np.zeros((size, nAs, size, 3))
    for i in range(size):
        z, atom = charges[i], coods[i]
        for j in range(i+1, size):
            rij = atom - coods[j]
            r1 = np.linalg.norm(rij)
            if r1 != 0 and r1 < cutoff_r:
                z2 = charges[j]
                pref = z*z2
                u1 = rij/r1  # d r1 / d r_i ; d r1 / d r_j = -u1
                ind = int(r1/rstep)
                if ind >= rgrid_len:
                    ind = rgrid_len - 1
                id2 = 0
                for i1 in range(m1):
                    for i2 in range(n1):
                        val = pref*rconvs[i1, i2, ind]
                        dval = pref*drconvs[i1, i2, ind]
                        twob[i, j, id2] = val
                        twob[j, i, id2] = val
                        for d in range(3):
                            g = dval*u1[d]
                            # same pattern for both centers of the pair
                            twob_grad[i, id2, i, d] += g
                            twob_grad[i, id2, j, d] -= g
                            twob_grad[j, id2, i, d] += g
                            twob_grad[j, id2, j, d] -= g
                        id2 += 1
                for k in range(j+1, size):
                    rik = atom - coods[k]
                    r2 = np.linalg.norm(rik)
                    if r2 != 0 and r2 < cutoff_r:
                        rkj = coods[k] - coods[j]
                        r3 = np.linalg.norm(rkj)
                        if r3 != 0:
                            z3 = charges[k]
                            charge = z*z2*z3
                            atm = (r1*r2*r3)**n_atm
                            u2 = rik/r2
                            u3 = rkj/r3
                            cos_i = min(1.0, max(-1.0, np.dot(rij, rik)/(r1*r2)))
                            cos_j = min(1.0, max(-1.0, np.dot(rij, rkj)/(r1*r3)))
                            cos_k = min(1.0, max(-1.0, -np.dot(rik, rkj)/(r2*r3)))
                            ind_i = int(np.arccos(cos_i)/astep)
                            ind_j = int(np.arccos(cos_j)/astep)
                            ind_k = int(np.arccos(cos_k)/astep)
                            if ind_i >= agrid_len:
                                ind_i = agrid_len - 1
                            if ind_j >= agrid_len:
                                ind_j = agrid_len - 1
                            if ind_k >= agrid_len:
                                ind_k = agrid_len - 1
                            sin_i = np.sqrt(max(0.0, 1.0 - cos_i*cos_i))
                            sin_j = np.sqrt(max(0.0, 1.0 - cos_j*cos_j))
                            sin_k = np.sqrt(max(0.0, 1.0 - cos_k*cos_k))
                            # angle gradients dtheta_c/dr_t; zeroed when the
                            # triple is (nearly) collinear to avoid 1/sin blowup
                            if sin_i > 1e-8:
                                dti_j = (u2 - cos_i*u1)/(r1*sin_i)
                                dti_k = (u1 - cos_i*u2)/(r2*sin_i)
                            else:
                                dti_j = np.zeros(3)
                                dti_k = np.zeros(3)
                            dti_i = -(dti_j + dti_k)
                            if sin_j > 1e-8:
                                dtj_i = (cos_j*u1 - u3)/(r1*sin_j)
                                dtj_k = (cos_j*u3 - u1)/(r3*sin_j)
                            else:
                                dtj_i = np.zeros(3)
                                dtj_k = np.zeros(3)
                            dtj_j = -(dtj_i + dtj_k)
                            if sin_k > 1e-8:
                                dtk_i = (cos_k*u2 + u3)/(r2*sin_k)
                                dtk_j = -(cos_k*u3 + u2)/(r3*sin_k)
                            else:
                                dtk_i = np.zeros(3)
                                dtk_j = np.zeros(3)
                            dtk_k = -(dtk_i + dtk_j)
                            # d log(atm)/d r_t summed over the three bonds
                            s_i = u1/r1 + u2/r2
                            s_j = -u1/r1 - u3/r3
                            s_k = -u2/r2 + u3/r3
                            id3 = 0
                            for i1 in range(m2):
                                for i2 in range(n2):
                                    a_i = aconvs[i1, i2, ind_i]/atm
                                    a_j = aconvs[i1, i2, ind_j]/atm
                                    a_k = aconvs[i1, i2, ind_k]/atm
                                    da_i = daconvs[i1, i2, ind_i]/atm
                                    da_j = daconvs[i1, i2, ind_j]/atm
                                    da_k = daconvs[i1, i2, ind_k]/atm
                                    threeb[i, j, k, id3] = charge*a_i
                                    threeb[i, k, j, id3] = charge*a_i
                                    threeb[j, i, k, id3] = charge*a_j
                                    threeb[j, k, i, id3] = charge*a_j
                                    threeb[k, i, j, id3] = charge*a_k
                                    threeb[k, j, i, id3] = charge*a_k
                                    # factor 2: each triple appears twice in the
                                    # pooled sum (symmetric storage above)
                                    for d in range(3):
                                        threeb_grad[i, id3, i, d] += 2.0*charge*(da_i*dti_i[d] - n_atm*a_i*s_i[d])
                                        threeb_grad[i, id3, j, d] += 2.0*charge*(da_i*dti_j[d] - n_atm*a_i*s_j[d])
                                        threeb_grad[i, id3, k, d] += 2.0*charge*(da_i*dti_k[d] - n_atm*a_i*s_k[d])
                                        threeb_grad[j, id3, i, d] += 2.0*charge*(da_j*dtj_i[d] - n_atm*a_j*s_i[d])
                                        threeb_grad[j, id3, j, d] += 2.0*charge*(da_j*dtj_j[d] - n_atm*a_j*s_j[d])
                                        threeb_grad[j, id3, k, d] += 2.0*charge*(da_j*dtj_k[d] - n_atm*a_j*s_k[d])
                                        threeb_grad[k, id3, i, d] += 2.0*charge*(da_k*dtk_i[d] - n_atm*a_k*s_i[d])
                                        threeb_grad[k, id3, j, d] += 2.0*charge*(da_k*dtk_j[d] - n_atm*a_k*s_j[d])
                                        threeb_grad[k, id3, k, d] += 2.0*charge*(da_k*dtk_k[d] - n_atm*a_k*s_k[d])
                                    id3 += 1
    return twob, twob_grad, threeb, threeb_grad


def get_cmbdf(charges, coods, convs, pad, rcut=10.0, n_atm=1.0, gradients=False):
    """Return the local (per-atom) cMBDF matrix for one molecule, padded to
    ``pad`` rows; with ``gradients=True`` also return the Cartesian gradient
    tensor.

    NOTE(review): the head of this function was lost to the file corruption;
    reconstructed to mirror the fully-visible ``get_cmbdf_global``.
    """
    rconvs, aconvs = convs
    size = len(charges)
    m1, n1 = rconvs[0].shape[0], rconvs[0].shape[1]
    m2, n2 = aconvs[0].shape[0], aconvs[0].shape[1]
    nr = m1*n1
    na = m2*n2
    desc_size = nr + na
    mat = np.zeros((pad, desc_size))
    assert size > 1, "No implementation for mono-atomics yet"
    if gradients:
        dmat = np.zeros((pad, desc_size, pad, 3))
        twob, twob_grad, threeb, threeb_grad = generate_data_with_gradients(
            size, charges, coods, rconvs, aconvs, rcut, n_atm)
        # pool pairwise/triplewise terms over the neighbor axes
        mat[:size, :nr] = einsum('ij... -> i...', twob)
        mat[:size, nr:] = einsum('ijk... -> i...', threeb)
        dmat[:size, :nr, :size, :] = twob_grad
        dmat[:size, nr:, :size, :] = threeb_grad
        return mat, dmat
    else:
        twob, threeb = generate_data(size, charges, coods, rconvs, aconvs, rcut, n_atm)
        mat[:size, :nr] = einsum('ij... -> i...', twob)
        mat[:size, nr:] = einsum('ijk... -> i...', threeb)
        return mat


def get_cmbdf_global(charges, coods, asize, rep_size, keys, convs, rcut=10.0, n_atm=1.0):
    """Return the flattened, bagged cMBDF feature vector for a molecule.

    Atoms are grouped by element (``keys``), each bag is sorted in descending
    order row-wise, and bags are padded to ``asize[key]`` rows each.
    """
    rconvs, aconvs = convs
    size = len(charges)
    assert size > 2, "No implementation for mono and diatomics yet"
    elements = {k: [] for k in keys}
    for i in range(size):
        elements[charges[i]].append(i)
    m1, n1 = rconvs[0].shape[0], rconvs[0].shape[1]
    m2, n2 = aconvs[0].shape[0], aconvs[0].shape[1]
    nr = m1*n1
    na = m2*n2
    desc_size = nr + na
    mat, ind = np.zeros((rep_size, desc_size)), 0
    #alc = alc_scaling(charges)
    twob, threeb = generate_data(size, charges, coods, rconvs, aconvs, rcut, n_atm)
    for key in keys:
        num = len(elements[key])
        bags = np.zeros((num, desc_size))
        if num != 0:
            bags[:, :nr] = einsum('ij... -> i...', twob[elements[key]])
            bags[:, nr:] = einsum('ijk... -> i...', threeb[elements[key]])
            # descending sort within the bag for permutation invariance
            mat[ind:ind+num] = -np.sort(-bags, axis=0)
        ind += asize[key]
    return mat.ravel(order='F')


def get_convolutions(rstep=0.0008, rcut=10.0, alpha_list=[1.5, 5.0], n_list=[3.0, 5.0],
                     order=4, a1=2.0, a2=2.0, astep=0.0002, nAs=4, gradients=False):
    """Return cMBDF convolutions evaluated via Fast Fourier Transforms.

    Radial filters: Gaussian-times-Hermite kernels convolved with exponential
    (``alpha_list``) and inverse-power (``n_list``) weighting functions.
    Angular filters: the same kernels convolved with cos(n*theta), n=1..nAs.
    Each filter is normalized to unit max-abs; derivative filters are taken
    numerically with ``np.gradient``.

    Returns ``(np.asarray([rconvs, drconvs]), np.asarray([aconvs, daconvs]))``.
    """
    # snap the grid steps so the grid lengths are FFT-friendly
    step_r = rcut/next_fast_len(int(rcut/rstep))
    astep = np.pi/next_fast_len(int(np.pi/astep))

    rgrid = np.arange(0.0, rcut, step_r)
    rgrid2 = np.arange(-rcut, rcut, step_r)
    agrid = np.arange(0.0, np.pi, astep)
    agrid2 = np.arange(-np.pi, np.pi, astep)

    size = len(rgrid)
    gaussian = np.exp(-a1*(rgrid2**2))
    m = order + 1
    temp1, temp2 = [], []
    dtemp1, dtemp2 = [], []
    fms = [gaussian, *[gaussian*hermite_polynomial(rgrid2, i, a1) for i in range(1, m+1)]]
    for i in range(m):
        fm = fms[i]
        #fm1 = fms[i+1]
        temp, dtemp = [], []
        for alpha in alpha_list:
            gn = np.exp(-alpha*rgrid)
            arr = fftconvolve(gn, fm, mode='same')*step_r
            arr = arr/np.max(np.abs(arr))
            temp.append(arr)
            darr = np.gradient(arr, step_r)
            #darr = -fftconvolve(gn, fm1, mode='full')[:size]*step_r
            dtemp.append(darr)
        temp1.append(np.array(temp))
        dtemp1.append(np.array(dtemp))
        temp, dtemp = [], []
        for n in n_list:
            gn = 2.2508*((rgrid+1)**n)
            arr = fftconvolve(1/gn, fm, mode='same')[:size]*step_r
            arr = arr/np.max(np.abs(arr))
            temp.append(arr)
            darr = np.gradient(arr, step_r)
            #darr = -fftconvolve(1/gn, fm1, mode='full')[:size]*step_r
            dtemp.append(darr)
        temp2.append(np.array(temp))
        dtemp2.append(np.array(dtemp))
    rconvs = np.concatenate((np.asarray(temp1), np.asarray(temp2)), axis=1)
    drconvs = np.concatenate((np.asarray(dtemp1), np.asarray(dtemp2)), axis=1)

    size = len(agrid)
    gaussian = np.exp(-a2*(agrid2**2))
    m = order + 1
    temp1, dtemp1 = [], []
    fms = [gaussian, *[gaussian*hermite_polynomial(agrid2, i, a2) for i in range(1, m+1)]]
    for i in range(m):
        fm = fms[i]
        temp, dtemp = [], []
        for n in range(1, nAs+1):
            gn = np.cos(n*agrid)
            #gn = chebyshev_polynomial(agrid,n)
            arr = fftconvolve(gn, fm, mode='same')*astep
            arr = arr/np.max(np.abs(arr))
            temp.append(arr)
            darr = np.gradient(arr, astep)
            dtemp.append(darr)
        temp1.append(np.array(temp))
        dtemp1.append(np.array(dtemp))
    aconvs, daconvs = np.asarray(temp1), np.asarray(dtemp1)

    return np.asarray([rconvs, drconvs]), np.asarray([aconvs, daconvs])


from joblib import Parallel, delayed


def generate_mbdf(nuclear_charges, coords, convs, alpha_list=[1.5, 5.0], n_list=[3.0, 5.0],
                  gradients=False, local=True, n_jobs=-1, a1=2.0, pad=None, rstep=0.0008,
                  rcut=10.0, astep=0.0002, nAs=4, order=4, progress_bar=False, a2=2.0,
                  n_atm=2.0):
    """Compute cMBDF representations (and optionally gradients) for a dataset.

    Parameters
    ----------
    nuclear_charges : array of per-molecule charge arrays
    coords : array of per-molecule (n_atoms, 3) coordinate arrays
    convs : output of ``get_convolutions``
    gradients : also return d(rep)/d(coords); local=True only
    local : per-atom matrices if True, else bagged global vectors
    pad : per-molecule atom-count padding; defaults to the dataset maximum

    Several keyword arguments (alpha_list, n_list, rstep, ...) are only used
    by the commented-out internal ``get_convolutions`` call and are kept for
    interface compatibility.
    """
    assert nuclear_charges.shape[0] == coords.shape[0], \
        "charges and coordinates array length mis-match"
    lengths, charges = [], []
    for i in range(len(nuclear_charges)):
        q, r = nuclear_charges[i], coords[i]
        assert q.shape[0] == r.shape[0], \
            "charges and coordinates array length mis-match for molecule at index" + str(i)
        lengths.append(len(q))
        charges.append(q.astype(np.float64))
    if pad is None:
        pad = max(lengths)
    #convs = get_convolutions(rstep,rcut,alpha_list,n_list,order,a1,a2,astep,nAs,gradients)
    if local:
        if gradients:
            if progress_bar:
                from tqdm import tqdm
                mbdf = Parallel(n_jobs=n_jobs)(
                    delayed(get_cmbdf)(charge, cood, convs, pad, rcut, n_atm, gradients=True)
                    for charge, cood in tqdm(list(zip(charges, coords))))
            else:
                mbdf = Parallel(n_jobs=n_jobs)(
                    delayed(get_cmbdf)(charge, cood, convs, pad, rcut, n_atm, gradients=True)
                    for charge, cood in list(zip(charges, coords)))
            A, dA = [], []
            for i in range(len(mbdf)):
                A.append(mbdf[i][0])
                dA.append(mbdf[i][1])
            A, dA = np.array(A), np.array(dA)
            return A, dA
        else:
            if progress_bar:
                from tqdm import tqdm
                reps = Parallel(n_jobs=n_jobs)(
                    delayed(get_cmbdf)(charge, cood, convs, pad, rcut, n_atm, gradients=False)
                    for charge, cood in tqdm(list(zip(charges, coords))))
            else:
                reps = Parallel(n_jobs=n_jobs)(
                    delayed(get_cmbdf)(charge, cood, convs, pad, rcut, n_atm, gradients=False)
                    for charge, cood in list(zip(charges, coords)))
            return np.asarray(reps)
    else:
        keys = np.unique(np.concatenate(charges))
        # widest bag per element across the dataset
        asize = {key: max([(mol == key).sum() for mol in charges]) for key in keys}
        rep_size = sum(asize.values())
        if progress_bar:
            from tqdm import tqdm
            reps = Parallel(n_jobs=n_jobs)(
                delayed(get_cmbdf_global)(charge, cood, asize, rep_size, keys, convs, rcut, n_atm)
                for charge, cood in tqdm(list(zip(charges, coords))))
        else:
            reps = Parallel(n_jobs=n_jobs)(
                delayed(get_cmbdf_global)(charge, cood, asize, rep_size, keys, convs, rcut, n_atm)
                for charge, cood in list(zip(charges, coords)))
        return np.asarray(reps)