python - Parameters estimation with multiple data sets using Gekko

Question

Welcome To Ask or Share your Answers For Others

python - Parameters estimation with multiple data sets using Gekko

posted Oct 7, 2021 in Technique[技术] by 深蓝 (71.8m points)

python - Parameters estimation with multiple data sets using Gekko

Following @John Hedengren ideas I modified my code (Parameters estimation and least squares minimization - Python and Gekko) and added another set of data following the explanations from here: How to set up GEKKO for parameter estimation from multiple independent sets of data? However, I am not reaching a good solution, particularly for measurement 4. Have you got any ideas about how can I adjust it better? Thank you for your time and help.

from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt 
import math as math
import pandas as pd

#data set 1
t_data1 = [0.08333,0.5,1,4,22.61667]
x_data1 = [7.77e-5,7e-5,7e-5,6.22e-5,3.89e-5]
x_data1mgl = [4,3.6,3.6,3.2,2]

#data set 2
t_data4 = [0.08333,0.5,1,4,22.61667]
x_data4 = [2.92e-4,2.72e-4,2.92e-4,2.72e-4,2.14e-4]
x_data4mgl = [15,14,15,14,11]

#combine data and time in the same dataframe
data1 = pd.DataFrame({'time':t_data1,'x1':x_data1})
data4 = pd.DataFrame({'time':t_data4,'x4':x_data4})
data1.set_index('time', inplace=True)
data4.set_index('time', inplace=True)


# merge dataframes
data = data1.join(data4, how='outer')

# simulation time points
dftp = pd.DataFrame({'time':np.linspace(0,25,51)})
dftp.set_index('time', inplace=True)

# merge dataframes
df = data.join(dftp,how='outer')

# get True (1) or False (0) for measurement
#df['meas'] = (df['x'].values==df['x'].values).astype(int)
z1 = (df['x1']==df['x1']).astype(int)
z4 = (df['x4']==df['x4']).astype(int)


# replace NaN with zeros
df0 = df.fillna(value=0)

#Estimator Model
m = GEKKO(remote=True)
#m = GEKKO(remote=False) # remote=False to produce local folder with results

m.time = df.index.values

# measurements - Gekko arrays to store measured data
xm = m.Array(m.Param,2)
xm[0].value = df0['x1'].values
xm[1].value = df0['x4'].values


cl=m.Array(m.Var,2)
cl[0].value= 4e-5
cl[1].value= 1.33e-4


TOC=m.Array(m.Var,2)
TOC[0].value= 11.85
TOC[1].value= 11.85


C0=m.Array(m.Var,2)
C1=m.Array(m.Var,2)
cC1=m.Array(m.Var,2)
C2=m.Array(m.Var,2)
C2m=m.Array(m.Var,2)
cC2=m.Array(m.Var,2)
C3=m.Array(m.Var,2)
C4=m.Array(m.Var,2)
C5=m.Array(m.Var,2)
C6=m.Array(m.Var,2)
C7=m.Array(m.Var,2)
C8=m.Array(m.Var,2)
C9=m.Array(m.Var,2)
C10=m.Array(m.Var,2)
C11=m.Array(m.Var,2)
C12=m.Array(m.Var,2)

r1=m.Array(m.Var,2)
r2=m.Array(m.Var,2)
r3=m.Array(m.Var,2)
r4=m.Array(m.Var,2)
r5=m.Array(m.Var,2)
r6=m.Array(m.Var,2)
r7=m.Array(m.Var,2)
r8=m.Array(m.Var,2)
r9=m.Array(m.Var,2)
r10=m.Array(m.Var,2)
r11=m.Array(m.Var,2)
r12=m.Array(m.Var,2)

k5=m.Array(m.Var,2)

#Define GEKKO variables that determine if time point contains data to be used in regression
#index for objective (0=not measured, 1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z4


# fit to measurement
x=m.Array(m.Var,2)                         
x[0].value=x_data1[0]
x[1].value=x_data4[0]


#adjustable parameters
kdoc1 = 0 #m-1h-1
kdoc2 = m.FV(1,lb=0.01,ub=10) #m-1h-1
S1 = 0
S2 = m.FV(1,lb=0.01,ub=10)

#Define Variables
for i in range(2):

#State variables
# M (mol/L)
    #C0 = m.Var(cl[i])
    C0[i] = m.Var(value=cl[i],lb=0,ub=1e-3)
    C1[i] = m.Var(value=6.9e-6,lb=0,ub=2e-3)
    C2[i] = m.Var(value=x[i],lb=0, ub=1e-3)
    C2m[i] = m.Param(xm[i],lb=0)
    meas = m.Param(zm[i])
    m.Minimize(meas*((C2[i]-C2m[i]))**2)
    C3[i] = m.Var(value=0)
    C4[i] = m.Var(value=3.16e-8)
    C5[i] = m.Var(value=3.83e-7)
    C6[i] = m.Var(value=0)
    C7[i]= m.Var(value=0)
    C8[i] = m.Var(value=0)
    C9[i] = m.Var(value=0)
    C10[i] = m.Var(value=0)
    C11[i] = m.Var(value=0)
    C12[i] = m.Var(value=3.21e-3)
    TOC[i] = m.Var(value=5.93)#mg-C/L 
    cC1[i] = m.Var(value=0)
    cC2[i] = m.Var(value=0)

#temperature oC
    T = 20 

#Rate constants
    k1 = 2040000000*math.exp(-1887/(273.15+T))*3600
    k2 = 138000000*math.exp(-8800/(273.15+T))*3600
    k3 = 300000*math.exp(-2010/(273.15+T))*3600
    k4 = 0.00000065*3600
    k6 = 26700*3600
    k7 = 167*3600
    k8 = 27700*3600
    k9 = 8300*3600
    k10 = 0
    k5[i] = 37800000000*math.exp(-2169/(273.15+T))*C4[i]
         +2.52e25*math.exp(-16860/(273.15+T))*C10[i]
         +0.87*math.exp(-503/(273.15+T))*C9[i]
 
    r1[i] = k1 * C0[i] * C1[i]
    r2[i] = k2 * C2[i]
    r3[i] = k3 * C0[i] * C2[i]
    r4[i] = k4 * C3[i]
    r5[i] = k5[i] * C2[i] * C2[i]
    r6[i] = k6 * C3[i] * C1[i]* C4[i]
    r7[i] = k7 * C3[i] * C5[i]
    r8[i] = k8 * C6[i] * C3[i]
    r9[i] = k9 * C6[i] * C2[i]
    r10[i] = k10 * C2[i] * C3[i]
    r11[i] = kdoc1*S1*TOC[i]*C2[i]/12000
    r12[i] = kdoc2*S2*TOC[i]*C0[i]/12000

    t = m.Param(value=m.time)
    m.Equation(C0[i].dt()== -r1[i] + r2[i] - r3[i] + r4[i] + r8[i] - r12[i])
    m.Equation(C1[i].dt()== -r1[i] + r2[i] + r5[i] - r6[i] + r11[i])
    m.Equation(C2[i].dt()== r1[i] - r2[i] - r3[i] + r4[i] - r5[i] + r6[i] - r9[i] - r10[i] - r11[i])
    m.Equation(C3[i].dt()== r3[i] - r4[i] + r5[i] - r6[i] - r7[i] - r8[i] - r10[i])
    m.Equation(C4[i].dt()== 0)
    m.Equation(C5[i] == 1e-14/C4[i])
    m.Equation(C6[i].dt()== r7[i] - r8[i] - r9[i])
    m.Equation(C7[i] == (3.16e-8*C0[i])/C4[i])
    m.Equation(C1[i] == (5.01e-10*C8[i])/C4[i])
    m.Equation(C11[i] == (5.01e-11*C10[i])/C4[i])
    m.Equation(C9[i] == (5.01e-7*C9[i])/C4[i])
    m.Equation(C10[i] == C12[i] - 2*C11[i] - C5[i] + C4[i])
    m.Equation(C12[i].dt()== 0)
    m.Equation(TOC[i].dt()== 0)
    m.Equation(cC1[i] == 17000*C1[i])
    m.Equation(cC2[i] == 51500*C2[i])
    m.Equation(S1+S2<1)

#Application options
m.options.SOLVER = 3 #IPOPT solver
m.options.IMODE = 5 #Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 #absolute error
m.options.NODES = 3 #collocation nodes (2,5)

if True:
    #kdoc1.STATUS=1
    kdoc2.STATUS=1
    #S1.STATUS=1
    S2.STATUS=1

#Solve
#m.open_folder() # open folder if remote=False to see infeasibilities.txt
m.solve(disp=True)


print('Final SSE Objective: ' + str(m.options.objfcnval))

print('Solution')
#print('kdoc1 = ' + str(kdoc1.value[0]))
print('kdoc2 = ' + str(kdoc2.value[0]))
#print('S1 = ' + str(S1.value[0]))
print('S2 = ' + str(S2.value[0]))

plt.figure(1,figsize=(8,5))
plt.subplot(2,1,1)
plt.plot(m.time,C2[0],'b.--',label='Predicted 1')
plt.plot(m.time,C2[1],'r.--',label='Predicted 4')


plt.plot(t_data1,x_data1,'bx',label='Meas 1')
plt.plot(t_data4,x_data4,'rx',label='Meas 4')
plt.legend(loc='best')

plt.subplot(2,1,2)
plt.plot(m.time,cC2[0].value,'b',label ='C2_M1')
plt.plot(m.time,cC2[1].value,'r',label ='C2_M4')
plt.plot(t_data1,x_data1mgl,'bx',label='Meas 1')
plt.plot(t_data4,x_data4mgl,'rx',label='Meas 4')


#plt.plot(m.time,cC1.value,label ='C1')
plt.legend(loc='best')
plt.xlabel('time (h)')

plt.figure(2,figsize=(12,8))
plt.subplot(4,3,1)
plt.plot(m.time,C0[i].value,label ='C0')
plt.legend()

plt.subplot(4,3,2)
plt.plot(m.time,C1[i].value,label ='C1')
plt.legend()

plt.subplot(4,3,3)
plt.plot(m.time,C3[i].value,label ='C3')
plt.legend()

plt.subplot(4,3,4)
plt.plot(m.time,C4[i].value,label ='C4')
plt.legend()

plt.subplot(4,3,5)
plt.plot(m.time,C5[i].value,label ='C5')
plt.legend()

plt.subplot(4,3,6)
plt.plot(m.time,C6[i].value,label ='C6')
plt.legend()

plt.subplot(4,3,7)
plt.plot(m.time,C7[i].value,label ='C7')
plt.legend()

plt.subplot(4,3,8)
plt.plot(m.time,C8[i].value,label ='C8')
plt.legend()

plt.subplot(4,3,9)
plt.plot(m.time,C9[i].value,label ='C9')
plt.legend()
plt.xlabel('time (h)')

plt.subplot(4,3,10)
plt.plot(m.time,C10[i].value,label ='C10')
plt.legend()
plt.xlabel('time (h)')

plt.subplot(4,3,11)
plt.plot(m.time,C11[i].value,label ='C11')
plt.legend()
plt.xlabel('time (h)')

plt.subplot(4,3,12)
plt.plot(m.time,C12[i].value,label ='C12')
plt.legend()
plt.xlabel('time (h)')

plt.show()

question from:https://stackoverflow.com/questions/65852474/parameters-estimation-with-multiple-data-sets-using-gekko

与恶龙缠斗过久,自身亦成为恶龙；凝视深渊过久,深渊将回以凝视…

1 Reply

深蓝 · Answer 1 · 2021-10-06T19:28:48+0000

Here are the results from your script:

There are a couple issues that may improve the fit:

There are very fast dynamics in the first time step. The concentrations of certain starting species has a big influence on how much these values change. You will likely need to use your knowledge of the experiment to estimate the concentrations of the initial conditions for the chemical compositions.
If you don't know the initial conditions or want the solver to estimate those then use

m.free_initial(x)

for any initial concentration that you'd like to calculate, substituting x for the name of the variable. Unfortunately, I don't have a lot more to offer on this problem because it would require more knowledge about the experiment and the equations used to model the dynamics.

Categories

python - Parameters estimation with multiple data sets using Gekko

python - Parameters estimation with multiple data sets using Gekko

Please log in or register to add a comment.

Please log in or register to reply this article.

1 Reply

Please log in or register to add a comment.

Just Browsing Browsing

Most popular tags