import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from scipy.optimize import differential_evolution


# Load in data
prod_data_url = r'https://raw.githubusercontent.com/ajmz1/DCA_optimizer/master/20210818_spe_rta_prod_data.csv'
df = pd.read_csv(prod_data_url)


# First 5 rows of data for inspection
df.head()


# "Summary statistics" of full dataframe
df.describe()


# Determine which well lease has the largest amount of days on production for analysis
max_DOP = df.groupby('Lease')['Time (Days)'].max().sort_values(ascending=False)
max_DOP[:5]

Lease
LORIKEET     4031.0
EGRET        3692.0
OSTRICH      3158.0
GOOSE        2846.0
CASSOWARY    2393.0
Name: Time (Days), dtype: float64


# Filter main dataframe to keep only the well with the greatest production history data
df1 = df[df['Lease'] == max_DOP.index[0]]
df1.head()


# Filter the dataframe to keep only the most important columns for analysis
df1_cols = df1.columns
cols = [df1_cols[1], df1_cols[3], df1_cols[5], df1_cols[7], df1_cols[8], df1_cols[12]]
pressure_calc_method = df1_cols[11]

df2 = df1[cols].reset_index(drop=True)
df2['Gas Volume Cumulative (MMscf)'] = df2['Gas Volume (MMscf)'].cumsum()
df2


# Make total plot
def production_y_pressure_plot():
    fig = make_subplots(specs=[[{'secondary_y': True}]])

    secondary_y = [False, False, False, True, True, True, True, False]
    for idx, col_name in enumerate(df2.columns[1:]):
        if idx == 0:
            fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2[col_name]*1000, name=col_name), secondary_y=secondary_y[idx])
        else:
            fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2[col_name], name=col_name), secondary_y=secondary_y[idx])

    fig.update_layout(title='Production and Pressure Plot', 
                 legend={
                     'orientation': 'h',
                     'yanchor': 'top',
                     'xanchor': 'right',
                     'x': 0.75,
                     'y': -0.15,
                 })
    fig.update_yaxes(title_text='Gas Volume (Mscf) <br> Water Volume (stb) <br> Cumulative Gas Volume (MMscf)', secondary_y=False,
                    range=[0, 10000])
    fig.update_yaxes(title_text='Casing Pressure (psi) <br> Tubing Pressure (psi) <br> Calculated Sandface Pressure (psi)',
                    secondary_y=True, range=[0, 5000])    
    return fig.show()

# Make the specific production plot that is a subplot visual
def production_plot():
    fig = make_subplots(rows=2, cols=2, subplot_titles=('Cartesian Production Volume', 'Semilog-y Production Volume',
                                                       'Cumulative Production Volume', 'Log-Log Production Volume'))
    fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], name='Gas Volume (MMscf)'), row=1, col=1)
    fig.update_layout(
                 legend={
                     'orientation': 'h',
                     'yanchor': 'top',
                     'xanchor': 'right',
                     'x': 0.75,
                     'y': -0.15,
                 })
    
    fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], name='Gas Volume (MMscf)'), row=1, col=2)
    fig.update_yaxes(type='log', row=1, col=2)
    
    fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume Cumulative (MMscf)'], name='Cumulative Gas Volume (MMscf)'), row=2, col=1)
    
    fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], name='Cumulative Gas Volume (MMscf)'), row=2, col=2)
    fig.update_yaxes(type='log', row=2, col=2)
    fig.update_xaxes(type='log', row=2, col=2)
    return fig.show()


production_y_pressure_plot()


production_plot()


# Define the arrays that will be varied for the plotting
qi = 12
b = np.linspace(0.1, 2.0, 12)

# Main hyperbolic decline function that is utilized
def hyperbolic_func(qi, b, Di):
    q_model = qi / (1 + b * Di * df2['Time (Days)'])**(1/b)
    return q_model

# Output the varied b-factor to illustrate sensitivity in the curvature
fig = go.Figure()
for i in range(len(b)):
    if i == 0:
        q_vary_b = hyperbolic_func(qi, b[i], 0.005)
        fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_b, name=f'b= {b[i]:.2f}'))
    else:
        q_vary_b = np.column_stack((q_vary_b, hyperbolic_func(qi, b[i], 0.005)))
        fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_b[:, i], name=f'b= {b[i]:.2f}'))
fig.update_layout(title=r'b-factor sensitivity with q<sub>i</sub>=12 and D<sub>i</sub>=0.005', 
                 xaxis_title='Days on Production', yaxis_title='Gas Volume (MMscf)')
    
fig.show()


qi = 12
Di = np.logspace(-4, -1, 12)

# Output the varied Di-factor to illustrate sensitivity in the decline parameter
fig = go.Figure()
for i in range(len(Di)):
    if i == 0:
        q_vary_d = hyperbolic_func(qi, 1, Di[i])
        fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_d, name=f'D<sub>i</sub>= {Di[i]:.2e}'))
    else:
        q_vary_d = np.column_stack((q_vary_d, hyperbolic_func(qi, 1, Di[i])))
        fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_d[:, i], name=f'D<sub>i</sub>= {Di[i]:.2e}'))
fig.update_layout(title=r'D<sub>i</sub>-factor sensitivity with q<sub>i</sub>=12 and b=1', 
                 xaxis_title='Days on Production', yaxis_title='Gas Volume (MMscf)')
    
fig.show()


# ==========================================================
# Input the test values here for your model
qi_test1 = 12
b_test1 = 1.05
Di_test1 = 0.029

qi_test2 = 8
b_test2 = 0.7
Di_test2 = 0.01

# ==========================================================


# Compute the model and plot the 4 visuals we will analyze
test1 = hyperbolic_func(qi_test1, b_test1, Di_test1)
test2 = hyperbolic_func(qi_test2, b_test2, Di_test2)

fig = make_subplots(rows=4, cols=1, subplot_titles=('Cartesian Daily Production', 'Semilog-y Daily Production', 'Log-Log Daily Production', 'Cartesian Cumulative Production'))

# Figure 1: Cartesian plot of daily gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              )
            ), row=1, col=1
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5)), row=1, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5)), row=1, col=1)


# Figure 2: Semilog-y plot of daily gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              ), showlegend=False
            ), row=2, col=1
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=2, col=1)


# Figure 3: log-log plot of daily gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              ), showlegend=False
            ), row=3, col=1
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=3, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=3, col=1)


# Figure 4: Cartesian plot of cumulative gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume Cumulative (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              ), showlegend=False
            ), row=4, col=1
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1.cumsum(), name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=4, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2.cumsum(), name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=4, col=1)


fig.update_yaxes(type='log', tickformat='.0e', row=2, col=1)
fig.update_xaxes(type='log', tickformat='.0e', row=3, col=1)
fig.update_yaxes(type='log', tickformat='.0e', row=3, col=1)
fig.update_layout(width=2000, height=1500, margin=dict(l=0, r=0, b=25, t=25))


fig.show()


# Eggholder function that reports the MSE that we aim to minimize
def eggholder(args):
    qi, b, Di = args[0], args[1], args[2]
    q_model = hyperbolic_func(qi, b, Di)
    q_model_cum = q_model.cumsum()
    
    mse = (((q_model - df2['Gas Volume (MMscf)'])**2).sum() / len(q_model)) + (((q_model_cum - df2['Gas Volume Cumulative (MMscf)'])**2).sum() / len(q_model_cum))
    return mse

# Run the optimization function with errors provided
bounds = [(4, 15), (0.01, 2), (0, 1)]
result = differential_evolution(eggholder, bounds)

# Illustrate the output of the optimization results
result

     fun: 43.935862119948396
     jac: array([1.37653374, 3.36911297, 1.97578487])
 message: 'Optimization terminated successfully.'
    nfev: 3528
     nit: 75
 success: True
       x: array([7.29921299, 0.88783905, 0.01149926])


# Generate the scipy model hyperbolic decline
scipy_model = hyperbolic_func(result.x[0], result.x[1], result.x[2])

# Create the 4 subplots
fig = make_subplots(rows=2, cols=2, 
                   subplot_titles=('Cartesian Daily Production', 'Semilog-y Daily Production', 'Log-Log Daily Production',
                                  'Cartesian Cumulative Production'))

# Figure 1: Cartesian plot of daily gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              )
            ), row=1, col=1
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5)), row=1, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5)), row=1, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model, name='Scipy Model', line=dict(color='lightgreen', width=3.5)), row=1, col=1)


# Figure 2: Semilog-y plot of daily gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              ), showlegend=False
            ), row=1, col=2
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=1, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=1, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model, name='Scipy Model', line=dict(color='lightgreen', width=3.5), showlegend=False), row=1, col=2)


# Figure 3: log-log plot of daily gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              ), showlegend=False
            ), row=2, col=1
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model, name='Scipy Model', line=dict(color='lightgreen', width=3.5), showlegend=False), row=2, col=1)


# Figure 4: Cartesian plot of cumulative gas volume v. time
fig.add_trace(
    go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume Cumulative (MMscf)'], mode='markers', name='Data',
              marker=dict(
                  size=8,
                  color='rgba(135, 206, 250, 0.)',
                  line=dict(
                      color='black',
                      width=1
                  )
              ), showlegend=False
            ), row=2, col=2
)

fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1.cumsum(), name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=2, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2.cumsum(), name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=2, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model.cumsum(), name='Scipy Model', line=dict(color='lightgreen', width=3.5), showlegend=False), row=2, col=2)


fig.update_yaxes(type='log', tickformat='.0e', row=1, col=2)
fig.update_xaxes(type='log', tickformat='.0e', row=2, col=1)
fig.update_yaxes(type='log', tickformat='.0e', row=2, col=1)
fig.update_layout(width=2000, height=1000, margin=dict(l=0, r=0, b=25, t=25))


fig.show()

	Lease	Time (Days)	Choke Size	Gas Volume (MMscf)	Oil Volume (stb)	Water Volume (stb)	Gas Lift Inj Volume (MMscf)	Casing Pressure (psi(a))	Tubing Pressure (psi(a))	Active Pressure (psi(a))	Line Pressure (psi(a))	Pressure Source	Calculated Sandface Pressure (psi(a))
0	OSPREY	1.0	NaN	0.145	504.39	718.0	NaN	2064.695943	14.695943	2064.695943	14.695943	Casing Pressure	5050.159793
1	OSPREY	2.0	NaN	0.186	564.76	922.0	NaN	1989.695943	14.695943	1989.695943	14.695943	Casing Pressure	5009.599839
2	OSPREY	3.0	NaN	0.231	653.51	753.0	NaN	1864.695943	14.695943	1864.695943	14.695943	Casing Pressure	4795.991972
3	OSPREY	4.0	NaN	0.268	740.71	700.0	NaN	1814.695943	14.695943	1814.695943	14.695943	Casing Pressure	4696.626023
4	OSPREY	5.0	NaN	0.261	678.06	530.0	NaN	1714.695943	14.695943	1714.695943	14.695943	Casing Pressure	4546.990059

	Time (Days)	Choke Size	Gas Volume (MMscf)	Oil Volume (stb)	Water Volume (stb)	Gas Lift Inj Volume (MMscf)	Casing Pressure (psi(a))	Tubing Pressure (psi(a))	Active Pressure (psi(a))	Line Pressure (psi(a))	Calculated Sandface Pressure (psi(a))
count	60967.000000	2288.000000	60748.000000	52537.000000	60958.000000	7651.000000	60967.000000	60967.000000	60967.000000	60967.000000	60137.000000
mean	852.282382	27.877185	5.982365	36.450496	77.349569	0.219279	1283.679384	759.501393	1390.495107	696.823718	2299.493810
std	824.906531	9.385130	7.607748	105.729334	149.923524	0.158324	1514.521635	899.819732	1465.415127	412.611400	1833.520500
min	0.000000	8.000000	0.000000	0.000000	0.000000	0.000000	-14.650000	-14.650000	-14.650000	0.000000	6.780602
25%	255.000000	22.000000	0.235618	0.000000	1.800000	0.000000	254.178000	0.000000	671.849500	405.369500	1161.582338
50%	574.000000	26.000000	3.878150	0.000000	9.625000	0.294000	891.922000	701.079000	972.141000	823.305000	1572.866109
75%	1178.000000	32.000000	8.115673	0.000000	78.000000	0.350000	1327.225500	1095.000000	1350.579000	1048.623500	2640.255039
max	4031.000000	48.000000	61.548720	1249.000000	2287.000000	0.532000	10239.180000	8499.389000	8700.180000	1310.234000	11093.735290

	Lease	Time (Days)	Choke Size	Gas Volume (MMscf)	Water Volume (stb)	Gas Lift Inj Volume (MMscf)	Casing Pressure (psi(a))	Active Pressure (psi(a))	Line Pressure (psi(a))	Pressure Source	Calculated Sandface Pressure (psi(a))
12239	LORIKEET	1.0	NaN	4.45783	332.0	NaN	7275.902	7275.902	1176.263	Casing Pressure	9243.492426
12240	LORIKEET	2.0	NaN	10.23198	482.0	NaN	6158.324	6158.324	1234.895	Casing Pressure	7786.533870
12241	LORIKEET	3.0	NaN	11.90695	541.0	NaN	5665.502	5665.502	1224.994	Casing Pressure	7222.107405
12242	LORIKEET	4.0	NaN	11.73224	556.0	NaN	5339.542	5339.542	1218.640	Casing Pressure	6869.085268
12243	LORIKEET	5.0	NaN	10.88005	538.0	NaN	5044.625	5044.625	1204.742	Casing Pressure	6550.734048

	Time (Days)	Gas Volume (MMscf)	Water Volume (stb)	Casing Pressure (psi(a))	Tubing Pressure (psi(a))	Calculated Sandface Pressure (psi(a))	Gas Volume Cumulative (MMscf)
0	1.0	4.45783	332.00	7275.902	0.000	9243.492426	4.45783
1	2.0	10.23198	482.00	6158.324	0.000	7786.533870	14.68981
2	3.0	11.90695	541.00	5665.502	0.000	7222.107405	26.59676
3	4.0	11.73224	556.00	5339.542	0.000	6869.085268	38.32900
4	5.0	10.88005	538.00	5044.625	0.000	6550.734048	49.20905
...	...	...	...	...	...	...	...
4026	4027.0	0.14167	0.00	1380.025	1224.612	1512.040190	2123.90747
4027	4028.0	0.06459	0.14	1404.532	1250.867	2666.041664	2123.97206
4028	4029.0	0.14653	1.01	1411.368	1245.581	2403.007754	2124.11859
4029	4030.0	0.06990	0.00	1372.138	1221.453	1507.705822	2124.18849
4030	4031.0	0.14285	0.00	1405.675	1248.167	1541.461046	2124.33134

DCA Analysis: Optimizing Hyperbolic Decline Models¶

Import necessary libraries¶

Load, review, and filter production data¶

Generate overview plots for the well data¶

Hyperbolic decline primer¶

Determining an initial guess for optimization in the decline model¶

Leveraging differential evolution for finding global minimum¶

Visualize the results¶