Anthony Jimenez
18 August 2021
http://www.fa-jimenez.com/
This data is sourced from the Society of Petroleum Engineers Data Repository as part of the SPE Bleeding Edge of RTA Group (SPE-BERG).
SPE Data Repository: Data Set: dataset_1, Well Number: all_wells. From URL: https://www.spe.org/datasets/dataset_1/csv_files/dataset_1_all_wells/production_data
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from scipy.optimize import differential_evolution
Steps accomplished in this section:
Analysis will be conducted on the LORIKEET well since this has 4031 days of production (DOP)
# Load in data
prod_data_url = r'https://raw.githubusercontent.com/ajmz1/DCA_optimizer/master/20210818_spe_rta_prod_data.csv'
df = pd.read_csv(prod_data_url)
# First 5 rows of data for inspection
df.head()
Lease | Time (Days) | Choke Size | Gas Volume (MMscf) | Oil Volume (stb) | Water Volume (stb) | Gas Lift Inj Volume (MMscf) | Casing Pressure (psi(a)) | Tubing Pressure (psi(a)) | Active Pressure (psi(a)) | Line Pressure (psi(a)) | Pressure Source | Calculated Sandface Pressure (psi(a)) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | OSPREY | 1.0 | NaN | 0.145 | 504.39 | 718.0 | NaN | 2064.695943 | 14.695943 | 2064.695943 | 14.695943 | Casing Pressure | 5050.159793 |
1 | OSPREY | 2.0 | NaN | 0.186 | 564.76 | 922.0 | NaN | 1989.695943 | 14.695943 | 1989.695943 | 14.695943 | Casing Pressure | 5009.599839 |
2 | OSPREY | 3.0 | NaN | 0.231 | 653.51 | 753.0 | NaN | 1864.695943 | 14.695943 | 1864.695943 | 14.695943 | Casing Pressure | 4795.991972 |
3 | OSPREY | 4.0 | NaN | 0.268 | 740.71 | 700.0 | NaN | 1814.695943 | 14.695943 | 1814.695943 | 14.695943 | Casing Pressure | 4696.626023 |
4 | OSPREY | 5.0 | NaN | 0.261 | 678.06 | 530.0 | NaN | 1714.695943 | 14.695943 | 1714.695943 | 14.695943 | Casing Pressure | 4546.990059 |
# "Summary statistics" of full dataframe
df.describe()
Time (Days) | Choke Size | Gas Volume (MMscf) | Oil Volume (stb) | Water Volume (stb) | Gas Lift Inj Volume (MMscf) | Casing Pressure (psi(a)) | Tubing Pressure (psi(a)) | Active Pressure (psi(a)) | Line Pressure (psi(a)) | Calculated Sandface Pressure (psi(a)) | |
---|---|---|---|---|---|---|---|---|---|---|---|
count | 60967.000000 | 2288.000000 | 60748.000000 | 52537.000000 | 60958.000000 | 7651.000000 | 60967.000000 | 60967.000000 | 60967.000000 | 60967.000000 | 60137.000000 |
mean | 852.282382 | 27.877185 | 5.982365 | 36.450496 | 77.349569 | 0.219279 | 1283.679384 | 759.501393 | 1390.495107 | 696.823718 | 2299.493810 |
std | 824.906531 | 9.385130 | 7.607748 | 105.729334 | 149.923524 | 0.158324 | 1514.521635 | 899.819732 | 1465.415127 | 412.611400 | 1833.520500 |
min | 0.000000 | 8.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -14.650000 | -14.650000 | -14.650000 | 0.000000 | 6.780602 |
25% | 255.000000 | 22.000000 | 0.235618 | 0.000000 | 1.800000 | 0.000000 | 254.178000 | 0.000000 | 671.849500 | 405.369500 | 1161.582338 |
50% | 574.000000 | 26.000000 | 3.878150 | 0.000000 | 9.625000 | 0.294000 | 891.922000 | 701.079000 | 972.141000 | 823.305000 | 1572.866109 |
75% | 1178.000000 | 32.000000 | 8.115673 | 0.000000 | 78.000000 | 0.350000 | 1327.225500 | 1095.000000 | 1350.579000 | 1048.623500 | 2640.255039 |
max | 4031.000000 | 48.000000 | 61.548720 | 1249.000000 | 2287.000000 | 0.532000 | 10239.180000 | 8499.389000 | 8700.180000 | 1310.234000 | 11093.735290 |
# Determine which well lease has the largest amount of days on production for analysis
max_DOP = df.groupby('Lease')['Time (Days)'].max().sort_values(ascending=False)
max_DOP[:5]
Lease LORIKEET 4031.0 EGRET 3692.0 OSTRICH 3158.0 GOOSE 2846.0 CASSOWARY 2393.0 Name: Time (Days), dtype: float64
# Filter main dataframe to keep only the well with the greatest production history data
df1 = df[df['Lease'] == max_DOP.index[0]]
df1.head()
Lease | Time (Days) | Choke Size | Gas Volume (MMscf) | Oil Volume (stb) | Water Volume (stb) | Gas Lift Inj Volume (MMscf) | Casing Pressure (psi(a)) | Tubing Pressure (psi(a)) | Active Pressure (psi(a)) | Line Pressure (psi(a)) | Pressure Source | Calculated Sandface Pressure (psi(a)) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12239 | LORIKEET | 1.0 | NaN | 4.45783 | 0.0 | 332.0 | NaN | 7275.902 | 0.0 | 7275.902 | 1176.263 | Casing Pressure | 9243.492426 |
12240 | LORIKEET | 2.0 | NaN | 10.23198 | 0.0 | 482.0 | NaN | 6158.324 | 0.0 | 6158.324 | 1234.895 | Casing Pressure | 7786.533870 |
12241 | LORIKEET | 3.0 | NaN | 11.90695 | 0.0 | 541.0 | NaN | 5665.502 | 0.0 | 5665.502 | 1224.994 | Casing Pressure | 7222.107405 |
12242 | LORIKEET | 4.0 | NaN | 11.73224 | 0.0 | 556.0 | NaN | 5339.542 | 0.0 | 5339.542 | 1218.640 | Casing Pressure | 6869.085268 |
12243 | LORIKEET | 5.0 | NaN | 10.88005 | 0.0 | 538.0 | NaN | 5044.625 | 0.0 | 5044.625 | 1204.742 | Casing Pressure | 6550.734048 |
# Filter the dataframe to keep only the most important columns for analysis
df1_cols = df1.columns
cols = [df1_cols[1], df1_cols[3], df1_cols[5], df1_cols[7], df1_cols[8], df1_cols[12]]
pressure_calc_method = df1_cols[11]
df2 = df1[cols].reset_index(drop=True)
df2['Gas Volume Cumulative (MMscf)'] = df2['Gas Volume (MMscf)'].cumsum()
df2
Time (Days) | Gas Volume (MMscf) | Water Volume (stb) | Casing Pressure (psi(a)) | Tubing Pressure (psi(a)) | Calculated Sandface Pressure (psi(a)) | Gas Volume Cumulative (MMscf) | |
---|---|---|---|---|---|---|---|
0 | 1.0 | 4.45783 | 332.00 | 7275.902 | 0.000 | 9243.492426 | 4.45783 |
1 | 2.0 | 10.23198 | 482.00 | 6158.324 | 0.000 | 7786.533870 | 14.68981 |
2 | 3.0 | 11.90695 | 541.00 | 5665.502 | 0.000 | 7222.107405 | 26.59676 |
3 | 4.0 | 11.73224 | 556.00 | 5339.542 | 0.000 | 6869.085268 | 38.32900 |
4 | 5.0 | 10.88005 | 538.00 | 5044.625 | 0.000 | 6550.734048 | 49.20905 |
... | ... | ... | ... | ... | ... | ... | ... |
4026 | 4027.0 | 0.14167 | 0.00 | 1380.025 | 1224.612 | 1512.040190 | 2123.90747 |
4027 | 4028.0 | 0.06459 | 0.14 | 1404.532 | 1250.867 | 2666.041664 | 2123.97206 |
4028 | 4029.0 | 0.14653 | 1.01 | 1411.368 | 1245.581 | 2403.007754 | 2124.11859 |
4029 | 4030.0 | 0.06990 | 0.00 | 1372.138 | 1221.453 | 1507.705822 | 2124.18849 |
4030 | 4031.0 | 0.14285 | 0.00 | 1405.675 | 1248.167 | 1541.461046 | 2124.33134 |
4031 rows × 7 columns
# Make total plot
def production_y_pressure_plot():
fig = make_subplots(specs=[[{'secondary_y': True}]])
secondary_y = [False, False, False, True, True, True, True, False]
for idx, col_name in enumerate(df2.columns[1:]):
if idx == 0:
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2[col_name]*1000, name=col_name), secondary_y=secondary_y[idx])
else:
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2[col_name], name=col_name), secondary_y=secondary_y[idx])
fig.update_layout(title='Production and Pressure Plot',
legend={
'orientation': 'h',
'yanchor': 'top',
'xanchor': 'right',
'x': 0.75,
'y': -0.15,
})
fig.update_yaxes(title_text='Gas Volume (Mscf) <br> Water Volume (stb) <br> Cumulative Gas Volume (MMscf)', secondary_y=False,
range=[0, 10000])
fig.update_yaxes(title_text='Casing Pressure (psi) <br> Tubing Pressure (psi) <br> Calculated Sandface Pressure (psi)',
secondary_y=True, range=[0, 5000])
return fig.show()
# Make the specific production plot that is a subplot visual
def production_plot():
fig = make_subplots(rows=2, cols=2, subplot_titles=('Cartesian Production Volume', 'Semilog-y Production Volume',
'Cumulative Production Volume', 'Log-Log Production Volume'))
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], name='Gas Volume (MMscf)'), row=1, col=1)
fig.update_layout(
legend={
'orientation': 'h',
'yanchor': 'top',
'xanchor': 'right',
'x': 0.75,
'y': -0.15,
})
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], name='Gas Volume (MMscf)'), row=1, col=2)
fig.update_yaxes(type='log', row=1, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume Cumulative (MMscf)'], name='Cumulative Gas Volume (MMscf)'), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], name='Cumulative Gas Volume (MMscf)'), row=2, col=2)
fig.update_yaxes(type='log', row=2, col=2)
fig.update_xaxes(type='log', row=2, col=2)
return fig.show()
production_y_pressure_plot()
production_plot()
One of the most widely used decline models currently is the hyperbolic decline.
Outlined by J.J. Arps, the general form of the hyperbolic decline is:
$$q(t) = \frac{q_i}{(1 + b D_i t)^{1/b}}$$The components of the decline model are:
# Define the arrays that will be varied for the plotting
qi = 12
b = np.linspace(0.1, 2.0, 12)
# Main hyperbolic decline function that is utilized
def hyperbolic_func(qi, b, Di):
q_model = qi / (1 + b * Di * df2['Time (Days)'])**(1/b)
return q_model
# Output the varied b-factor to illustrate sensitivity in the curvature
fig = go.Figure()
for i in range(len(b)):
if i == 0:
q_vary_b = hyperbolic_func(qi, b[i], 0.005)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_b, name=f'b= {b[i]:.2f}'))
else:
q_vary_b = np.column_stack((q_vary_b, hyperbolic_func(qi, b[i], 0.005)))
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_b[:, i], name=f'b= {b[i]:.2f}'))
fig.update_layout(title=r'b-factor sensitivity with q<sub>i</sub>=12 and D<sub>i</sub>=0.005',
xaxis_title='Days on Production', yaxis_title='Gas Volume (MMscf)')
fig.show()
qi = 12
Di = np.logspace(-4, -1, 12)
# Output the varied Di-factor to illustrate sensitivity in the decline parameter
fig = go.Figure()
for i in range(len(Di)):
if i == 0:
q_vary_d = hyperbolic_func(qi, 1, Di[i])
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_d, name=f'D<sub>i</sub>= {Di[i]:.2e}'))
else:
q_vary_d = np.column_stack((q_vary_d, hyperbolic_func(qi, 1, Di[i])))
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=q_vary_d[:, i], name=f'D<sub>i</sub>= {Di[i]:.2e}'))
fig.update_layout(title=r'D<sub>i</sub>-factor sensitivity with q<sub>i</sub>=12 and b=1',
xaxis_title='Days on Production', yaxis_title='Gas Volume (MMscf)')
fig.show()
Although we will ultimately use a scipy.optimize package to evaluate the decline model, we want to know how we should bound our model so the optimizer will have the greatest chance of being successful.
This next sections is where I was testing a few different values, however for brevity, I will only show two options that I tried.
# ==========================================================
# Input the test values here for your model
qi_test1 = 12
b_test1 = 1.05
Di_test1 = 0.029
qi_test2 = 8
b_test2 = 0.7
Di_test2 = 0.01
# ==========================================================
# Compute the model and plot the 4 visuals we will analyze
test1 = hyperbolic_func(qi_test1, b_test1, Di_test1)
test2 = hyperbolic_func(qi_test2, b_test2, Di_test2)
fig = make_subplots(rows=4, cols=1, subplot_titles=('Cartesian Daily Production', 'Semilog-y Daily Production', 'Log-Log Daily Production', 'Cartesian Cumulative Production'))
# Figure 1: Cartesian plot of daily gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
)
), row=1, col=1
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5)), row=1, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5)), row=1, col=1)
# Figure 2: Semilog-y plot of daily gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
), showlegend=False
), row=2, col=1
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=2, col=1)
# Figure 3: log-log plot of daily gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
), showlegend=False
), row=3, col=1
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=3, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=3, col=1)
# Figure 4: Cartesian plot of cumulative gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume Cumulative (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
), showlegend=False
), row=4, col=1
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1.cumsum(), name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=4, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2.cumsum(), name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=4, col=1)
fig.update_yaxes(type='log', tickformat='.0e', row=2, col=1)
fig.update_xaxes(type='log', tickformat='.0e', row=3, col=1)
fig.update_yaxes(type='log', tickformat='.0e', row=3, col=1)
fig.update_layout(width=2000, height=1500, margin=dict(l=0, r=0, b=25, t=25))
fig.show()
From the scipy.optimize package, we will use the differential evolution algorithm for finding the global minimum. To do so however, we need an objective function that we aim to minimize.
The strategy we will employ will take into account the following two measures:
The eggholder function will be defined whose sole purpose is to test the different optimized variables and return the mean squared error. The return of the eggholder function is defined as follows:
$$MSE_{total} = MSE_{daily} + MSE_{cumulative}$$where,
$$MSE = \frac{1}{n} \sum_{i=1}^{n} (q_{model, i} - q_{data, i})^2$$# Eggholder function that reports the MSE that we aim to minimize
def eggholder(args):
qi, b, Di = args[0], args[1], args[2]
q_model = hyperbolic_func(qi, b, Di)
q_model_cum = q_model.cumsum()
mse = (((q_model - df2['Gas Volume (MMscf)'])**2).sum() / len(q_model)) + (((q_model_cum - df2['Gas Volume Cumulative (MMscf)'])**2).sum() / len(q_model_cum))
return mse
# Run the optimization function with errors provided
bounds = [(4, 15), (0.01, 2), (0, 1)]
result = differential_evolution(eggholder, bounds)
# Illustrate the output of the optimization results
result
fun: 43.935862119948396 jac: array([1.37653374, 3.36911297, 1.97578487]) message: 'Optimization terminated successfully.' nfev: 3528 nit: 75 success: True x: array([7.29921299, 0.88783905, 0.01149926])
To review our models, we replicate the 4 plots from above but now with all the models.
From personal analysis, there was a tendency to want to select an initial producion volume value near 12 MMscf/d, but the scipy optimized value is reduced closer to 7 MMscf/d.
If being used in company evaluations, should there be a desire for a higher initial production value, the bounds that are fed to the optimizer can be adjusted.
In future works, we can also use some of the other optimization packages from scipy like:
# Generate the scipy model hyperbolic decline
scipy_model = hyperbolic_func(result.x[0], result.x[1], result.x[2])
# Create the 4 subplots
fig = make_subplots(rows=2, cols=2,
subplot_titles=('Cartesian Daily Production', 'Semilog-y Daily Production', 'Log-Log Daily Production',
'Cartesian Cumulative Production'))
# Figure 1: Cartesian plot of daily gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
)
), row=1, col=1
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5)), row=1, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5)), row=1, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model, name='Scipy Model', line=dict(color='lightgreen', width=3.5)), row=1, col=1)
# Figure 2: Semilog-y plot of daily gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
), showlegend=False
), row=1, col=2
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=1, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=1, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model, name='Scipy Model', line=dict(color='lightgreen', width=3.5), showlegend=False), row=1, col=2)
# Figure 3: log-log plot of daily gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
), showlegend=False
), row=2, col=1
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1, name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2, name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model, name='Scipy Model', line=dict(color='lightgreen', width=3.5), showlegend=False), row=2, col=1)
# Figure 4: Cartesian plot of cumulative gas volume v. time
fig.add_trace(
go.Scatter(x=df2['Time (Days)'], y=df2['Gas Volume Cumulative (MMscf)'], mode='markers', name='Data',
marker=dict(
size=8,
color='rgba(135, 206, 250, 0.)',
line=dict(
color='black',
width=1
)
), showlegend=False
), row=2, col=2
)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test1.cumsum(), name='Model 1', line=dict(color='red', width=3.5), showlegend=False), row=2, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=test2.cumsum(), name='Model 2', line=dict(color='gold', width=3.5), showlegend=False), row=2, col=2)
fig.add_trace(go.Scatter(x=df2['Time (Days)'], y=scipy_model.cumsum(), name='Scipy Model', line=dict(color='lightgreen', width=3.5), showlegend=False), row=2, col=2)
fig.update_yaxes(type='log', tickformat='.0e', row=1, col=2)
fig.update_xaxes(type='log', tickformat='.0e', row=2, col=1)
fig.update_yaxes(type='log', tickformat='.0e', row=2, col=1)
fig.update_layout(width=2000, height=1000, margin=dict(l=0, r=0, b=25, t=25))
fig.show()