Rian Rachmanto committed on
Commit
6afbd8c
·
1 Parent(s): 2a325f2

changing from google to duckdb

Browse files
Files changed (2) hide show
  1. app.py +43 -55
  2. duckdcaapp.py +173 -0
app.py CHANGED
@@ -5,20 +5,18 @@ import seaborn as sns
5
  from google.cloud import bigquery
6
  from google.oauth2 import service_account
7
  import os
8
- import json # Import the json module for parsing JSON
9
  import matplotlib.pyplot as plt
10
  from scipy.optimize import curve_fit
11
 
12
- # Load the BigQuery credentials from the environment variable
13
- json_key = os.environ["BIGQUERY_KEY"]
 
14
 
15
- # Parse the JSON key string into a dictionary
16
- credentials_info = json.loads(json_key)
17
-
18
- # Create API client.
19
  credentials = service_account.Credentials.from_service_account_info(credentials_info)
20
  client = bigquery.Client(credentials=credentials)
21
 
 
22
  QUERY = """
23
  SELECT
24
  DATEPRD,
@@ -33,35 +31,33 @@ WHERE
33
  AND BORE_WAT_VOL IS NOT NULL
34
  ORDER BY
35
  NPD_WELL_BORE_NAME ASC, DATEPRD DESC;
36
- """
37
 
38
- # Run the query using the client
39
  query_job = client.query(QUERY)
40
 
41
  st.set_option('deprecation.showPyplotGlobalUse', False)
42
- # Streamlit app
43
  st.title("DECLINE CURVE ANALYSIS (DCA)")
44
 
45
-
46
-
47
  # Create data handler function
48
  def data_handler(query_job):
49
  results = query_job.result()
50
  df = results.to_dataframe()
51
  st.write(df.head())
52
 
53
- df_fil = df[(df['BORE_OIL_VOL'] > 0) & (df['BORE_GAS_VOL'] > 0) & (df['BORE_WAT_VOL'] > 0)]
 
 
 
54
  sns.set_theme(style="darkgrid")
55
  st.write(sns.relplot(
56
  data=df_fil,
57
  x="DATEPRD", y="BORE_OIL_VOL", col="NPD_WELL_BORE_NAME", hue="NPD_WELL_BORE_NAME",
58
  kind="line", palette="crest", linewidth=4, zorder=5,
59
- col_wrap=2, height=3, aspect=1.5, legend=False,
60
  ).fig)
61
 
62
- df_fil['DATEPRD'] = pd.to_datetime(df_fil['DATEPRD'])
63
-
64
- # Create a dataframe where 'BORE_OIL_VOL', 'BORE_GAS_VOL', 'BORE_WAT_VOL' is in monthly average
65
  df_monthly = df_fil.groupby(['NPD_WELL_BORE_NAME', pd.Grouper(key='DATEPRD', freq='M')]).mean()
66
  df_monthly = df_monthly.reset_index()
67
  df_monthly_24 = df_monthly[df_monthly['DATEPRD'] >= '2015-01-01']
@@ -71,7 +67,7 @@ def data_handler(query_job):
71
  data=df_monthly_24,
72
  x="DATEPRD", y="BORE_OIL_VOL", col="NPD_WELL_BORE_NAME", hue="NPD_WELL_BORE_NAME",
73
  kind="line", palette="crest", linewidth=4, zorder=5,
74
- col_wrap=2, height=3, aspect=1.5, legend=False,
75
  ).fig)
76
 
77
  return df_monthly_24
@@ -82,12 +78,12 @@ df_monthly_24 = data_handler(query_job)
82
  if st.button("Forecast"):
83
  # Create an empty dictionary to store dataframes
84
  well_dataframes = {}
85
-
86
  # Iterate through unique well names and filter the data
87
  for well_name in df_monthly_24['NPD_WELL_BORE_NAME'].unique():
88
  well_df = df_monthly_24[df_monthly_24['NPD_WELL_BORE_NAME'] == well_name]
89
  well_dataframes[well_name] = well_df
90
-
91
  # Initialize forecast variables
92
  t_forecast_dict = {}
93
  q_forecast_dict = {}
@@ -96,88 +92,80 @@ if st.button("Forecast"):
96
  # Iterate through unique well names and perform forecasting for each well
97
  for well_name, well_df in well_dataframes.items():
98
  st.write(f"Forecasting for Well: {well_name}")
99
-
100
  # Create a 't' array where t is DATEPRD
101
  t = well_df['DATEPRD'].values
102
-
103
  # Create a 'q' array where q is BORE_OIL_VOL
104
  q = well_df['BORE_OIL_VOL'].values
105
-
106
  # Subtract one datetime from another for 't'
107
  timedelta_t = [j - i for i, j in zip(t[:-1], t[1:])]
108
  timedelta_t = np.array(timedelta_t)
109
  timedelta_t = timedelta_t / np.timedelta64(1, 'D') # Convert timedelta to days
110
-
111
  # Take cumulative sum over timedeltas for 't'
112
  t = np.cumsum(timedelta_t)
113
  t = np.append(0, t)
114
  t = t.astype(float)
115
-
116
  # Normalize 't' and 'q' data
117
  t_normalized = t / max(t)
118
  q_normalized = q / max(q)
119
-
120
  # Function for exponential decline
121
  def exponential(t, qi, di):
122
  return qi * np.exp(-di * t)
123
-
124
  # Fit the exponential decline model to the normalized data
125
  popt, pcov = curve_fit(exponential, t_normalized, q_normalized)
126
  qi, di = popt
127
-
128
  # Check if di is <= 0.0, if so, skip this well
129
  if di <= 0.0:
130
  print(f'Skipping well {well_name} due to di <= 0.0')
131
  continue
132
-
133
  # De-normalize qi and di
134
  qi = qi * max(q)
135
  di = di / max(t)
136
-
137
- print(f'Well Name: {well_name}')
138
- print('Initial production rate:', np.round(qi, 3), 'BOPD')
139
- print('Initial decline rate:', np.round(di, 3), 'BBL OIL/D')
140
-
141
- def cumpro(q_forecast, qi, di):
142
- return (qi - q_forecast) / di
143
-
144
  # Initialize forecast variables
145
  t_forecast = []
146
  q_forecast = []
147
  Qp_forecast = []
148
-
149
  # Initial values
150
  t_current = 0
151
  q_current = exponential(t_current, qi, di)
 
 
 
 
 
152
  Qp_current = cumpro(q_current, qi, di)
153
-
154
  # Start forecasting until q_forecast reaches 25
155
  while q_current >= 25:
156
  t_forecast.append(t_current)
157
  q_forecast.append(q_current)
158
  Qp_forecast.append(Qp_current)
159
-
160
  # Increment time step
161
  t_current += 1
162
  q_current = exponential(t_current, qi, di)
163
  Qp_current = cumpro(q_current, qi, di)
164
-
165
  # Convert lists to numpy arrays for convenience
166
  t_forecast = np.array(t_forecast)
167
  q_forecast = np.array(q_forecast)
168
  Qp_forecast = np.array(Qp_forecast)
169
-
170
- # Print results
171
  st.write('Final Rate:', np.round(q_forecast[-1], 3), 'BOPD')
172
- st.write('Final Cumulative Production:', Qp_forecast[-1], 'BBL OIL')
173
- st.write()
174
-
175
- # Store forecasts in dictionaries
176
- t_forecast_dict[well_name] = t_forecast
177
- q_forecast_dict[well_name] = q_forecast
178
- Qp_forecast_dict[well_name] = Qp_forecast
179
-
180
- # Replace plt.show() with st.pyplot() to display the plots in Streamlit
181
  plt.figure(figsize=(15, 5))
182
  plt.subplot(1, 2, 1)
183
  plt.plot(t, q, '.', color='red', label='Production Data')
@@ -188,7 +176,7 @@ if st.button("Forecast"):
188
  plt.xlim(left=0)
189
  plt.ylim(bottom=0)
190
  plt.legend()
191
-
192
  plt.subplot(1, 2, 2)
193
  plt.plot(t_forecast, Qp_forecast)
194
  plt.title('OIL Cumulative Production Result of DCA', size=13, pad=15)
@@ -196,6 +184,6 @@ if st.button("Forecast"):
196
  plt.ylabel('Production (BBL OIL)')
197
  plt.xlim(left=0)
198
  plt.ylim(bottom=0)
199
-
200
- # Display the Matplotlib figure for this well using st.pyplot()
201
  st.pyplot()
 
5
  from google.cloud import bigquery
6
  from google.oauth2 import service_account
7
  import os
8
+ import json
9
  import matplotlib.pyplot as plt
10
  from scipy.optimize import curve_fit
11
 
12
+ json_key_path = "/Users/rianrachmanto/pypro/bigquery/intricate-idiom-379506-1454314d9d25.json"
13
+ with open(json_key_path) as f:
14
+ credentials_info = json.load(f)
15
 
 
 
 
 
16
  credentials = service_account.Credentials.from_service_account_info(credentials_info)
17
  client = bigquery.Client(credentials=credentials)
18
 
19
+
20
  QUERY = """
21
  SELECT
22
  DATEPRD,
 
31
  AND BORE_WAT_VOL IS NOT NULL
32
  ORDER BY
33
  NPD_WELL_BORE_NAME ASC, DATEPRD DESC;
34
+ """
35
 
36
+ # Run the query using the client
37
  query_job = client.query(QUERY)
38
 
39
  st.set_option('deprecation.showPyplotGlobalUse', False)
 
40
  st.title("DECLINE CURVE ANALYSIS (DCA)")
41
 
 
 
42
  # Create data handler function
43
  def data_handler(query_job):
44
  results = query_job.result()
45
  df = results.to_dataframe()
46
  st.write(df.head())
47
 
48
+ # Ensure df_fil is a copy to avoid SettingWithCopyWarning
49
+ df_fil = df[(df['BORE_OIL_VOL'] > 0) & (df['BORE_GAS_VOL'] > 0) & (df['BORE_WAT_VOL'] > 0)].copy()
50
+ df_fil.loc[:, 'DATEPRD'] = pd.to_datetime(df_fil['DATEPRD'])
51
+
52
  sns.set_theme(style="darkgrid")
53
  st.write(sns.relplot(
54
  data=df_fil,
55
  x="DATEPRD", y="BORE_OIL_VOL", col="NPD_WELL_BORE_NAME", hue="NPD_WELL_BORE_NAME",
56
  kind="line", palette="crest", linewidth=4, zorder=5,
57
+ col_wrap=2, height=3, aspect=1.5, legend=False
58
  ).fig)
59
 
60
+ # Create a dataframe for monthly average
 
 
61
  df_monthly = df_fil.groupby(['NPD_WELL_BORE_NAME', pd.Grouper(key='DATEPRD', freq='M')]).mean()
62
  df_monthly = df_monthly.reset_index()
63
  df_monthly_24 = df_monthly[df_monthly['DATEPRD'] >= '2015-01-01']
 
67
  data=df_monthly_24,
68
  x="DATEPRD", y="BORE_OIL_VOL", col="NPD_WELL_BORE_NAME", hue="NPD_WELL_BORE_NAME",
69
  kind="line", palette="crest", linewidth=4, zorder=5,
70
+ col_wrap=2, height=3, aspect=1.5, legend=False
71
  ).fig)
72
 
73
  return df_monthly_24
 
78
  if st.button("Forecast"):
79
  # Create an empty dictionary to store dataframes
80
  well_dataframes = {}
81
+
82
  # Iterate through unique well names and filter the data
83
  for well_name in df_monthly_24['NPD_WELL_BORE_NAME'].unique():
84
  well_df = df_monthly_24[df_monthly_24['NPD_WELL_BORE_NAME'] == well_name]
85
  well_dataframes[well_name] = well_df
86
+
87
  # Initialize forecast variables
88
  t_forecast_dict = {}
89
  q_forecast_dict = {}
 
92
  # Iterate through unique well names and perform forecasting for each well
93
  for well_name, well_df in well_dataframes.items():
94
  st.write(f"Forecasting for Well: {well_name}")
95
+
96
  # Create a 't' array where t is DATEPRD
97
  t = well_df['DATEPRD'].values
98
+
99
  # Create a 'q' array where q is BORE_OIL_VOL
100
  q = well_df['BORE_OIL_VOL'].values
101
+
102
  # Subtract one datetime from another for 't'
103
  timedelta_t = [j - i for i, j in zip(t[:-1], t[1:])]
104
  timedelta_t = np.array(timedelta_t)
105
  timedelta_t = timedelta_t / np.timedelta64(1, 'D') # Convert timedelta to days
106
+
107
  # Take cumulative sum over timedeltas for 't'
108
  t = np.cumsum(timedelta_t)
109
  t = np.append(0, t)
110
  t = t.astype(float)
111
+
112
  # Normalize 't' and 'q' data
113
  t_normalized = t / max(t)
114
  q_normalized = q / max(q)
115
+
116
  # Function for exponential decline
117
  def exponential(t, qi, di):
118
  return qi * np.exp(-di * t)
119
+
120
  # Fit the exponential decline model to the normalized data
121
  popt, pcov = curve_fit(exponential, t_normalized, q_normalized)
122
  qi, di = popt
123
+
124
  # Check if di is <= 0.0, if so, skip this well
125
  if di <= 0.0:
126
  print(f'Skipping well {well_name} due to di <= 0.0')
127
  continue
128
+
129
  # De-normalize qi and di
130
  qi = qi * max(q)
131
  di = di / max(t)
132
+
 
 
 
 
 
 
 
133
  # Initialize forecast variables
134
  t_forecast = []
135
  q_forecast = []
136
  Qp_forecast = []
137
+
138
  # Initial values
139
  t_current = 0
140
  q_current = exponential(t_current, qi, di)
141
+
142
+ # Function to calculate cumulative production
143
+ def cumpro(q_forecast, qi, di):
144
+ return (qi - q_forecast) / di
145
+
146
  Qp_current = cumpro(q_current, qi, di)
147
+
148
  # Start forecasting until q_forecast reaches 25
149
  while q_current >= 25:
150
  t_forecast.append(t_current)
151
  q_forecast.append(q_current)
152
  Qp_forecast.append(Qp_current)
153
+
154
  # Increment time step
155
  t_current += 1
156
  q_current = exponential(t_current, qi, di)
157
  Qp_current = cumpro(q_current, qi, di)
158
+
159
  # Convert lists to numpy arrays for convenience
160
  t_forecast = np.array(t_forecast)
161
  q_forecast = np.array(q_forecast)
162
  Qp_forecast = np.array(Qp_forecast)
163
+
164
+ # Display results in Streamlit
165
  st.write('Final Rate:', np.round(q_forecast[-1], 3), 'BOPD')
166
+ st.write('Final Cumulative Production:', np.round(Qp_forecast[-1], 2), 'BBL OIL')
167
+
168
+ # Plot the results using Matplotlib and display them in Streamlit
 
 
 
 
 
 
169
  plt.figure(figsize=(15, 5))
170
  plt.subplot(1, 2, 1)
171
  plt.plot(t, q, '.', color='red', label='Production Data')
 
176
  plt.xlim(left=0)
177
  plt.ylim(bottom=0)
178
  plt.legend()
179
+
180
  plt.subplot(1, 2, 2)
181
  plt.plot(t_forecast, Qp_forecast)
182
  plt.title('OIL Cumulative Production Result of DCA', size=13, pad=15)
 
184
  plt.ylabel('Production (BBL OIL)')
185
  plt.xlim(left=0)
186
  plt.ylim(bottom=0)
187
+
188
+ # Display the Matplotlib figure in Streamlit
189
  st.pyplot()
duckdcaapp.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ from scipy.optimize import curve_fit
6
+ import duckdb
7
+ import seaborn as sns
8
+
9
+ st.title("DECLINE CURVE ANALYSIS (DCA)")
10
+
11
+ #make connection to database
12
+ con=duckdb.connect('/Users/rianrachmanto/pypro/database/trialduckdb/trial.db')
13
+ QUERY = """
14
+ SELECT
15
+ DATEPRD,
16
+ NPD_WELL_BORE_NAME,
17
+ BORE_OIL_VOL,
18
+ BORE_GAS_VOL,
19
+ BORE_WAT_VOL
20
+ FROM volve_well_test
21
+ WHERE
22
+ BORE_OIL_VOL IS NOT NULL
23
+ AND BORE_GAS_VOL IS NOT NULL
24
+ AND BORE_WAT_VOL IS NOT NULL
25
+ ORDER BY
26
+ NPD_WELL_BORE_NAME ASC, DATEPRD DESC
27
+ """
28
+ st.set_option('deprecation.showPyplotGlobalUse', False)
29
+
30
+ def data_handler(QUERY):
31
+ query_job=con.execute(QUERY)
32
+ df=con.sql(QUERY).df()
33
+ st.write(df.head())
34
+ df_fil = df[(df['BORE_OIL_VOL'] > 0) & (df['BORE_GAS_VOL'] > 0) & (df['BORE_WAT_VOL'] > 0)].copy()
35
+ df_fil.loc[:, 'DATEPRD'] = pd.to_datetime(df_fil['DATEPRD'])
36
+
37
+ sns.set_theme(style="darkgrid")
38
+ st.write(sns.relplot(
39
+ data=df_fil,
40
+ x="DATEPRD", y="BORE_OIL_VOL", col="NPD_WELL_BORE_NAME", hue="NPD_WELL_BORE_NAME",
41
+ kind="line", palette="crest", linewidth=4, zorder=5,
42
+ col_wrap=2, height=3, aspect=1.5, legend=False
43
+ ).fig)
44
+
45
+ # Create a dataframe for monthly average
46
+ df_monthly = df_fil.groupby(['NPD_WELL_BORE_NAME', pd.Grouper(key='DATEPRD', freq='M')]).mean()
47
+ df_monthly = df_monthly.reset_index()
48
+ df_monthly_24 = df_monthly[df_monthly['DATEPRD'] >= '2015-01-01']
49
+ st.title("Monthly Average")
50
+ sns.set_theme(style="darkgrid")
51
+ st.write(sns.relplot(
52
+ data=df_monthly_24,
53
+ x="DATEPRD", y="BORE_OIL_VOL", col="NPD_WELL_BORE_NAME", hue="NPD_WELL_BORE_NAME",
54
+ kind="line", palette="crest", linewidth=4, zorder=5,
55
+ col_wrap=2, height=3, aspect=1.5, legend=False
56
+ ).fig)
57
+
58
+ return df_monthly_24
59
+ df_monthly_24 = data_handler(QUERY)
60
+
61
+ # Add a "Forecast" button
62
+ if st.button("Forecast"):
63
+ # Create an empty dictionary to store dataframes
64
+ well_dataframes = {}
65
+
66
+ # Iterate through unique well names and filter the data
67
+ for well_name in df_monthly_24['NPD_WELL_BORE_NAME'].unique():
68
+ well_df = df_monthly_24[df_monthly_24['NPD_WELL_BORE_NAME'] == well_name]
69
+ well_dataframes[well_name] = well_df
70
+
71
+ # Initialize forecast variables
72
+ t_forecast_dict = {}
73
+ q_forecast_dict = {}
74
+ Qp_forecast_dict = {}
75
+
76
+ # Iterate through unique well names and perform forecasting for each well
77
+ for well_name, well_df in well_dataframes.items():
78
+ st.write(f"Forecasting for Well: {well_name}")
79
+
80
+ # Create a 't' array where t is DATEPRD
81
+ t = well_df['DATEPRD'].values
82
+
83
+ # Create a 'q' array where q is BORE_OIL_VOL
84
+ q = well_df['BORE_OIL_VOL'].values
85
+
86
+ # Subtract one datetime from another for 't'
87
+ timedelta_t = [j - i for i, j in zip(t[:-1], t[1:])]
88
+ timedelta_t = np.array(timedelta_t)
89
+ timedelta_t = timedelta_t / np.timedelta64(1, 'D') # Convert timedelta to days
90
+
91
+ # Take cumulative sum over timedeltas for 't'
92
+ t = np.cumsum(timedelta_t)
93
+ t = np.append(0, t)
94
+ t = t.astype(float)
95
+
96
+ # Normalize 't' and 'q' data
97
+ t_normalized = t / max(t)
98
+ q_normalized = q / max(q)
99
+
100
+ # Function for exponential decline
101
+ def exponential(t, qi, di):
102
+ return qi * np.exp(-di * t)
103
+
104
+ # Fit the exponential decline model to the normalized data
105
+ popt, pcov = curve_fit(exponential, t_normalized, q_normalized)
106
+ qi, di = popt
107
+
108
+ # Check if di is <= 0.0, if so, skip this well
109
+ if di <= 0.0:
110
+ print(f'Skipping well {well_name} due to di <= 0.0')
111
+ continue
112
+
113
+ # De-normalize qi and di
114
+ qi = qi * max(q)
115
+ di = di / max(t)
116
+
117
+ # Initialize forecast variables
118
+ t_forecast = []
119
+ q_forecast = []
120
+ Qp_forecast = []
121
+
122
+ # Initial values
123
+ t_current = 0
124
+ q_current = exponential(t_current, qi, di)
125
+
126
+ # Function to calculate cumulative production
127
+ def cumpro(q_forecast, qi, di):
128
+ return (qi - q_forecast) / di
129
+
130
+ Qp_current = cumpro(q_current, qi, di)
131
+
132
+ # Start forecasting until q_forecast reaches 25
133
+ while q_current >= 25:
134
+ t_forecast.append(t_current)
135
+ q_forecast.append(q_current)
136
+ Qp_forecast.append(Qp_current)
137
+
138
+ # Increment time step
139
+ t_current += 1
140
+ q_current = exponential(t_current, qi, di)
141
+ Qp_current = cumpro(q_current, qi, di)
142
+
143
+ # Convert lists to numpy arrays for convenience
144
+ t_forecast = np.array(t_forecast)
145
+ q_forecast = np.array(q_forecast)
146
+ Qp_forecast = np.array(Qp_forecast)
147
+
148
+ # Display results in Streamlit
149
+ st.write('Final Rate:', np.round(q_forecast[-1], 3), 'BOPD')
150
+ st.write('Final Cumulative Production:', np.round(Qp_forecast[-1], 2), 'BBL OIL')
151
+
152
+ # Plot the results using Matplotlib and display them in Streamlit
153
+ plt.figure(figsize=(15, 5))
154
+ plt.subplot(1, 2, 1)
155
+ plt.plot(t, q, '.', color='red', label='Production Data')
156
+ plt.plot(t_forecast, q_forecast, label='Forecast')
157
+ plt.title('Oil Production Rate Result of DCA', size=13, pad=15)
158
+ plt.xlabel('Days')
159
+ plt.ylabel('Rate (BBL OIL/d)')
160
+ plt.xlim(left=0)
161
+ plt.ylim(bottom=0)
162
+ plt.legend()
163
+
164
+ plt.subplot(1, 2, 2)
165
+ plt.plot(t_forecast, Qp_forecast)
166
+ plt.title('OIL Cumulative Production Result of DCA', size=13, pad=15)
167
+ plt.xlabel('Days')
168
+ plt.ylabel('Production (BBL OIL)')
169
+ plt.xlim(left=0)
170
+ plt.ylim(bottom=0)
171
+
172
+ # Display the Matplotlib figure in Streamlit
173
+ st.pyplot()