# -*- coding: utf-8 -*-
"""
Electrofacies is a model to calculate numerical facies from log data.
It uses scikit-learn for standardization, principal component analysis,
and clustering.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import MiniBatchKMeans
def _write_curve(log, name, depth_index, values, descr):
    """
    Write ``values`` into curve ``name`` of ``log`` at ``depth_index``.

    If the curve already exists, the array returned by ``log[name]`` is
    modified in place (this assumes ``log[name]`` hands back the live
    data array rather than a copy -- TODO confirm against the Log
    class). Otherwise a new NaN-filled array the length of the first
    curve is created, populated, and registered with ``log.add_curve``.
    """
    if name in log.keys():
        data = log[name]
        data[depth_index] = values
    else:
        data = np.full(len(log[0]), np.nan)
        data[depth_index] = values
        log.add_curve(name, data, descr=descr)


def electrofacies(logs, formations, curves, n_clusters, log_scale=None,
                  n_components=0.85, curve_name='FACIES',
                  random_state=None):
    """
    Electrofacies function to group intervals by rock type. Also
    referred to as heterogeneous rock analysis.

    The requested formation intervals of every log are pooled, the
    input curves are standardized, reduced with PCA, and clustered with
    mini-batch k-means. The principal component scores (``PC1``,
    ``PC2``, ...) and the cluster labels (numbered from 1) are written
    back to each log as curves. Only depths where every input curve is
    non-null are classified; other depths in the selected formations
    receive no new label.

    Parameters
    ----------
    logs : list of :class:`petropy.Log` objects
        List of Log objects
    formations : list of formation names
        List of strings containing formation names which should be
        previously loaded into Log objects
    curves : list of curve names
        List of strings containing curve names as inputs in the
        electrofacies calculations
    n_clusters : int
        Number of clusters to group intervals. Number of electrofacies.
    log_scale : list of curve names, optional
        List of strings containing curve names which are preprocessed
        on a log scale. For example, deep resistivity separates better
        on a log scale, and is graphed logarithmically when viewing
        data in a log viewer. Defaults to no curves.
    n_components : int, float, None or string (default 0.85)
        Number of principal components to keep. If value is less than
        one, the number of principal components will be the number
        required to exceed the explained variance.
    curve_name : str (default 'FACIES')
        Name of the new electrofacies curve.
    random_state : int, RandomState instance or None (default None)
        Passed to the k-means clusterer. Set to an int for
        reproducible cluster assignments.

    Returns
    -------
    list
        list of :class:`petropy.Log` objects (the same objects passed
        in, with the new curves added or updated)

    Raises
    ------
    ValueError
        If a log has no UWI, if ``logs`` or ``formations`` is empty, or
        if no depth has a value for every curve in ``curves``.

    Examples
    --------
    >>> # loads sample Wolfcamp calculates electrofacies for that well
    >>> import petropy as ptr
    >>> # reads sample Wolfcamp Log from las file
    >>> log = ptr.log_data('WFMP')
    >>> logs = [log]
    >>> f = ['WFMPA', 'WFMPB', 'WFMPC']
    >>> c = ['GR_N', 'RESDEEP_N', 'RHOB_N', 'NPHI_N', 'PE_N']
    >>> scale = ['RESDEEP_N']
    >>> logs = ptr.electrofacies(logs, f, c, 8, log_scale = scale)

    >>> import petropy as ptr
    >>> # loads logs from a list of paths and
    >>> # calculates electrofacies across the wells
    >>> #
    >>> # define file_paths for las files to analyze
    >>> file_paths = ['path/to/log1.las', 'path/to/log2.las',
    ... 'path/to/log3.las', 'path/to/log4.las']
    >>> # create list of Log objects
    >>> logs = [ptr.Log(x) for x in file_paths]
    >>> # define csv with tops for all wells
    >>> tops_csv = 'path/to/tops.csv'
    >>> # add formation tops to wells
    >>> for log in logs:
    ...     log.tops_from_csv(tops_csv)
    >>> # define list of formation tops. If single formation, f = ['FORM']
    >>> f = ['FORM1', 'FORM2']
    >>> # list of curves to use for classification
    >>> c = ['GR_N', 'RESDEEP_N', 'RHOB_N', 'NPHI_N', 'PE_N']
    >>> scale = ['RESDEEP_N']
    >>> # run electrofacies across logs in list
    >>> logs = ptr.electrofacies(logs, f, c, 8, log_scale = scale)
    >>> # save las in renamed file
    >>> for i, log in enumerate(logs):
    ...     new_file_name = file_paths[i].split('.')[0]+'_with_HRA.las'
    ...     log.write(new_file_name)
    """
    if log_scale is None:
        log_scale = []

    # Gather the rows of every requested formation from every log.
    # Frames are collected in a list and concatenated once, rather
    # than growing a DataFrame inside the loop.
    frames = []
    for log in logs:
        if log.well['UWI'] is None:
            raise ValueError('UWI required for log identification.')
        depths = log[0]
        log_df = log.df()
        log_df['UWI'] = log.well['UWI'].value
        # Positional row number, used later to write results back
        # into the log's curve arrays.
        log_df['DEPTH_INDEX'] = np.arange(0, len(depths))
        for formation in formations:
            top = log.tops[formation]
            bottom = log.next_formation_depth(formation)
            in_formation = np.where((depths >= top) & (depths < bottom))[0]
            frames.append(log_df.iloc[in_formation])

    if not frames:
        raise ValueError('No data to classify: logs and formations '
                         'must both be non-empty.')

    # Reset the index: rows from different wells would otherwise share
    # index labels, which makes label-based assignment ambiguous.
    df = pd.concat(frames, ignore_index=True)

    for curve in log_scale:
        df[curve] = np.log(df[curve])

    # PCA cannot handle missing values, so a row is only usable when
    # every input curve is present.
    valid = pd.notnull(df[curves]).all(axis=1)
    if not valid.any():
        raise ValueError('No depths have values for all of the '
                         'requested curves.')

    X = StandardScaler().fit_transform(df.loc[valid, curves])
    pc = PCA(n_components=n_components).fit(X)
    scores = pc.transform(X)

    pc_names = ['PC%i' % i for i in range(1, pc.n_components_ + 1)]
    components = pd.DataFrame(data=scores, index=df.index[valid.values],
                              columns=pc_names)
    components['UWI'] = df.loc[valid, 'UWI']
    components['DEPTH_INDEX'] = df.loc[valid, 'DEPTH_INDEX']

    # Batch size is 5% of the samples, clamped to [100, 10000].
    batch_size = min(max(len(components) // 20, 100), 10000)
    labels = MiniBatchKMeans(n_clusters=n_clusters,
                             batch_size=batch_size,
                             random_state=random_state).fit_predict(scores)
    # Facies are numbered from 1 rather than 0.
    df.loc[valid, curve_name] = labels + 1

    for log in logs:
        uwi = log.well['UWI'].value

        pc_rows = components[components['UWI'] == uwi]
        pc_index = pc_rows['DEPTH_INDEX'].values
        for number, pc_curve in enumerate(pc_names, start=1):
            # NOTE: the description text (including the 'Pricipal'
            # spelling) is kept unchanged so written curve headers
            # match those produced by earlier runs.
            _write_curve(
                log, pc_curve, pc_index, pc_rows[pc_curve].values,
                'Pricipal Component %i from electrofacies' % number)

        facies_rows = df[df['UWI'] == uwi]
        _write_curve(log, curve_name,
                     facies_rows['DEPTH_INDEX'].values,
                     facies_rows[curve_name].values,
                     'Electrofacies')

    return logs