# Source code for petropy.electrofacies

# -*- coding: utf-8 -*-
"""
Electrofacies is a model to calculate numerical facies from log data.
It uses scikit-learn for standardization and clustering.

"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import MiniBatchKMeans

def electrofacies(logs, formations, curves, n_clusters, log_scale = [],
                  n_components = 0.85, curve_name = 'FACIES'):
    """
    Electrofacies function to group intervals by rock type. Also
    referred to as heterogeneous rock analysis.

    The data inside the requested formations is gathered from every
    log, standardized, reduced with principal component analysis and
    clustered with mini-batch k-means. The principal component scores
    (curves ``PC1``, ``PC2``, ...) and the 1-based cluster number
    (``curve_name``) are written back to each log.

    Parameters
    ----------
    logs : list of :class:`petropy.Log` objects
        List of Log objects
    formations : list of formation names
        List of strings containing formation names which should be
        previously loaded into Log objects
    curves : list of curve names
        List of strings containing curve names as inputs in the
        electrofacies calculations
    n_clusters : int
        Number of clusters to group intervals. Number of electrofacies.
    log_scale : list of curve names
        List of strings containing curve names which are preprocessed
        on a log scale (natural logarithm). For example, deep
        resistivity separates better on a log scale, and is graphed
        logarithmically when viewing data in a log viewer.
    n_components : int, float, None or string (default 0.85)
        Number of principal components to keep. If the value is less
        than one, the number of principal components is the number
        required to exceed that fraction of explained variance.
    curve_name : str (default 'FACIES')
        Name of the new electrofacies curve.

    Returns
    -------
    list
        list of :class:`petropy.Log` objects. These are the same
        objects passed in as ``logs``, updated in place.

    Raises
    ------
    ValueError
        If a log has no UWI, if no data falls inside the requested
        formations, or if no depth has a finite value for every curve
        in ``curves``.

    Examples
    --------
    >>> # loads sample Wolfcamp calculates electrofacies for that well
    >>> import petropy as ptr
    >>> # reads sample Wolfcamp Log from las file
    >>> log = ptr.log_data('WFMP')
    >>> logs = [log]
    >>> f = ['WFMPA', 'WFMPB', 'WFMPC']
    >>> c = ['GR_N', 'RESDEEP_N', 'RHOB_N', 'NPHI_N', 'PE_N']
    >>> scale = ['RESDEEP_N']
    >>> logs = ptr.electrofacies(logs, f, c, 8, log_scale = scale)

    >>> import petropy as ptr
    >>> # loads logs from a list of paths and
    >>> # calculates electrofacies across the wells
    >>> #
    >>> # define file_paths for las files to analyze
    >>> file_paths = ['path/to/log1.las', 'path/to/log2.las',
    ... 'path/to/log3.las', 'path/to/log4.las']
    >>> # create list of Log objects
    >>> logs = [ptr.Log(x) for x in file_paths]
    >>> # define csv with tops for all wells
    >>> tops_csv = 'path/to/tops.csv'
    >>> # add formation tops to wells
    >>> for log in logs:
    ...     log.tops_from_csv(tops_csv)
    >>> # define list of formation tops. If single formation, f = ['FORM']
    >>> f = ['FORM1', 'FORM2']
    >>> # list of curves to use for classification
    >>> c = ['GR_N', 'RESDEEP_N', 'RHOB_N', 'NPHI_N', 'PE_N']
    >>> scale = ['RESDEEP_N']
    >>> # run electrofacies across logs in list
    >>> logs = ptr.electrofacies(logs, f, c, 8, log_scale = scale)
    >>> # save las in renamed file
    >>> for i, log in enumerate(logs):
    ...     new_file_name = file_paths[i].split('.')[0]+'_with_HRA.las'
    ...     log.write(new_file_name)

    """

    # Collect one frame per (log, formation) and concatenate once at the
    # end; DataFrame.append was removed in pandas 2.0 and copied the
    # whole frame on every call.
    frames = []

    for log in logs:

        if log.well['UWI'] is None:
            raise ValueError('UWI required for log identification.')

        log_df = log.df()
        log_df['UWI'] = log.well['UWI'].value
        # positional index into the log's arrays, used to write the
        # results back to the right samples later on
        log_df['DEPTH_INDEX'] = np.arange(0, len(log[0]))

        depths = np.asarray(log[0])
        for formation in formations:
            top = log.tops[formation]
            bottom = log.next_formation_depth(formation)
            inside = np.flatnonzero((depths >= top) & (depths < bottom))
            frames.append(log_df.iloc[inside])

    if not frames:
        raise ValueError('No log data found for the given logs and '
                         'formations.')

    # ignore_index gives every row a unique label; depth labels repeat
    # between wells and would make label-based alignment ambiguous.
    df = pd.concat(frames, ignore_index=True)

    for s in log_scale:
        df[s] = np.log(df[s])

    # PCA cannot handle NaN or infinite input, so a row is only usable
    # when *every* input curve holds a finite value. Infinite values can
    # appear after taking the log of a zero reading.
    valid_rows = np.isfinite(df[curves].astype(float)).all(axis=1)

    if not valid_rows.any():
        raise ValueError('No depths with finite values for all curves: '
                         '%s' % ', '.join(curves))

    X = StandardScaler().fit_transform(df.loc[valid_rows, curves])

    pc = PCA(n_components=n_components).fit(X)
    scores = pc.transform(X)

    pc_names = ['PC%i' % x for x in range(1, pc.n_components_ + 1)]
    components = pd.DataFrame(data=scores,
                              index=df.index[valid_rows.values],
                              columns=pc_names)
    components['UWI'] = df.loc[valid_rows, 'UWI']
    components['DEPTH_INDEX'] = df.loc[valid_rows, 'DEPTH_INDEX']

    # batch size is 5% of the samples, clamped to [100, 10000]
    size = min(max(len(components) // 20, 100), 10000)

    labels = MiniBatchKMeans(n_clusters=n_clusters,
                             batch_size=size).fit_predict(scores)
    # cluster labels start at 0; facies are numbered from 1
    df.loc[valid_rows, curve_name] = labels + 1

    for log in logs:

        uwi = log.well['UWI'].value

        # NOTE: only the component scores are stored; the eigenvectors
        # (pc.components_) are not written to the log header.
        well_components = components[components.UWI == uwi]
        component_index = well_components['DEPTH_INDEX'].values
        for v, pc_curve in enumerate(pc_names, start=1):
            _store_curve(
                log, pc_curve, component_index,
                well_components[pc_curve].values,
                'Pricipal Component %i from electrofacies' % v)

        well_rows = df[df.UWI == uwi]
        _store_curve(log, curve_name,
                     well_rows['DEPTH_INDEX'].values,
                     well_rows[curve_name].values,
                     'Electrofacies')

    return logs


def _store_curve(log, name, depth_index, values, descr):
    """
    Write ``values`` into curve ``name`` of ``log`` at ``depth_index``.

    An existing curve is updated in place and keeps its values (and its
    description) everywhere else. Otherwise a new curve filled with NaN
    outside ``depth_index`` is added with description ``descr``.
    """
    if name in log.keys():
        data = log[name]
        data[depth_index] = values
    else:
        data = np.full(len(log[0]), np.nan)
        data[depth_index] = values
        log.add_curve(name, data, descr = descr)
# Source code for petropy.electrofacies
#
# PetroPy
#
# Navigation