import pickle
import sys
from os.path import exists, join
from typing import Tuple, Union, List
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from twistpy.polarization.model import PolarizationModel6C
from twistpy.convenience import get_project_root
[docs]class SupportVectorMachine:
"""Support vector machine for wave type classification based on six component polarization analysis.
Used to train and classify wave types via 6-C polarization analysis. This class merely exists for convenience.
The core functionality of this class inherits from :obj:`sklearn.svm.SVC`.
Parameters
----------
name : :obj:`str`
Name of the support vector machine
"""
def __init__(self, name: str) -> None:
self.name: str = name
self.C: float = None
self.kernel: str = None
self.free_surface: bool = None
self.N: int = None
self.scaling_veloctiy: float = None
self.vp: Tuple[float, float] = None
self.vp_to_vs: Tuple[float, float] = None
self.vl: Tuple[float, float] = None
self.vr: Tuple[float, float] = None
self.phi: Tuple[float, float] = None
self.theta: Tuple[float, float] = None
self.xi: Tuple[float, float] = None
self.plot_confusion_matrix: bool = None
self.scaling_velocity: float = None
if not self.name:
raise Exception("Please specify a name for this SupportVectorMachine!")
[docs] def train(
self,
wave_types: List[str] = ["R", "L", "P", "SV", "SH", "Noise"],
N: int = 5000,
scaling_velocity: float = 1.0,
vp: Tuple[float, float] = (50.0, 2000.0),
vp_to_vs: Tuple[float, float] = (1.7, 2.4),
vl: Tuple[float, float] = (50, 2000),
vr: Tuple[float, float] = (50, 2000),
phi: Tuple[float, float] = (0, 360),
theta: Tuple[float, float] = (0, 90),
xi: Tuple[float, float] = (-90, 90),
free_surface: bool = True,
C: float = 10.0,
kernel: str = "rbf",
gamma: Union[str, float] = "scale",
plot_confusion_matrix: bool = True,
) -> None:
"""Train support vector machine with random polarization models from the specified parameter range.
Parameters
----------
wave_types : :obj:`list` of :obj:`str`, default=['R', 'L', 'P', 'SV', 'SH', 'Noise']
List of wave-types that are used for training.
| 'P': P-wave
| 'SV': SV-wave
| 'SH': SH-wave
| 'L': Love-wave
| 'R': Rayleigh-wave
N : :obj:`int`, default=5000
Number of randomly generated polarization models for each wave type. Thereof, 80% are
used for training and the remaining 20% are used for testing to evaluate the performance of the classifier.
scaling_velocity : :obj:`float`, default=1.
Scaling velocity (in m/s) that was applied to the translational components of the real data.
vp : :obj:`tuple`
P-wave velocity range (in m/s) from which the random parametrization of the training set of
polarization vectors is drawn as (vp_min, vp_max).
vp_to_vs : :obj:`tuple`
Range of P-to-S wave velocity ratios from which the random parametrization of the training set of
polarization vectors is drawn (vp_to_vs_min, vp_to_vs_max).
vl : :obj:`tuple`
Range of Love wave velocities in m/s.
vr : :obj:`tuple`
Range of Rayleigh wave velocities in m/s.
phi : :obj:`tuple`
Azimuth angle range in degrees.
theta : :obj:`tuple`
Inclination angle range in degrees.
xi : :obj:`tuple`
Rayleigh wave ellipticity angle range in degrees.
free_surface : :obj:`bool`, default=True
Specifies whether free-surface polarization models apply
C : :obj:`float`, default=10.0
Regularization parameter for the support vector machine. See :obj:`sklearn.svm.SVC`.
kernel : :obj:`str` or *callable*, default='rbf'
Kernel type used for the support vector machine. Defaults to a radial basis function kernel.
See :obj:`sklearn.svm.SVC`.
gamma : :obj:`str` or float, default='scale'
Kernel coefficient for the radial basis function kernel. See :obj:`sklearn.svm.SVC`.
plot_confusion_matrix : :obj:`bool`, default=True
Specify whether a confusion matrix will be plotted after training
"""
# Initial sanity checks
wtypes_implemented = ["R", "L", "P", "SV", "SH", "Noise"]
for w_type in wave_types:
assert w_type in wtypes_implemented, (
f"Invalid wave type specified: {w_type}! Must be in "
f"[{wtypes_implemented}]"
)
# Set class attributes
self.C, self.kernel, self.free_surface = C, kernel, free_surface
self.N, self.scaling_velocity = N, scaling_velocity
self.vp, self.vp_to_vs, self.vl, self.vr = vp, vp_to_vs, vl, vr
self.phi, self.theta, self.xi = phi, theta, xi
self.plot_confusion_matrix = plot_confusion_matrix
pkl_filename = join(
get_project_root(), "twistpy", "SVC_models", self.name + ".pkl"
)
if exists(pkl_filename):
print(
f"A trained model already exists with this name and is saved at '{pkl_filename}'"
)
print(
"Nothing will be done! Please delete the file above if you want to re-train this model."
)
return
df = pd.DataFrame(
index=np.arange(0, len(wave_types) * N),
columns=[
"t1_real",
"t2_real",
"t3_real",
"r1_real",
"r2_real",
"r3_real",
"t1_imag",
"t2_imag",
"t3_imag",
"r1_imag",
"r2_imag",
"r3_imag",
"wave_type",
],
)
phi = np.random.uniform(phi[0], phi[1], (N, 1))
theta = np.random.uniform(theta[0], theta[1], (N, 1))
xi = np.random.uniform(xi[0], xi[1], (N, 1))
vr = np.random.uniform(vr[0], vr[1], (N, 1))
vl = np.random.uniform(vl[0], vl[1], (N, 1))
vp_to_vs = np.random.uniform(vp_to_vs[0], vp_to_vs[1], (N, 1))
vp = np.random.uniform(vp[0], vp[1], (N, 1))
print("Generating random polarization models for training! \n")
# Generate random P-wave polarization models drawn from a uniform distribution
# Generate random Rayleigh-wave polarization models drawn from a uniform distribution
columns = [
"t1_real",
"t2_real",
"t3_real",
"r1_real",
"r2_real",
"r3_real",
"t1_imag",
"t2_imag",
"t3_imag",
"r1_imag",
"r2_imag",
"r3_imag",
]
for it, w_type in enumerate(wave_types):
if w_type == "Noise":
u1_real: np.ndarray = np.random.normal(0.0, 1, size=(6, N))
u1_imag: np.ndarray = np.random.normal(0.0, 1, size=(6, N))
u1: np.ndarray = u1_real + 1j * u1_imag
u1: np.ndarray = u1 / np.linalg.norm(u1, axis=0)
u1: np.ndarray = np.random.choice([-1, 1]) * u1
gamma_samson = np.arctan2(
2 * np.einsum("ij,ij->j", u1.real, u1.imag, optimize=True),
np.einsum("ij,ij->j", u1.real, u1.real, optimize=True)
- np.einsum("ij,ij->j", u1.imag, u1.imag, optimize=True),
)
phi1 = -0.5 * gamma_samson
pol_noise = np.exp(1j * phi1) * u1
df.loc[it * N : (it + 1) * N - 1, "wave_type"] = w_type
for idx, column in enumerate(columns):
if idx < 6:
df.loc[it * N : (it + 1) * N - 1, column] = pol_noise[
idx, :
].real.tolist()
else:
df.loc[it * N : (it + 1) * N - 1, column] = pol_noise[
idx - 6, :
].imag.tolist()
else:
# Generate random 6C wave polarization models drawn from a uniform distribution
pm = PolarizationModel6C(
wave_type=w_type,
vr=vr,
vp=vp,
vs=vp / vp_to_vs,
phi=phi,
xi=xi,
theta=theta,
vl=vl,
scaling_velocity=scaling_velocity,
free_surface=free_surface,
)
# Direction of polarization can either be positive or negative
pol = np.random.choice([-1, 1], (1, N)) * pm.polarization_vector
df.loc[it * N : (it + 1) * N - 1, "wave_type"] = w_type
for idx, column in enumerate(columns):
if idx < 6:
df.loc[it * N : (it + 1) * N - 1, column] = pol[
idx, :
].real.tolist()
else:
df.loc[it * N : (it + 1) * N - 1, column] = pol[
idx - 6, :
].imag.tolist()
print("Training Support Vector Machine!")
df = df.dropna()
X = df.drop(["wave_type"], axis="columns")
y = df.wave_type
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2)
model = SVC(kernel=kernel, C=C, gamma=gamma)
model.fit(Xtrain, ytrain)
with open(pkl_filename, "wb") as file:
pickle.dump(model, file)
print(
f"Training successfully completed. Model score on independent test data is '{model.score(Xtest, ytest)}'!"
)
print(f"Model has been saved as '{pkl_filename}'!'")
if plot_confusion_matrix:
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_predictions(
ytest,
model.predict(Xtest),
labels=model.classes_,
cmap="binary",
normalize="true",
)
plt.show()
[docs] def load_model(self) -> SVC:
"""
Loads a previously trained support vector machine from disk.
Returns
-------
model : :obj:`~sklearn.svm.SVC`
Scikit-learn SVC object.
"""
file_name = join(sys.path[0], "SVC_models", self.name + ".pkl")
if not exists(file_name):
raise Exception(f"The model '{self.name}' has not been trained yet")
else:
with open(file_name, "rb") as file:
model = pickle.load(file)
return model