Source code for SeisMonitor.monitor.associator.ai

import numpy as np
import pandas as pd
import json
import math
import datetime as dt
import pyproj
import os
from . import utils as ut
from SeisMonitor.utils import isfile
from gamma.utils import association
from obspy.core.inventory.inventory import Inventory, read_inventory
from tqdm import tqdm
from obspy import UTCDateTime
from obspy.core.event.event import Event
from obspy.core.event import ResourceIdentifier, Catalog
from obspy.core.event.base import CreationInfo
from obspy.core.event.origin import Pick
from obspy.core.event.base import (
    QuantityError,
    WaveformStreamID,
    CreationInfo,
    Comment
)
from obspy.core.event.origin import Origin, OriginQuality, Arrival


[docs] class GaMMAObj: """GaMMA object for seismic event association and processing. Parameters ---------- region : list List of [lon_min, lon_max, lat_min, lat_max, z_min, z_max]. z is depth in km. epsg_proj : str EPSG projection code. use_dbscan : bool, default=True Whether to use DBSCAN clustering. use_amplitude : bool, default=True Whether to use amplitude in processing. dbscan_eps : float, default=10.0 DBSCAN epsilon parameter. dbscan_min_samples : int, default=3 DBSCAN minimum samples parameter. vel : dict, default={"p": 7.0, "s": 7.0 / 1.75} Average velocity model with P and S wave velocities. method : str, default="BGMM" Association method. oversample_factor : int, default=20 Oversampling factor for processing. min_picks_per_eq : int, default=5 Minimum picks per earthquake. max_sigma11 : float, default=2.0 Maximum sigma for x-x uncertainty. max_sigma22 : float, default=1.0 Maximum sigma for y-y uncertainty. max_sigma12 : float, default=1.0 Maximum sigma for x-y uncertainty. calculate_amp : bool, default=True Whether to calculate amplitudes. p_window : int, default=10 P-wave window length. s_window : int, default=5 S-wave window length. waterlevel : int, default=10 Waterlevel for amplitude calculation. """ def __init__( self, region, epsg_proj, use_dbscan=True, use_amplitude=True, dbscan_eps=10.0, dbscan_min_samples=3, vel={"p": 7.0, "s": 7.0 / 1.75}, method="BGMM", oversample_factor=20, min_picks_per_eq=5, max_sigma11=2.0, max_sigma22=1.0, max_sigma12=1.0, calculate_amp=True, p_window=10, s_window=5, waterlevel=10 ): """Initialize GaMMAObj with configuration parameters.""" self.lon_lims = region[0:2] self.lat_lims = region[2:4] self.z_lims = region[4:] self.epsg_proj = epsg_proj self.use_dbscan = use_dbscan self.use_amplitude = use_amplitude self.vel = vel self.dbscan_eps = dbscan_eps self.dbscan_min_samples = dbscan_min_samples self.method = method self.oversample_factor = oversample_factor self.min_picks_per_eq = min_picks_per_eq self.max_sigma11 = max_sigma11 self.max_sigma22 = max_sigma22 self.max_sigma12 = max_sigma12 self.dims = ["x(km)", "y(km)", "z(km)"] self.calculate_amp = calculate_amp self.p_window = p_window self.s_window = s_window self.waterlevel = waterlevel self.config = self._get_config() self.response = None self.name = "GaMMA"
[docs] def add_response(self, response): """Add response inventory to the GaMMA object. Args: response (Inventory or str): Either an Inventory object or path to inventory file """ if isinstance(response, Inventory): self.response = response else: self.response = read_inventory(response)
def _get_config(self): """Generate configuration dictionary with transformed coordinates. Returns: dict: Configuration dictionary with projected coordinates """ config = self.__dict__ in_proj = pyproj.Proj("EPSG:4326") out_proj = pyproj.Proj(self.epsg_proj) y_min, x_min = pyproj.transform(in_proj, out_proj, self.lat_lims[0], self.lon_lims[0]) y_max, x_max = pyproj.transform(in_proj, out_proj, self.lat_lims[1], self.lon_lims[1]) config["x(km)"] = np.array([x_min, x_max]) / 1e3 config["y(km)"] = np.array([y_min, y_max]) / 1e3 config["z(km)"] = np.array(self.z_lims) config["bfgs_bounds"] = ( (config["x(km)"][0] - 1, config["x(km)"][1] + 1), # x (config["y(km)"][0] - 1, config["y(km)"][1] + 1), # y (0, config["z(km)"][1] + 1), # z (None, None), # t ) return config @property def stations(self): """Get station information with transformed coordinates. Returns: pandas.DataFrame: Station information with projected coordinates Raises: Exception: If response inventory hasn't been added """ if self.response is None: raise Exception("You must add response file to the GaMMA Object") stations = ut.get_stations_gamma_df(self.response) in_proj = pyproj.Proj("EPSG:4326") out_proj = pyproj.Proj(self.epsg_proj) stations["y(km)"] = stations.apply( lambda x: pyproj.transform(in_proj, out_proj, x["latitude"], x["longitude"])[0] / 1e3, axis=1 ) stations["x(km)"] = stations.apply( lambda x: pyproj.transform(in_proj, out_proj, x["latitude"], x["longitude"])[1] / 1e3, axis=1 ) stations["z(km)"] = stations["elevation(m)"] / -1e3 stations = stations.drop_duplicates(ignore_index=True) return stations
[docs] def get_gamma_picks(event_picks): """Convert event picks to Obspy Pick objects. Args: event_picks (pandas.DataFrame): DataFrame containing pick information Returns: list: List of Obspy Pick objects """ pick_list = [] for i, row in event_picks.iterrows(): loc = row.location str_id = ".".join((str(row.network), str(row.station), str(loc), row.instrument_type + "Z")) comment = { 'probability': row.prob, 'GaMMA_probability': row.prob_gamma } if row.author == "EQTransformer": comment.update({ 'snr': row.snr, 'detection_probability': row.detection_probability, 'event_start_time': row.event_start_time.strftime("%Y-%m-%d %H:%M:%S.%f"), 'event_end_time': row.event_end_time.strftime("%Y-%m-%d %H:%M:%S.%f") }) pick_obj = Pick( resource_id=ResourceIdentifier(id=row.pick_id, prefix="pick"), time=UTCDateTime(row.timestamp), time_errors=QuantityError( uncertainty=20/100, confidence_level=row.prob*100 ), waveform_id=WaveformStreamID( network_code=row.network, station_code=row.station, location_code=loc, channel_code=row.instrument_type + "Z", resource_uri=ResourceIdentifier(id=str_id), seed_string=str_id ), phase_hint=row["type"], evaluation_mode='automatic', creation_info=CreationInfo( author=row.author, creation_time=UTCDateTime.now() ), method_id=row.author, comments=[Comment(text=json.dumps(comment))] ) pick_list.append(pick_obj) return pick_list
[docs] def picks2arrivals(picks): """Convert picks to arrivals. Args: picks (list): List of Pick objects Returns: list: List of Arrival objects """ arrivals = [] for pick in picks: arrival = Arrival( resource_id=ResourceIdentifier(id=pick.resource_id.id, prefix='arrival'), pick_id=pick.resource_id, phase=pick.phase_hint, time_weight=pick.time_errors.confidence_level/100, creation_info=pick.creation_info ) arrivals.append(arrival) return arrivals
[docs] def get_gamma_origin(catalog_info, event_picks, in_proj="EPSG:3116", out_proj="EPSG:4326"): """Create an Origin object from catalog information and picks. Args: catalog_info (pandas.Series): Catalog information for the event event_picks (list): List of Pick objects in_proj (str): Input projection EPSG code out_proj (str): Output projection EPSG code Returns: Origin: Obspy Origin object """ y, x = catalog_info["y(km)"]*1e3, catalog_info["x(km)"]*1e3 in_proj = pyproj.Proj(in_proj) out_proj = pyproj.Proj(out_proj) lat, lon = pyproj.transform(in_proj, out_proj, y, x) origin = Origin( resource_id=ResourceIdentifier( id=UTCDateTime(catalog_info.time).strftime("%Y%m%d.%H%M%S.%f"), prefix='origin' ), time=UTCDateTime(catalog_info.time), time_errors=QuantityError(uncertainty=catalog_info.sigma_time), longitude=lon, longitude_errors=QuantityError(), latitude=lat, latitude_errors=QuantityError(), depth=catalog_info["z(km)"]*1e3, depth_errors=QuantityError(), method_id=ResourceIdentifier(id="GaMMA"), arrivals=picks2arrivals(event_picks), quality=OriginQuality(associated_phase_count=len(event_picks)), evaluation_status="preliminary", evaluation_mode="automatic", creation_info=CreationInfo( author="SeisMonitor", creation_time=UTCDateTime.now() ) ) return origin
[docs] def get_gamma_catalog(picks_df, catalog_df, in_proj, out_proj): """Create a Catalog object from picks and catalog dataframes. Args: picks_df (pandas.DataFrame): DataFrame containing pick information catalog_df (pandas.DataFrame): DataFrame containing catalog information in_proj (str): Input projection EPSG code out_proj (str): Output projection EPSG code Returns: Catalog: Obspy Catalog object """ events = [] for i, row in catalog_df.iterrows(): picks = picks_df[picks_df["event_idx"] == i] picks = get_gamma_picks(picks) origin = get_gamma_origin(row, picks, in_proj, out_proj) ev = Event( resource_id=ResourceIdentifier(id=origin.resource_id.id, prefix='event'), event_type="earthquake", event_type_certainty="known", picks=picks, amplitudes=[], focal_mechanisms=[], origins=[origin], magnitudes=[], station_magnitudes=[], creation_info=CreationInfo( author="SeisMonitor", creation_time=UTCDateTime.now() ) ) ev.preferred_origin_id = origin.resource_id.id events.append(ev) catalog = Catalog( events=events, resource_id=ResourceIdentifier(prefix='catalog'), creation_info=CreationInfo( author="SeisMonitor", creation_time=UTCDateTime.now() ) ) return catalog
[docs] class GaMMA: """Main GaMMA class for seismic event association. Args: gamma_obj (GaMMAObj): GaMMAObj instance with configuration """ def __init__(self, gamma_obj): """Initialize GaMMA with a GaMMAObj.""" self.gamma_obj = gamma_obj
[docs] def associate(self, picks_csv, xml_path, out_dir): """Associate picks and create catalog. Args: picks_csv (str): Path to picks CSV file xml_path (str): Path to station XML file out_dir (str): Output directory path Returns: tuple: (Catalog, pandas.DataFrame, pandas.DataFrame) containing catalog, catalog DataFrame, and picks DataFrame """ self.picks_csv = picks_csv self.xml_path = xml_path self.response = read_inventory(xml_path) self.out_dir = out_dir self.xml_out_file = os.path.join(out_dir, "associations.xml") self.catalog_out_file = os.path.join(out_dir, "catalog.csv") self.picks_out_file = os.path.join(out_dir, "picks.csv") picks_df = ut.get_picks_gamma_df( self.picks_csv, self.response, compute_amplitudes=self.gamma_obj.calculate_amp, p_window=self.gamma_obj.p_window, s_window=self.gamma_obj.s_window, waterlevel=self.gamma_obj.waterlevel ) stations = list(set(picks_df["station"].to_list())) stations = [str(val) for val in stations] self.gamma_obj.add_response(self.response) station_df = self.gamma_obj.stations station_df = station_df[station_df["station_name"].isin(stations)] station_df = station_df.reset_index(drop=True) config = self.gamma_obj.__dict__ pbar = tqdm(1) meta = station_df.merge(picks_df["id"], how="right", on="id") catalogs, assignments = association( picks_df, station_df, config, method=self.gamma_obj.method, pbar=pbar ) if not catalogs: return Catalog(), pd.DataFrame(), pd.DataFrame() catalog = pd.DataFrame(catalogs) catalog.index = catalog.index + 1 # important to fix event index with the same index in assignments catalog["time"] = pd.to_datetime(catalog["time"], format="%Y-%m-%dT%H:%M:%S.%f") catalog["event_idx"] = catalog.index # catalog["event_index"] = catalog["time"].apply(lambda x: x) # catalog["event_idx"] = catalog.index assignments = pd.DataFrame( assignments, columns=["pick_index", "event_idx", "prob_gamma"] ) assignments = assignments.set_index("pick_index") picks = pd.merge( picks_df, assignments, left_index=True, right_index=True, how='right' ) isfile(self.catalog_out_file) catalog.to_csv(self.catalog_out_file, index=False) isfile(self.picks_out_file) picks.to_csv(self.picks_out_file) #saving pick index obspy_catalog = get_gamma_catalog( picks, catalog, self.gamma_obj.epsg_proj, "EPSG:4326" ) isfile(self.xml_out_file) obspy_catalog.write(self.xml_out_file, format="SC3ML") return obspy_catalog, catalog, picks
if __name__ == "__main__": csv = "/home/emmanuel/EDCT/SeisMonitor/SeisMonitor/monitor/associator/test.csv" cat_csv = "/home/emmanuel/EDCT/SeisMonitor/SeisMonitor/monitor/associator/cat_test.csv" df = pd.read_csv(csv) df["timestamp"] = pd.to_datetime(df["timestamp"]) cat_df = pd.read_csv(cat_csv) cat_df["time"] = pd.to_datetime(cat_df["time"]) get_gamma_catalog(df, cat_df)