[1]:

import numpy as np
import matplotlib.pyplot as plt
import h5py
import corner
import os
import matplotlib.lines as mlines
from matplotlib.font_manager import FontProperties
plt.rcParams['font.size'] = 20

notebook_direc = os.getcwd()
samples_direc = notebook_direc + "/../../MCMC_FM_Data"
run_direc = samples_direc + "/PE_samples"

FM_direc = samples_direc + "/fisher_matrices"

[2]:

def load_samples_h5(file_path):
    with h5py.File(file_path, "r") as f:
        # Load parameter labels
        param_labels = [label.decode('utf-8') if isinstance(label, bytes) else label
                        for label in f["param_labels"][()]]

        # Load true values
        true_vals = f["true_values"][()]

        # Load samples into a dictionary
        samples_dict = {}
        for label in param_labels:
            samples_dict[label] = f[f"samples/{label}"][()]

    return samples_dict, param_labels, true_vals

[3]:

# Identify path of samples for case_1 in FEW paper
file_path_case_1 = run_direc + "/Case_1_EMRI_prograde" + '/prograde_EMRI_case_1_1eneg5.h5'

# Extract processed samples for case_1, parameter labels and the true parameters
burned_samples_dict_case_1, param_labels, true_vals_EMRI_case_1 =  load_samples_h5(file_path_case_1)

# Extract as list
burned_samples_EMRI_case_1 = list(burned_samples_dict_case_1.values())

N_params = len(param_labels)

# Extract mean and 1sigma deviations of processed MCMC chains
std_vecs = [np.std(burned_samples_EMRI_case_1[j]) for j in range(N_params)]
mean_vecs = np.array([np.mean(burned_samples_EMRI_case_1[j]) for j in range(N_params)])

# Extrat Fisher matrix computation
fname = FM_direc + "/case_1/Fisher_case_1.h5"
with h5py.File(fname, "r") as f:
    FM = f["Fisher"][:,:]

# Compute approximation to parameter covariance matrix
Cov_Matrix = np.linalg.inv(FM)

# Now generate samples from the approximated Fisher matrix
FM_approx_samples = np.random.multivariate_normal(mean_vecs, Cov_Matrix, len(burned_samples_EMRI_case_1[0]))

Posterior variance comparison

[4]:

# Quantitative Goodness of Fit Analysis
# Compare Fisher Matrix approximation vs MCMC samples

import warnings
warnings.filterwarnings('ignore')

print("=" * 70)
print("QUANTITATIVE GOODNESS OF FIT ANALYSIS")
print("=" * 70)

# 1. Parameter-wise comparison
print("\n1. PARAMETER-WISE COMPARISON:")
print("-" * 80)
print(f"{'Parameter':<18} {'MCMC σ':<18} {'FM σ':<18} {'σ Ratio':<15}")
print("-" * 80)

mcmc_stds = []
fm_stds = []

for i, param in enumerate(param_labels):
    mcmc_mean = np.mean(burned_samples_EMRI_case_1[i])
    mcmc_std = np.std(burned_samples_EMRI_case_1[i])
    fm_mean = np.mean(FM_approx_samples[:, i])
    fm_std = np.std(FM_approx_samples[:, i])

    mcmc_stds.append(mcmc_std)
    fm_stds.append(fm_std)

    std_ratio = fm_std / mcmc_std if mcmc_std != 0 else np.inf

    print(f"{param:<18} {mcmc_std:<18.4e} {fm_std:<18.4e} {std_ratio:<15.3f}")

mcmc_stds = np.array(mcmc_stds)
fm_stds = np.array(fm_stds)

======================================================================
QUANTITATIVE GOODNESS OF FIT ANALYSIS
======================================================================

1. PARAMETER-WISE COMPARISON:
--------------------------------------------------------------------------------
Parameter          MCMC σ             FM σ               σ Ratio
--------------------------------------------------------------------------------
$M/M_{\odot}$      8.3227e-01         8.1852e-01         0.983
$\mu/M_{\odot}$    1.1399e-05         1.0981e-05         0.963
$a$                2.3269e-07         2.2174e-07         0.953
$p_{0}$            3.1193e-06         3.0713e-06         0.985
$e_{0}$            1.5803e-07         1.5276e-07         0.967
$D_{s}/Gpc$        8.4940e-02         8.3070e-02         0.978
$\theta_{S}$       2.8403e-03         2.8054e-03         0.988
$\phi_{S}$         2.7745e-03         2.7006e-03         0.973
$\theta_{K}$       2.3351e-02         2.2953e-02         0.983
$\phi_{K}$         2.2755e-02         2.2373e-02         0.983
$\Phi_{\phi_{0}}$  6.8029e-02         6.6177e-02         0.973
$\Phi_{r_{0}}$     1.5101e-02         1.5250e-02         1.010

Corner plot

[5]:

# Build corner kwargs
corner_kwargs = dict(plot_datapoints=False,smooth1d=True,
                       labels=param_labels, levels=(1 - np.exp(-0.5), 1 - np.exp(-2), 1 - np.exp(-9 / 2.)),
                       label_kwargs=dict(fontsize=40), max_n_ticks=4,
                       show_titles=False, smooth = True, labelpad = 0.4)

# Reshape for corner plot
samples_corner_1eneg5 = np.column_stack(burned_samples_EMRI_case_1)

# plot a corner plot of the 1 and 2D marginalised distributions from MCMC analysis
figure = corner.corner(samples_corner_1eneg5,bins = 30, color = 'blue', **corner_kwargs)

# plot a corner plot of the 1 and 2D marginalised distributions approximated via FM
corner.corner(FM_approx_samples, fig = figure, bins = 30, color = 'red', **corner_kwargs)

axes = np.array(figure.axes).reshape((N_params, N_params))

# True parameters for 1D marginals
for i in range(N_params):
    ax = axes[i, i]
    ax.axvline(true_vals_EMRI_case_1[i], color="k")

# True parameters for 2D marginals
for yi in range(N_params):
    for xi in range(yi):
        ax = axes[yi, xi]
        ax.axhline(true_vals_EMRI_case_1[yi], color="k")
        ax.axvline(true_vals_EMRI_case_1[xi],color= "k")
        ax.plot(true_vals_EMRI_case_1[xi], true_vals_EMRI_case_1[yi], "sk")

for ax in figure.get_axes():
    ax.tick_params(axis='both', labelsize=18)
# For legend -- show true params, MCMC, FM samples
blue_line = mlines.Line2D([], [], color='blue', label=r'MCMC')
red_line = mlines.Line2D([], [], color='red', label=r'Fisher Matrix')
black_line = mlines.Line2D([], [], color='black', label='True Value')

# Plot legend
plt.legend(handles=[blue_line, red_line, black_line], fontsize = 65, frameon = True, bbox_to_anchor=(0.25, N_params), loc="upper right", title = "FEW v2: Case 1", title_fontproperties = FontProperties(size = 70, weight = 'bold'))
plt.subplots_adjust(left=-0.1, bottom=-0.1, right=None, top=None, wspace=0.15, hspace=0.15)

../_images/validation_check_fisher_against_mcmc_executed_6_0.png

KL, KS and JS divergence checks

[6]:

from scipy.stats import entropy, gaussian_kde
from scipy.spatial.distance import jensenshannon

# 2. KL Divergence and Jensen-Shannon Divergence Analysis
print("\n2. DIVERGENCE ANALYSIS:")
print("-" * 70)

kl_divergences = []
js_divergences = []
ks_statistics = []

# For each parameter, compute divergences
for i, param in enumerate(param_labels):
    mcmc_samples = burned_samples_EMRI_case_1[i]
    fm_samples = FM_approx_samples[:, i]

    # Create probability density estimates using KDE
    try:
        # Use KDE to estimate probability densities
        mcmc_kde = gaussian_kde(mcmc_samples)
        fm_kde = gaussian_kde(fm_samples)

        # Create common evaluation points
        min_val = min(np.min(mcmc_samples), np.min(fm_samples))
        max_val = max(np.max(mcmc_samples), np.max(fm_samples))
        eval_points = np.linspace(min_val, max_val, 1000)

        # Evaluate PDFs
        mcmc_pdf = mcmc_kde(eval_points)
        fm_pdf = fm_kde(eval_points)

        # Normalize to ensure they sum to 1 (for discrete approximation)
        mcmc_pdf = mcmc_pdf / np.sum(mcmc_pdf)
        fm_pdf = fm_pdf / np.sum(fm_pdf)

        # Add small epsilon to avoid log(0)
        epsilon = 1e-10
        mcmc_pdf = mcmc_pdf + epsilon
        fm_pdf = fm_pdf + epsilon

        # Calculate KL divergence: D_KL(MCMC || FM)
        kl_div = entropy(mcmc_pdf, fm_pdf)
        kl_divergences.append(kl_div)

        # Calculate Jensen-Shannon divergence (symmetric)
        js_div = jensenshannon(mcmc_pdf, fm_pdf)
        js_divergences.append(js_div)

        # Kolmogorov-Smirnov test
        from scipy.stats import ks_2samp
        ks_stat, ks_pvalue = ks_2samp(mcmc_samples, fm_samples)
        ks_statistics.append(ks_stat)

        print(f"{param:<20} KL: {kl_div:<10.4f} JS: {js_div:<10.4f} KS: {ks_stat:<10.4f}")

    except Exception as e:
        print(f"{param:<20} Error in divergence calculation: {str(e)[:30]}...")
        kl_divergences.append(np.nan)
        js_divergences.append(np.nan)
        ks_statistics.append(np.nan)

# Summary statistics
print(f"\nDIVERGENCE SUMMARY:")
print(f"Mean KL divergence:  {np.nanmean(kl_divergences):.4f}")
print(f"Mean JS divergence:  {np.nanmean(js_divergences):.4f}")
print(f"Mean KS statistic:   {np.nanmean(ks_statistics):.4f}")


2. DIVERGENCE ANALYSIS:
----------------------------------------------------------------------
$M/M_{\odot}$        KL: 0.0045     JS: 0.0333     KS: 0.0231
$\mu/M_{\odot}$      KL: 0.0034     JS: 0.0285     KS: 0.0179
$a$                  KL: 0.0038     JS: 0.0293     KS: 0.0174
$p_{0}$              KL: 0.0047     JS: 0.0335     KS: 0.0173
$e_{0}$              KL: 0.0030     JS: 0.0271     KS: 0.0154
$D_{s}/Gpc$          KL: 0.0072     JS: 0.0411     KS: 0.0185
$\theta_{S}$         KL: 0.0022     JS: 0.0223     KS: 0.0115
$\phi_{S}$           KL: 0.0048     JS: 0.0342     KS: 0.0256
$\theta_{K}$         KL: 0.0038     JS: 0.0292     KS: 0.0185
$\phi_{K}$           KL: 0.0084     JS: 0.0450     KS: 0.0291
$\Phi_{\phi_{0}}$    KL: 0.0063     JS: 0.0369     KS: 0.0189
$\Phi_{r_{0}}$       KL: 0.0044     JS: 0.0334     KS: 0.0222

DIVERGENCE SUMMARY:
Mean KL divergence:  0.0047
Mean JS divergence:  0.0328
Mean KS statistic:   0.0196