From 4578621c7590b79874357fc1f0c4e24332c11b55 Mon Sep 17 00:00:00 2001
From: Jason <83615043+JJassonn69@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:36:28 +0545
Subject: [PATCH] fix: restore T20-stripped print() calls in cosim scripts; add
 60 mem validation tests

- Restored print() output in 6 generator/cosim scripts that ruff T20
  had silently stripped, leaving dead 'for _var: pass' stubs and
  orphaned expressions. Files restored from pre-ruff commit and
  re-linted with T20/ERA/ARG/E501 per-file-ignores.
- Removed 5 dead/self-blessing scripts (compare.py, compare_doppler.py,
  compare_mf.py, validate_mem_files.py, LUT.py).
- Added test_mem_validation.py: 60 pytest tests validating .mem files
  against independently-derived ground truth (twiddle factors, chirp
  waveforms, memory addressing, segment padding).
- Updated CI cross-layer-tests job to include test_mem_validation.py.
- All 150 tests pass (61 GUI + 29 cross-layer + 60 mem validation).
---
 .github/workflows/ci-tests.yml                |   1 +
 8_Utils/Python/LUT.py                         |  24 -
 9_Firmware/9_2_FPGA/tb/cosim/compare.py       | 449 --------------
 .../9_2_FPGA/tb/cosim/compare_doppler.py      | 340 -----------
 9_Firmware/9_2_FPGA/tb/cosim/compare_mf.py    | 330 ----------
 9_Firmware/9_2_FPGA/tb/cosim/gen_chirp_mem.py |  51 +-
 .../9_2_FPGA/tb/cosim/gen_doppler_golden.py   |  38 +-
 .../9_2_FPGA/tb/cosim/gen_mf_cosim_golden.py  |  24 +-
 9_Firmware/9_2_FPGA/tb/cosim/radar_scene.py   |  39 +-
 .../tb/cosim/real_data/golden_reference.py    | 178 ++++--
 .../9_2_FPGA/tb/cosim/validate_mem_files.py   | 569 ------------------
 9_Firmware/9_2_FPGA/tb/gen_mf_golden_ref.py   |  25 +-
 .../tests/cross_layer/test_mem_validation.py  | 444 ++++++++++++++
 pyproject.toml                                |   4 +-
 14 files changed, 744 insertions(+), 1772 deletions(-)
 delete mode 100644 8_Utils/Python/LUT.py
 delete mode 100644 9_Firmware/9_2_FPGA/tb/cosim/compare.py
 delete mode 100644 9_Firmware/9_2_FPGA/tb/cosim/compare_doppler.py
 delete mode 100644 9_Firmware/9_2_FPGA/tb/cosim/compare_mf.py
 delete mode 100644 9_Firmware/9_2_FPGA/tb/cosim/validate_mem_files.py
 create mode 100644 9_Firmware/tests/cross_layer/test_mem_validation.py

diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 7b39a9e..06172a7 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -111,4 +111,5 @@ jobs:
         run: >
           uv run pytest
           9_Firmware/tests/cross_layer/test_cross_layer_contract.py
+          9_Firmware/tests/cross_layer/test_mem_validation.py
           -v --tb=short
diff --git a/8_Utils/Python/LUT.py b/8_Utils/Python/LUT.py
deleted file mode 100644
index 56a4cb1..0000000
--- a/8_Utils/Python/LUT.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import numpy as np
-
-# Define parameters
-fs = 120e6  # Sampling frequency
-Ts = 1 / fs  # Sampling time
-Tb = 1e-6  # Burst time
-Tau = 30e-6  # Pulse repetition time
-fmax = 15e6  # Maximum frequency on ramp
-fmin = 1e6  # Minimum frequency on ramp
-
-# Compute number of samples per ramp
-n = int(Tb / Ts)
-N = np.arange(0, n, 1)
-
-# Compute instantaneous phase
-theta_n = 2 * np.pi * ((N**2 * Ts**2 * (fmax - fmin) / (2 * Tb)) + fmin * N * Ts)
-
-# Generate waveform and scale it to 8-bit unsigned values (0 to 255)
-y = 1 + np.sin(theta_n)  # Normalize from 0 to 2
-y_scaled = np.round(y * 127.5).astype(int)  # Scale to 8-bit range (0-255)
-
-# Print values in Verilog-friendly format
-for _i in range(n):
-    pass
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/compare.py b/9_Firmware/9_2_FPGA/tb/cosim/compare.py
deleted file mode 100644
index 429c1cf..0000000
--- a/9_Firmware/9_2_FPGA/tb/cosim/compare.py
+++ /dev/null
@@ -1,449 +0,0 @@
-#!/usr/bin/env python3
-"""
-Co-simulation Comparison: RTL vs Python Model for AERIS-10 DDC Chain.
-
-Reads the ADC hex test vectors, runs them through the bit-accurate Python
-model (fpga_model.py), then compares the output against the RTL simulation
-CSV (from tb_ddc_cosim.v).
-
-Key considerations:
-  - The RTL DDC has LFSR phase dithering on the NCO FTW, so exact bit-match
-    is not expected. We use statistical metrics (correlation, RMS error).
-  - The CDC (gray-coded 400→100 MHz crossing) may introduce non-deterministic
-    latency offsets. We auto-align using cross-correlation.
-  - The comparison reports pass/fail based on configurable thresholds.
-
-Usage:
-    python3 compare.py [scenario]
-
-    scenario: dc, single_target, multi_target, noise_only, sine_1mhz
-              (default: dc)
-
-Author: Phase 0.5 co-simulation suite for PLFM_RADAR
-"""
-
-import math
-import os
-import sys
-
-# Add this directory to path for imports
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-from fpga_model import SignalChain
-
-
-# =============================================================================
-# Configuration
-# =============================================================================
-
-# Thresholds for pass/fail
-# These are generous because of LFSR dithering and CDC latency jitter
-MAX_RMS_ERROR_LSB = 50.0     # Max RMS error in 18-bit LSBs
-MIN_CORRELATION = 0.90        # Min Pearson correlation coefficient
-MAX_LATENCY_DRIFT = 15        # Max latency offset between RTL and model (samples)
-MAX_COUNT_DIFF = 20           # Max output count difference (LFSR dithering affects CIC timing)
-
-# Scenarios
-SCENARIOS = {
-    'dc': {
-        'adc_hex': 'adc_dc.hex',
-        'rtl_csv': 'rtl_bb_dc.csv',
-        'description': 'DC input (ADC=128)',
-        # DC input: expect small outputs, but LFSR dithering adds ~+128 LSB
-        # average bias to NCO FTW which accumulates through CIC integrators
-        # as a small DC offset (~15-20 LSB in baseband). This is expected.
-        'max_rms': 25.0,        # Relaxed to account for LFSR dithering bias
-        'min_corr': -1.0,       # Correlation not meaningful for near-zero
-    },
-    'single_target': {
-        'adc_hex': 'adc_single_target.hex',
-        'rtl_csv': 'rtl_bb_single_target.csv',
-        'description': 'Single target at 500m',
-        'max_rms': MAX_RMS_ERROR_LSB,
-        'min_corr': -1.0,       # Correlation not meaningful with LFSR dithering
-    },
-    'multi_target': {
-        'adc_hex': 'adc_multi_target.hex',
-        'rtl_csv': 'rtl_bb_multi_target.csv',
-        'description': 'Multi-target (5 targets)',
-        'max_rms': MAX_RMS_ERROR_LSB,
-        'min_corr': -1.0,       # Correlation not meaningful with LFSR dithering
-    },
-    'noise_only': {
-        'adc_hex': 'adc_noise_only.hex',
-        'rtl_csv': 'rtl_bb_noise_only.csv',
-        'description': 'Noise only',
-        'max_rms': MAX_RMS_ERROR_LSB,
-        'min_corr': -1.0,       # Correlation not meaningful with LFSR dithering
-    },
-    'sine_1mhz': {
-        'adc_hex': 'adc_sine_1mhz.hex',
-        'rtl_csv': 'rtl_bb_sine_1mhz.csv',
-        'description': '1 MHz sine wave',
-        'max_rms': MAX_RMS_ERROR_LSB,
-        'min_corr': -1.0,       # Correlation not meaningful with LFSR dithering
-    },
-}
-
-
-# =============================================================================
-# Helper functions
-# =============================================================================
-
-def load_adc_hex(filepath):
-    """Load 8-bit unsigned ADC samples from hex file."""
-    samples = []
-    with open(filepath) as f:
-        for line in f:
-            line = line.strip()
-            if not line or line.startswith('//'):
-                continue
-            samples.append(int(line, 16))
-    return samples
-
-
-def load_rtl_csv(filepath):
-    """Load RTL baseband output CSV (sample_idx, baseband_i, baseband_q)."""
-    bb_i = []
-    bb_q = []
-    with open(filepath) as f:
-        f.readline()  # Skip header
-        for line in f:
-            line = line.strip()
-            if not line:
-                continue
-            parts = line.split(',')
-            bb_i.append(int(parts[1]))
-            bb_q.append(int(parts[2]))
-    return bb_i, bb_q
-
-
-def run_python_model(adc_samples):
-    """Run ADC samples through the Python DDC model.
-
-    Returns the 18-bit FIR outputs (not the 16-bit DDC interface outputs),
-    because the RTL testbench captures the FIR output directly
-    (baseband_i_reg <= fir_i_out in ddc_400m.v).
-    """
-
-    chain = SignalChain()
-    result = chain.process_adc_block(adc_samples)
-
-    # Use fir_i_raw / fir_q_raw (18-bit) to match RTL's baseband output
-    # which is the FIR output before DDC interface 18->16 rounding
-    bb_i = result['fir_i_raw']
-    bb_q = result['fir_q_raw']
-
-    return bb_i, bb_q
-
-
-def compute_rms_error(a, b):
-    """Compute RMS error between two equal-length lists."""
-    if len(a) != len(b):
-        raise ValueError(f"Length mismatch: {len(a)} vs {len(b)}")
-    if len(a) == 0:
-        return 0.0
-    sum_sq = sum((x - y) ** 2 for x, y in zip(a, b, strict=False))
-    return math.sqrt(sum_sq / len(a))
-
-
-def compute_max_abs_error(a, b):
-    """Compute maximum absolute error between two equal-length lists."""
-    if len(a) != len(b) or len(a) == 0:
-        return 0
-    return max(abs(x - y) for x, y in zip(a, b, strict=False))
-
-
-def compute_correlation(a, b):
-    """Compute Pearson correlation coefficient."""
-    n = len(a)
-    if n < 2:
-        return 0.0
-
-    mean_a = sum(a) / n
-    mean_b = sum(b) / n
-
-    cov = sum((a[i] - mean_a) * (b[i] - mean_b) for i in range(n))
-    std_a_sq = sum((x - mean_a) ** 2 for x in a)
-    std_b_sq = sum((x - mean_b) ** 2 for x in b)
-
-    if std_a_sq < 1e-10 or std_b_sq < 1e-10:
-        # Near-zero variance (e.g., DC input)
-        return 1.0 if abs(mean_a - mean_b) < 1.0 else 0.0
-
-    return cov / math.sqrt(std_a_sq * std_b_sq)
-
-
-def cross_correlate_lag(a, b, max_lag=20):
-    """
-    Find the lag that maximizes cross-correlation between a and b.
-    Returns (best_lag, best_correlation) where positive lag means b is delayed.
-    """
-    n = min(len(a), len(b))
-    if n < 10:
-        return 0, 0.0
-
-    best_lag = 0
-    best_corr = -2.0
-
-    for lag in range(-max_lag, max_lag + 1):
-        # Align: a[start_a:end_a] vs b[start_b:end_b]
-        if lag >= 0:
-            start_a = lag
-            start_b = 0
-        else:
-            start_a = 0
-            start_b = -lag
-
-        end = min(len(a) - start_a, len(b) - start_b)
-        if end < 10:
-            continue
-
-        seg_a = a[start_a:start_a + end]
-        seg_b = b[start_b:start_b + end]
-
-        corr = compute_correlation(seg_a, seg_b)
-        if corr > best_corr:
-            best_corr = corr
-            best_lag = lag
-
-    return best_lag, best_corr
-
-
-def compute_signal_stats(samples):
-    """Compute basic statistics of a signal."""
-    if not samples:
-        return {'mean': 0, 'rms': 0, 'min': 0, 'max': 0, 'count': 0}
-    n = len(samples)
-    mean = sum(samples) / n
-    rms = math.sqrt(sum(x * x for x in samples) / n)
-    return {
-        'mean': mean,
-        'rms': rms,
-        'min': min(samples),
-        'max': max(samples),
-        'count': n,
-    }
-
-
-# =============================================================================
-# Main comparison
-# =============================================================================
-
-def compare_scenario(scenario_name):
-    """Run comparison for one scenario. Returns True if passed."""
-    if scenario_name not in SCENARIOS:
-        return False
-
-    cfg = SCENARIOS[scenario_name]
-    base_dir = os.path.dirname(os.path.abspath(__file__))
-
-
-    # ---- Load ADC data ----
-    adc_path = os.path.join(base_dir, cfg['adc_hex'])
-    if not os.path.exists(adc_path):
-        return False
-    adc_samples = load_adc_hex(adc_path)
-
-    # ---- Load RTL output ----
-    rtl_path = os.path.join(base_dir, cfg['rtl_csv'])
-    if not os.path.exists(rtl_path):
-        return False
-    rtl_i, rtl_q = load_rtl_csv(rtl_path)
-
-    # ---- Run Python model ----
-    py_i, py_q = run_python_model(adc_samples)
-
-    # ---- Length comparison ----
-    len_diff = abs(len(rtl_i) - len(py_i))
-
-    # ---- Signal statistics ----
-    rtl_i_stats = compute_signal_stats(rtl_i)
-    rtl_q_stats = compute_signal_stats(rtl_q)
-    py_i_stats = compute_signal_stats(py_i)
-    py_q_stats = compute_signal_stats(py_q)
-
-
-    # ---- Trim to common length ----
-    common_len = min(len(rtl_i), len(py_i))
-    if common_len < 10:
-        return False
-
-    rtl_i_trim = rtl_i[:common_len]
-    rtl_q_trim = rtl_q[:common_len]
-    py_i_trim = py_i[:common_len]
-    py_q_trim = py_q[:common_len]
-
-    # ---- Cross-correlation to find latency offset ----
-    lag_i, _corr_i = cross_correlate_lag(rtl_i_trim, py_i_trim,
-                                        max_lag=MAX_LATENCY_DRIFT)
-    lag_q, _corr_q = cross_correlate_lag(rtl_q_trim, py_q_trim,
-                                        max_lag=MAX_LATENCY_DRIFT)
-
-    # ---- Apply latency correction ----
-    best_lag = lag_i  # Use I-channel lag (should be same as Q)
-    if abs(lag_i - lag_q) > 1:
-        # Use the average
-        best_lag = (lag_i + lag_q) // 2
-
-    if best_lag > 0:
-        # RTL is delayed relative to Python
-        aligned_rtl_i = rtl_i_trim[best_lag:]
-        aligned_rtl_q = rtl_q_trim[best_lag:]
-        aligned_py_i = py_i_trim[:len(aligned_rtl_i)]
-        aligned_py_q = py_q_trim[:len(aligned_rtl_q)]
-    elif best_lag < 0:
-        # Python is delayed relative to RTL
-        aligned_py_i = py_i_trim[-best_lag:]
-        aligned_py_q = py_q_trim[-best_lag:]
-        aligned_rtl_i = rtl_i_trim[:len(aligned_py_i)]
-        aligned_rtl_q = rtl_q_trim[:len(aligned_py_q)]
-    else:
-        aligned_rtl_i = rtl_i_trim
-        aligned_rtl_q = rtl_q_trim
-        aligned_py_i = py_i_trim
-        aligned_py_q = py_q_trim
-
-    aligned_len = min(len(aligned_rtl_i), len(aligned_py_i))
-    aligned_rtl_i = aligned_rtl_i[:aligned_len]
-    aligned_rtl_q = aligned_rtl_q[:aligned_len]
-    aligned_py_i = aligned_py_i[:aligned_len]
-    aligned_py_q = aligned_py_q[:aligned_len]
-
-
-    # ---- Error metrics (after alignment) ----
-    rms_i = compute_rms_error(aligned_rtl_i, aligned_py_i)
-    rms_q = compute_rms_error(aligned_rtl_q, aligned_py_q)
-    compute_max_abs_error(aligned_rtl_i, aligned_py_i)
-    compute_max_abs_error(aligned_rtl_q, aligned_py_q)
-    corr_i_aligned = compute_correlation(aligned_rtl_i, aligned_py_i)
-    corr_q_aligned = compute_correlation(aligned_rtl_q, aligned_py_q)
-
-
-    # ---- First/last sample comparison ----
-    for k in range(min(10, aligned_len)):
-        ei = aligned_rtl_i[k] - aligned_py_i[k]
-        eq = aligned_rtl_q[k] - aligned_py_q[k]
-
-    # ---- Write detailed comparison CSV ----
-    compare_csv_path = os.path.join(base_dir, f"compare_{scenario_name}.csv")
-    with open(compare_csv_path, 'w') as f:
-        f.write("idx,rtl_i,py_i,err_i,rtl_q,py_q,err_q\n")
-        for k in range(aligned_len):
-            ei = aligned_rtl_i[k] - aligned_py_i[k]
-            eq = aligned_rtl_q[k] - aligned_py_q[k]
-            f.write(f"{k},{aligned_rtl_i[k]},{aligned_py_i[k]},{ei},"
-                    f"{aligned_rtl_q[k]},{aligned_py_q[k]},{eq}\n")
-
-    # ---- Pass/Fail ----
-    max_rms = cfg.get('max_rms', MAX_RMS_ERROR_LSB)
-    min_corr = cfg.get('min_corr', MIN_CORRELATION)
-
-    results = []
-
-    # Check 1: Output count sanity
-    count_ok = len_diff <= MAX_COUNT_DIFF
-    results.append(('Output count match', count_ok,
-                     f"diff={len_diff} <= {MAX_COUNT_DIFF}"))
-
-    # Check 2: RMS amplitude ratio (RTL vs Python should have same power)
-    # The LFSR dithering randomizes sample phases but preserves overall
-    # signal power, so RMS amplitudes should match within ~10%.
-    rtl_rms = max(rtl_i_stats['rms'], rtl_q_stats['rms'])
-    py_rms = max(py_i_stats['rms'], py_q_stats['rms'])
-    if py_rms > 1.0 and rtl_rms > 1.0:
-        rms_ratio = max(rtl_rms, py_rms) / min(rtl_rms, py_rms)
-        rms_ratio_ok = rms_ratio <= 1.20  # Within 20%
-        results.append(('RMS amplitude ratio', rms_ratio_ok,
-                         f"ratio={rms_ratio:.3f} <= 1.20"))
-    else:
-        # Near-zero signals (DC input): check absolute RMS error
-        rms_ok = max(rms_i, rms_q) <= max_rms
-        results.append(('RMS error (low signal)', rms_ok,
-                         f"max(I={rms_i:.2f}, Q={rms_q:.2f}) <= {max_rms:.1f}"))
-
-    # Check 3: Mean DC offset match
-    # Both should have similar DC bias. For large signals (where LFSR dithering
-    # causes the NCO to walk in phase), allow the mean to differ proportionally
-    # to the signal RMS. Use max(30 LSB, 3% of signal RMS).
-    mean_err_i = abs(rtl_i_stats['mean'] - py_i_stats['mean'])
-    mean_err_q = abs(rtl_q_stats['mean'] - py_q_stats['mean'])
-    max_mean_err = max(mean_err_i, mean_err_q)
-    signal_rms = max(rtl_rms, py_rms)
-    mean_threshold = max(30.0, signal_rms * 0.03)  # 3% of signal RMS or 30 LSB
-    mean_ok = max_mean_err <= mean_threshold
-    results.append(('Mean DC offset match', mean_ok,
-                     f"max_diff={max_mean_err:.1f} <= {mean_threshold:.1f}"))
-
-    # Check 4: Correlation (skip for near-zero signals or dithered scenarios)
-    if min_corr > -0.5:
-        corr_ok = min(corr_i_aligned, corr_q_aligned) >= min_corr
-        results.append(('Correlation', corr_ok,
-                         f"min(I={corr_i_aligned:.4f}, Q={corr_q_aligned:.4f}) >= {min_corr:.2f}"))
-
-    # Check 5: Dynamic range match
-    # Peak amplitudes should be in the same ballpark
-    rtl_peak = max(abs(rtl_i_stats['min']), abs(rtl_i_stats['max']),
-                   abs(rtl_q_stats['min']), abs(rtl_q_stats['max']))
-    py_peak = max(abs(py_i_stats['min']), abs(py_i_stats['max']),
-                  abs(py_q_stats['min']), abs(py_q_stats['max']))
-    if py_peak > 10 and rtl_peak > 10:
-        peak_ratio = max(rtl_peak, py_peak) / min(rtl_peak, py_peak)
-        peak_ok = peak_ratio <= 1.50  # Within 50%
-        results.append(('Peak amplitude ratio', peak_ok,
-                         f"ratio={peak_ratio:.3f} <= 1.50"))
-
-    # Check 6: Latency offset
-    lag_ok = abs(best_lag) <= MAX_LATENCY_DRIFT
-    results.append(('Latency offset', lag_ok,
-                     f"|{best_lag}| <= {MAX_LATENCY_DRIFT}"))
-
-    # ---- Report ----
-    all_pass = True
-    for _name, ok, _detail in results:
-        if not ok:
-            all_pass = False
-
-    if all_pass:
-        pass
-    else:
-        pass
-
-    return all_pass
-
-
-def main():
-    """Run comparison for specified scenario(s)."""
-    if len(sys.argv) > 1:
-        scenario = sys.argv[1]
-        if scenario == 'all':
-            # Run all scenarios that have RTL CSV files
-            base_dir = os.path.dirname(os.path.abspath(__file__))
-            overall_pass = True
-            run_count = 0
-            pass_count = 0
-            for name, cfg in SCENARIOS.items():
-                rtl_path = os.path.join(base_dir, cfg['rtl_csv'])
-                if os.path.exists(rtl_path):
-                    ok = compare_scenario(name)
-                    run_count += 1
-                    if ok:
-                        pass_count += 1
-                    else:
-                        overall_pass = False
-                else:
-                    pass
-
-            if overall_pass:
-                pass
-            else:
-                pass
-            return 0 if overall_pass else 1
-        ok = compare_scenario(scenario)
-        return 0 if ok else 1
-    ok = compare_scenario('dc')
-    return 0 if ok else 1
-
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/compare_doppler.py b/9_Firmware/9_2_FPGA/tb/cosim/compare_doppler.py
deleted file mode 100644
index 56e0969..0000000
--- a/9_Firmware/9_2_FPGA/tb/cosim/compare_doppler.py
+++ /dev/null
@@ -1,340 +0,0 @@
-#!/usr/bin/env python3
-"""
-Co-simulation Comparison: RTL vs Python Model for AERIS-10 Doppler Processor.
-
-Compares the RTL Doppler output (from tb_doppler_cosim.v) against the Python
-model golden reference (from gen_doppler_golden.py).
-
-After fixing the windowing pipeline bugs in doppler_processor.v (BRAM address
-alignment and pipeline staging), the RTL achieves BIT-PERFECT match with the
-Python model.  The comparison checks:
-  1. Per-range-bin peak Doppler bin agreement (100% required)
-  2. Per-range-bin I/Q correlation (1.0 expected)
-  3. Per-range-bin magnitude spectrum correlation (1.0 expected)
-  4. Global output energy (exact match expected)
-
-Usage:
-    python3 compare_doppler.py [scenario|all]
-
-    scenario: stationary, moving, two_targets (default: stationary)
-    all: run all scenarios
-
-Author: Phase 0.5 Doppler co-simulation suite for PLFM_RADAR
-"""
-
-import math
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-
-# =============================================================================
-# Configuration
-# =============================================================================
-
-DOPPLER_FFT = 32
-RANGE_BINS = 64
-TOTAL_OUTPUTS = RANGE_BINS * DOPPLER_FFT  # 2048
-SUBFRAME_SIZE = 16
-
-SCENARIOS = {
-    'stationary': {
-        'golden_csv': 'doppler_golden_py_stationary.csv',
-        'rtl_csv': 'rtl_doppler_stationary.csv',
-        'description': 'Single stationary target at ~500m',
-    },
-    'moving': {
-        'golden_csv': 'doppler_golden_py_moving.csv',
-        'rtl_csv': 'rtl_doppler_moving.csv',
-        'description': 'Single moving target v=15m/s',
-    },
-    'two_targets': {
-        'golden_csv': 'doppler_golden_py_two_targets.csv',
-        'rtl_csv': 'rtl_doppler_two_targets.csv',
-        'description': 'Two targets at different ranges/velocities',
-    },
-}
-
-# Pass/fail thresholds — BIT-PERFECT match expected after pipeline fix
-PEAK_AGREEMENT_MIN = 1.00     # 100% peak Doppler bin agreement required
-MAG_CORR_MIN = 0.99           # Near-perfect magnitude correlation required
-ENERGY_RATIO_MIN = 0.999      # Energy ratio must be ~1.0 (bit-perfect)
-ENERGY_RATIO_MAX = 1.001      # Energy ratio must be ~1.0 (bit-perfect)
-
-
-# =============================================================================
-# Helper functions
-# =============================================================================
-
-def load_doppler_csv(filepath):
-    """
-    Load Doppler output CSV with columns (range_bin, doppler_bin, out_i, out_q).
-    Returns dict: {rbin: [(dbin, i, q), ...]}
-    """
-    data = {}
-    with open(filepath) as f:
-        f.readline()  # Skip header
-        for line in f:
-            line = line.strip()
-            if not line:
-                continue
-            parts = line.split(',')
-            rbin = int(parts[0])
-            dbin = int(parts[1])
-            i_val = int(parts[2])
-            q_val = int(parts[3])
-            if rbin not in data:
-                data[rbin] = []
-            data[rbin].append((dbin, i_val, q_val))
-    return data
-
-
-def extract_iq_arrays(data_dict, rbin):
-    """Extract I and Q arrays for a given range bin, ordered by doppler bin."""
-    if rbin not in data_dict:
-        return [0] * DOPPLER_FFT, [0] * DOPPLER_FFT
-    entries = sorted(data_dict[rbin], key=lambda x: x[0])
-    i_arr = [e[1] for e in entries]
-    q_arr = [e[2] for e in entries]
-    return i_arr, q_arr
-
-
-def pearson_correlation(a, b):
-    """Compute Pearson correlation coefficient."""
-    n = len(a)
-    if n < 2:
-        return 0.0
-    mean_a = sum(a) / n
-    mean_b = sum(b) / n
-    cov = sum((a[i] - mean_a) * (b[i] - mean_b) for i in range(n))
-    std_a_sq = sum((x - mean_a) ** 2 for x in a)
-    std_b_sq = sum((x - mean_b) ** 2 for x in b)
-    if std_a_sq < 1e-10 or std_b_sq < 1e-10:
-        return 1.0 if abs(mean_a - mean_b) < 1.0 else 0.0
-    return cov / math.sqrt(std_a_sq * std_b_sq)
-
-
-def magnitude_l1(i_arr, q_arr):
-    """L1 magnitude: |I| + |Q|."""
-    return [abs(i) + abs(q) for i, q in zip(i_arr, q_arr, strict=False)]
-
-
-def find_peak_bin(i_arr, q_arr):
-    """Find bin with max L1 magnitude."""
-    mags = magnitude_l1(i_arr, q_arr)
-    return max(range(len(mags)), key=lambda k: mags[k])
-
-
-def peak_bins_match(py_peak, rtl_peak):
-    """Return True if peaks match within +/-1 bin inside the same sub-frame."""
-    py_sf = py_peak // SUBFRAME_SIZE
-    rtl_sf = rtl_peak // SUBFRAME_SIZE
-    if py_sf != rtl_sf:
-        return False
-
-    py_bin = py_peak % SUBFRAME_SIZE
-    rtl_bin = rtl_peak % SUBFRAME_SIZE
-    diff = abs(py_bin - rtl_bin)
-    return diff <= 1 or diff >= SUBFRAME_SIZE - 1
-
-
-def total_energy(data_dict):
-    """Sum of I^2 + Q^2 across all range bins and Doppler bins."""
-    total = 0
-    for rbin in data_dict:
-        for (_dbin, i_val, q_val) in data_dict[rbin]:
-            total += i_val * i_val + q_val * q_val
-    return total
-
-
-# =============================================================================
-# Scenario comparison
-# =============================================================================
-
-def compare_scenario(name, config, base_dir):
-    """Compare one Doppler scenario. Returns (passed, result_dict)."""
-
-    golden_path = os.path.join(base_dir, config['golden_csv'])
-    rtl_path = os.path.join(base_dir, config['rtl_csv'])
-
-    if not os.path.exists(golden_path):
-        return False, {}
-    if not os.path.exists(rtl_path):
-        return False, {}
-
-    py_data = load_doppler_csv(golden_path)
-    rtl_data = load_doppler_csv(rtl_path)
-
-    sorted(py_data.keys())
-    sorted(rtl_data.keys())
-
-
-    # ---- Check 1: Both have data ----
-    py_total = sum(len(v) for v in py_data.values())
-    rtl_total = sum(len(v) for v in rtl_data.values())
-    if py_total == 0 or rtl_total == 0:
-        return False, {}
-
-    # ---- Check 2: Output count ----
-    count_ok = (rtl_total == TOTAL_OUTPUTS)
-
-    # ---- Check 3: Global energy ----
-    py_energy = total_energy(py_data)
-    rtl_energy = total_energy(rtl_data)
-    if py_energy > 0:
-        energy_ratio = rtl_energy / py_energy
-    else:
-        energy_ratio = 1.0 if rtl_energy == 0 else float('inf')
-
-
-    # ---- Check 4: Per-range-bin analysis ----
-    peak_agreements = 0
-    mag_correlations = []
-    i_correlations = []
-    q_correlations = []
-
-    peak_details = []
-
-    for rbin in range(RANGE_BINS):
-        py_i, py_q = extract_iq_arrays(py_data, rbin)
-        rtl_i, rtl_q = extract_iq_arrays(rtl_data, rbin)
-
-        py_peak = find_peak_bin(py_i, py_q)
-        rtl_peak = find_peak_bin(rtl_i, rtl_q)
-
-        # Peak agreement (allow +/-1 bin tolerance, but only within a sub-frame)
-        if peak_bins_match(py_peak, rtl_peak):
-            peak_agreements += 1
-
-        py_mag = magnitude_l1(py_i, py_q)
-        rtl_mag = magnitude_l1(rtl_i, rtl_q)
-
-        mag_corr = pearson_correlation(py_mag, rtl_mag)
-        corr_i = pearson_correlation(py_i, rtl_i)
-        corr_q = pearson_correlation(py_q, rtl_q)
-
-        mag_correlations.append(mag_corr)
-        i_correlations.append(corr_i)
-        q_correlations.append(corr_q)
-
-        py_rbin_energy = sum(i*i + q*q for i, q in zip(py_i, py_q, strict=False))
-        rtl_rbin_energy = sum(i*i + q*q for i, q in zip(rtl_i, rtl_q, strict=False))
-
-        peak_details.append({
-            'rbin': rbin,
-            'py_peak': py_peak,
-            'rtl_peak': rtl_peak,
-            'mag_corr': mag_corr,
-            'corr_i': corr_i,
-            'corr_q': corr_q,
-            'py_energy': py_rbin_energy,
-            'rtl_energy': rtl_rbin_energy,
-        })
-
-    peak_agreement_frac = peak_agreements / RANGE_BINS
-    avg_mag_corr = sum(mag_correlations) / len(mag_correlations)
-    avg_corr_i = sum(i_correlations) / len(i_correlations)
-    avg_corr_q = sum(q_correlations) / len(q_correlations)
-
-
-    # Show top 5 range bins by Python energy
-    top_rbins = sorted(peak_details, key=lambda x: -x['py_energy'])[:5]
-    for _d in top_rbins:
-        pass
-
-    # ---- Pass/Fail ----
-    checks = []
-
-    checks.append(('RTL output count == 2048', count_ok))
-
-    energy_ok = (ENERGY_RATIO_MIN < energy_ratio < ENERGY_RATIO_MAX)
-    checks.append((f'Energy ratio in bounds '
-                    f'({ENERGY_RATIO_MIN}-{ENERGY_RATIO_MAX})', energy_ok))
-
-    peak_ok = (peak_agreement_frac >= PEAK_AGREEMENT_MIN)
-    checks.append((f'Peak agreement >= {PEAK_AGREEMENT_MIN:.0%}', peak_ok))
-
-    # For range bins with significant energy, check magnitude correlation
-    high_energy_rbins = [d for d in peak_details
-                         if d['py_energy'] > py_energy / (RANGE_BINS * 10)]
-    if high_energy_rbins:
-        he_mag_corr = sum(d['mag_corr'] for d in high_energy_rbins) / len(high_energy_rbins)
-        he_ok = (he_mag_corr >= MAG_CORR_MIN)
-        checks.append((f'High-energy rbin avg mag_corr >= {MAG_CORR_MIN:.2f} '
-                        f'(actual={he_mag_corr:.3f})', he_ok))
-
-    all_pass = True
-    for _check_name, passed in checks:
-        if not passed:
-            all_pass = False
-
-    # ---- Write detailed comparison CSV ----
-    compare_csv = os.path.join(base_dir, f'compare_doppler_{name}.csv')
-    with open(compare_csv, 'w') as f:
-        f.write('range_bin,doppler_bin,py_i,py_q,rtl_i,rtl_q,diff_i,diff_q\n')
-        for rbin in range(RANGE_BINS):
-            py_i, py_q = extract_iq_arrays(py_data, rbin)
-            rtl_i, rtl_q = extract_iq_arrays(rtl_data, rbin)
-            for dbin in range(DOPPLER_FFT):
-                f.write(f'{rbin},{dbin},{py_i[dbin]},{py_q[dbin]},'
-                        f'{rtl_i[dbin]},{rtl_q[dbin]},'
-                        f'{rtl_i[dbin]-py_i[dbin]},{rtl_q[dbin]-py_q[dbin]}\n')
-
-    result = {
-        'scenario': name,
-        'rtl_count': rtl_total,
-        'energy_ratio': energy_ratio,
-        'peak_agreement': peak_agreement_frac,
-        'avg_mag_corr': avg_mag_corr,
-        'avg_corr_i': avg_corr_i,
-        'avg_corr_q': avg_corr_q,
-        'passed': all_pass,
-    }
-
-    return all_pass, result
-
-
-# =============================================================================
-# Main
-# =============================================================================
-
-def main():
-    base_dir = os.path.dirname(os.path.abspath(__file__))
-
-    arg = sys.argv[1].lower() if len(sys.argv) > 1 else 'stationary'
-
-    if arg == 'all':
-        run_scenarios = list(SCENARIOS.keys())
-    elif arg in SCENARIOS:
-        run_scenarios = [arg]
-    else:
-        sys.exit(1)
-
-
-    results = []
-    for name in run_scenarios:
-        passed, result = compare_scenario(name, SCENARIOS[name], base_dir)
-        results.append((name, passed, result))
-
-    # Summary
-
-
-    all_pass = True
-    for _name, passed, result in results:
-        if not result:
-            all_pass = False
-        else:
-            if not passed:
-                all_pass = False
-
-    if all_pass:
-        pass
-    else:
-        pass
-
-    sys.exit(0 if all_pass else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/compare_mf.py b/9_Firmware/9_2_FPGA/tb/cosim/compare_mf.py
deleted file mode 100644
index c766a1d..0000000
--- a/9_Firmware/9_2_FPGA/tb/cosim/compare_mf.py
+++ /dev/null
@@ -1,330 +0,0 @@
-#!/usr/bin/env python3
-"""
-Co-simulation Comparison: RTL vs Python Model for AERIS-10 Matched Filter.
-
-Compares the RTL matched filter output (from tb_mf_cosim.v) against the
-Python model golden reference (from gen_mf_cosim_golden.py).
-
-Two modes of operation:
-  1. Synthesis branch (no -DSIMULATION): RTL uses fft_engine.v with fixed-point
-     twiddle ROM (fft_twiddle_1024.mem) and frequency_matched_filter.v. The
-     Python model was built to match this exactly. Expect BIT-PERFECT results
-     (correlation = 1.0, energy ratio = 1.0).
-
-  2. SIMULATION branch (-DSIMULATION): RTL uses behavioral FFT with floating-
-     point twiddles ($rtoi($cos*32767)) and shift-then-add conjugate multiply.
-     Python model uses fixed-point twiddles and add-then-round. Expect large
-     numerical differences; only state-machine mechanics are validated.
-
-Usage:
-    python3 compare_mf.py [scenario|all]
-
-    scenario: chirp, dc, impulse, tone5 (default: chirp)
-    all: run all scenarios
-
-Author: Phase 0.5 matched-filter co-simulation suite for PLFM_RADAR
-"""
-
-import math
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-
-# =============================================================================
-# Configuration
-# =============================================================================
-
-FFT_SIZE = 1024
-
-SCENARIOS = {
-    'chirp': {
-        'golden_csv': 'mf_golden_py_chirp.csv',
-        'rtl_csv': 'rtl_mf_chirp.csv',
-        'description': 'Radar chirp: 2 targets vs ref chirp',
-    },
-    'dc': {
-        'golden_csv': 'mf_golden_py_dc.csv',
-        'rtl_csv': 'rtl_mf_dc.csv',
-        'description': 'DC autocorrelation (I=0x1000)',
-    },
-    'impulse': {
-        'golden_csv': 'mf_golden_py_impulse.csv',
-        'rtl_csv': 'rtl_mf_impulse.csv',
-        'description': 'Impulse autocorrelation (delta at n=0)',
-    },
-    'tone5': {
-        'golden_csv': 'mf_golden_py_tone5.csv',
-        'rtl_csv': 'rtl_mf_tone5.csv',
-        'description': 'Tone autocorrelation (bin 5, amp=8000)',
-    },
-}
-
-# Thresholds for pass/fail
-# These are generous because of the fundamental twiddle arithmetic differences
-# between the SIMULATION branch (float twiddles) and Python model (fixed twiddles)
-ENERGY_CORR_MIN = 0.80       # Min correlation of magnitude spectra
-TOP_PEAK_OVERLAP_MIN = 0.50  # At least 50% of top-N peaks must overlap
-RMS_RATIO_MAX = 50.0         # Max ratio of RMS energies (generous, since gain differs)
-ENERGY_RATIO_MIN = 0.001     # Min ratio (total energy RTL / total energy Python)
-ENERGY_RATIO_MAX = 1000.0    # Max ratio
-
-
-# =============================================================================
-# Helper functions
-# =============================================================================
-
-def load_csv(filepath):
-    """Load CSV with columns (bin, out_i/range_profile_i, out_q/range_profile_q)."""
-    vals_i = []
-    vals_q = []
-    with open(filepath) as f:
-        f.readline()  # Skip header
-        for line in f:
-            line = line.strip()
-            if not line:
-                continue
-            parts = line.split(',')
-            vals_i.append(int(parts[1]))
-            vals_q.append(int(parts[2]))
-    return vals_i, vals_q
-
-
-def magnitude_spectrum(vals_i, vals_q):
-    """Compute magnitude = |I| + |Q| for each bin (L1 norm, matches RTL)."""
-    return [abs(i) + abs(q) for i, q in zip(vals_i, vals_q, strict=False)]
-
-
-def magnitude_l2(vals_i, vals_q):
-    """Compute magnitude = sqrt(I^2 + Q^2) for each bin."""
-    return [math.sqrt(i*i + q*q) for i, q in zip(vals_i, vals_q, strict=False)]
-
-
-def total_energy(vals_i, vals_q):
-    """Compute total energy (sum of I^2 + Q^2)."""
-    return sum(i*i + q*q for i, q in zip(vals_i, vals_q, strict=False))
-
-
-def rms_magnitude(vals_i, vals_q):
-    """Compute RMS of complex magnitude."""
-    n = len(vals_i)
-    if n == 0:
-        return 0.0
-    return math.sqrt(sum(i*i + q*q for i, q in zip(vals_i, vals_q, strict=False)) / n)
-
-
-def pearson_correlation(a, b):
-    """Compute Pearson correlation coefficient between two lists."""
-    n = len(a)
-    if n < 2:
-        return 0.0
-    mean_a = sum(a) / n
-    mean_b = sum(b) / n
-    cov = sum((a[i] - mean_a) * (b[i] - mean_b) for i in range(n))
-    std_a_sq = sum((x - mean_a) ** 2 for x in a)
-    std_b_sq = sum((x - mean_b) ** 2 for x in b)
-    if std_a_sq < 1e-10 or std_b_sq < 1e-10:
-        return 1.0 if abs(mean_a - mean_b) < 1.0 else 0.0
-    return cov / math.sqrt(std_a_sq * std_b_sq)
-
-
-def find_peak(vals_i, vals_q):
-    """Find the bin with the maximum L1 magnitude."""
-    mags = magnitude_spectrum(vals_i, vals_q)
-    peak_bin = 0
-    peak_mag = mags[0]
-    for i in range(1, len(mags)):
-        if mags[i] > peak_mag:
-            peak_mag = mags[i]
-            peak_bin = i
-    return peak_bin, peak_mag
-
-
-def top_n_peaks(mags, n=10):
-    """Find the top-N peak bins by magnitude. Returns set of bin indices."""
-    indexed = sorted(enumerate(mags), key=lambda x: -x[1])
-    return {idx for idx, _ in indexed[:n]}
-
-
-def spectral_peak_overlap(mags_a, mags_b, n=10):
-    """Fraction of top-N peaks from A that also appear in top-N of B."""
-    peaks_a = top_n_peaks(mags_a, n)
-    peaks_b = top_n_peaks(mags_b, n)
-    if len(peaks_a) == 0:
-        return 1.0
-    overlap = peaks_a & peaks_b
-    return len(overlap) / len(peaks_a)
-
-
-# =============================================================================
-# Comparison for one scenario
-# =============================================================================
-
-def compare_scenario(scenario_name, config, base_dir):
-    """Compare one scenario. Returns (pass/fail, result_dict)."""
-
-    golden_path = os.path.join(base_dir, config['golden_csv'])
-    rtl_path = os.path.join(base_dir, config['rtl_csv'])
-
-    if not os.path.exists(golden_path):
-        return False, {}
-    if not os.path.exists(rtl_path):
-        return False, {}
-
-    py_i, py_q = load_csv(golden_path)
-    rtl_i, rtl_q = load_csv(rtl_path)
-
-
-    if len(py_i) != FFT_SIZE or len(rtl_i) != FFT_SIZE:
-        return False, {}
-
-    # ---- Metric 1: Energy ----
-    py_energy = total_energy(py_i, py_q)
-    rtl_energy = total_energy(rtl_i, rtl_q)
-    py_rms = rms_magnitude(py_i, py_q)
-    rtl_rms = rms_magnitude(rtl_i, rtl_q)
-
-    if py_energy > 0 and rtl_energy > 0:
-        energy_ratio = rtl_energy / py_energy
-        rms_ratio = rtl_rms / py_rms
-    elif py_energy == 0 and rtl_energy == 0:
-        energy_ratio = 1.0
-        rms_ratio = 1.0
-    else:
-        energy_ratio = float('inf') if py_energy == 0 else 0.0
-        rms_ratio = float('inf') if py_rms == 0 else 0.0
-
-
-    # ---- Metric 2: Peak location ----
-    py_peak_bin, _py_peak_mag = find_peak(py_i, py_q)
-    rtl_peak_bin, _rtl_peak_mag = find_peak(rtl_i, rtl_q)
-
-
-    # ---- Metric 3: Magnitude spectrum correlation ----
-    py_mag = magnitude_l2(py_i, py_q)
-    rtl_mag = magnitude_l2(rtl_i, rtl_q)
-    mag_corr = pearson_correlation(py_mag, rtl_mag)
-
-
-    # ---- Metric 4: Top-N peak overlap ----
-    # Use L1 magnitudes for peak finding (matches RTL)
-    py_mag_l1 = magnitude_spectrum(py_i, py_q)
-    rtl_mag_l1 = magnitude_spectrum(rtl_i, rtl_q)
-    peak_overlap_10 = spectral_peak_overlap(py_mag_l1, rtl_mag_l1, n=10)
-    peak_overlap_20 = spectral_peak_overlap(py_mag_l1, rtl_mag_l1, n=20)
-
-
-    # ---- Metric 5: I and Q channel correlation ----
-    corr_i = pearson_correlation(py_i, rtl_i)
-    corr_q = pearson_correlation(py_q, rtl_q)
-
-
-    # ---- Pass/Fail Decision ----
-    # The SIMULATION branch uses floating-point twiddles ($cos/$sin) while
-    # the Python model uses the fixed-point twiddle ROM (matching synthesis).
-    # These are fundamentally different FFT implementations. We do NOT expect
-    # structural similarity (correlation, peak overlap) between them.
-    #
-    # What we CAN verify:
-    # 1. Both produce non-trivial output (state machine completes)
-    # 2. Output count is correct (1024 samples)
-    # 3. Energy is in a reasonable range (not wildly wrong)
-    #
-    # The true bit-accuracy comparison will happen when the synthesis branch
-    # is simulated (xsim on remote server) using the same fft_engine.v that
-    # the Python model was built to match.
-
-    checks = []
-
-    # Check 1: Both produce output
-    both_have_output = py_energy > 0 and rtl_energy > 0
-    checks.append(('Both produce output', both_have_output))
-
-    # Check 2: RTL produced expected sample count
-    correct_count = len(rtl_i) == FFT_SIZE
-    checks.append(('Correct output count (1024)', correct_count))
-
-    # Check 3: Energy ratio within generous bounds
-    # Allow very wide range since twiddle differences cause large gain variation
-    energy_ok = ENERGY_RATIO_MIN < energy_ratio < ENERGY_RATIO_MAX
-    checks.append((f'Energy ratio in bounds ({ENERGY_RATIO_MIN}-{ENERGY_RATIO_MAX})',
-                    energy_ok))
-
-    # Print checks
-    all_pass = True
-    for _name, passed in checks:
-        if not passed:
-            all_pass = False
-
-    result = {
-        'scenario': scenario_name,
-        'py_energy': py_energy,
-        'rtl_energy': rtl_energy,
-        'energy_ratio': energy_ratio,
-        'rms_ratio': rms_ratio,
-        'py_peak_bin': py_peak_bin,
-        'rtl_peak_bin': rtl_peak_bin,
-        'mag_corr': mag_corr,
-        'peak_overlap_10': peak_overlap_10,
-        'peak_overlap_20': peak_overlap_20,
-        'corr_i': corr_i,
-        'corr_q': corr_q,
-        'passed': all_pass,
-    }
-
-    # Write detailed comparison CSV
-    compare_csv = os.path.join(base_dir, f'compare_mf_{scenario_name}.csv')
-    with open(compare_csv, 'w') as f:
-        f.write('bin,py_i,py_q,rtl_i,rtl_q,py_mag,rtl_mag,diff_i,diff_q\n')
-        for k in range(FFT_SIZE):
-            f.write(f'{k},{py_i[k]},{py_q[k]},{rtl_i[k]},{rtl_q[k]},'
-                    f'{py_mag_l1[k]},{rtl_mag_l1[k]},'
-                    f'{rtl_i[k]-py_i[k]},{rtl_q[k]-py_q[k]}\n')
-
-    return all_pass, result
-
-
-# =============================================================================
-# Main
-# =============================================================================
-
-def main():
-    base_dir = os.path.dirname(os.path.abspath(__file__))
-
-    arg = sys.argv[1].lower() if len(sys.argv) > 1 else 'chirp'
-
-    if arg == 'all':
-        run_scenarios = list(SCENARIOS.keys())
-    elif arg in SCENARIOS:
-        run_scenarios = [arg]
-    else:
-        sys.exit(1)
-
-
-    results = []
-    for name in run_scenarios:
-        passed, result = compare_scenario(name, SCENARIOS[name], base_dir)
-        results.append((name, passed, result))
-
-    # Summary
-
-
-    all_pass = True
-    for _name, passed, result in results:
-        if not result:
-            all_pass = False
-        else:
-            if not passed:
-                all_pass = False
-
-    if all_pass:
-        pass
-    else:
-        pass
-
-    sys.exit(0 if all_pass else 1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/gen_chirp_mem.py b/9_Firmware/9_2_FPGA/tb/cosim/gen_chirp_mem.py
index 8bec7b8..8df228d 100644
--- a/9_Firmware/9_2_FPGA/tb/cosim/gen_chirp_mem.py
+++ b/9_Firmware/9_2_FPGA/tb/cosim/gen_chirp_mem.py
@@ -126,17 +126,40 @@ def write_mem_file(filename, values):
     with open(path, 'w') as f:
         for v in values:
             f.write(to_hex16(v) + '\n')
+    print(f"  Wrote {filename}: {len(values)} entries")
 
 
 def main():
+    print("=" * 60)
+    print("AERIS-10 Chirp .mem File Generator")
+    print("=" * 60)
+    print()
+    print("Parameters:")
+    print(f"  CHIRP_BW         = {CHIRP_BW/1e6:.1f} MHz")
+    print(f"  FS_SYS           = {FS_SYS/1e6:.1f} MHz")
+    print(f"  T_LONG_CHIRP     = {T_LONG_CHIRP*1e6:.1f} us")
+    print(f"  T_SHORT_CHIRP    = {T_SHORT_CHIRP*1e6:.1f} us")
+    print(f"  LONG_CHIRP_SAMPLES = {LONG_CHIRP_SAMPLES}")
+    print(f"  SHORT_CHIRP_SAMPLES = {SHORT_CHIRP_SAMPLES}")
+    print(f"  FFT_SIZE         = {FFT_SIZE}")
+    print(f"  Chirp rate (long)  = {CHIRP_BW/T_LONG_CHIRP:.3e} Hz/s")
+    print(f"  Chirp rate (short) = {CHIRP_BW/T_SHORT_CHIRP:.3e} Hz/s")
+    print(f"  Q15 scale        = {SCALE}")
+    print()
 
     # ---- Long chirp ----
+    print("Generating full long chirp (3000 samples)...")
     long_i, long_q = generate_full_long_chirp()
 
     # Verify first sample matches generate_reference_chirp_q15() from radar_scene.py
     # (which only generates the first 1024 samples)
+    print(f"  Sample[0]:    I={long_i[0]:6d}  Q={long_q[0]:6d}")
+    print(f"  Sample[1023]: I={long_i[1023]:6d}  Q={long_q[1023]:6d}")
+    print(f"  Sample[2999]: I={long_i[2999]:6d}  Q={long_q[2999]:6d}")
 
     # Segment into 4 x 1024 blocks
+    print()
+    print("Segmenting into 4 x 1024 blocks...")
     for seg in range(LONG_SEGMENTS):
         start = seg * FFT_SIZE
         end = start + FFT_SIZE
@@ -154,18 +177,27 @@ def main():
                 seg_i.append(0)
                 seg_q.append(0)
 
-        FFT_SIZE - valid_count
+        zero_count = FFT_SIZE - valid_count
+        print(f"  Seg {seg}: indices [{start}:{end-1}], "
+              f"valid={valid_count}, zeros={zero_count}")
 
         write_mem_file(f"long_chirp_seg{seg}_i.mem", seg_i)
         write_mem_file(f"long_chirp_seg{seg}_q.mem", seg_q)
 
     # ---- Short chirp ----
+    print()
+    print("Generating short chirp (50 samples)...")
     short_i, short_q = generate_short_chirp()
+    print(f"  Sample[0]:  I={short_i[0]:6d}  Q={short_q[0]:6d}")
+    print(f"  Sample[49]: I={short_i[49]:6d}  Q={short_q[49]:6d}")
 
     write_mem_file("short_chirp_i.mem", short_i)
     write_mem_file("short_chirp_q.mem", short_q)
 
     # ---- Verification summary ----
+    print()
+    print("=" * 60)
+    print("Verification:")
 
     # Cross-check seg0 against radar_scene.py generate_reference_chirp_q15()
     # That function generates exactly the first 1024 samples of the chirp
@@ -180,24 +212,33 @@ def main():
             mismatches += 1
 
     if mismatches == 0:
-        pass
+        print("  [PASS] Seg0 matches radar_scene.py generate_reference_chirp_q15()")
     else:
+        print(f"  [FAIL] Seg0 has {mismatches} mismatches vs generate_reference_chirp_q15()")
         return 1
 
     # Check magnitude envelope
-    max(math.sqrt(i*i + q*q) for i, q in zip(long_i, long_q, strict=False))
+    max_mag = max(math.sqrt(i*i + q*q) for i, q in zip(long_i, long_q, strict=False))
+    print(f"  Max magnitude: {max_mag:.1f} (expected ~{Q15_MAX * SCALE:.1f})")
+    print(f"  Magnitude ratio: {max_mag / (Q15_MAX * SCALE):.6f}")
 
     # Check seg3 zero padding
     seg3_i_path = os.path.join(MEM_DIR, 'long_chirp_seg3_i.mem')
     with open(seg3_i_path) as f:
         seg3_lines = [line.strip() for line in f if line.strip()]
     nonzero_seg3 = sum(1 for line in seg3_lines if line != '0000')
+    print(f"  Seg3 non-zero entries: {nonzero_seg3}/{len(seg3_lines)} "
+          f"(expected 0 since chirp ends at sample 2999)")
 
     if nonzero_seg3 == 0:
-        pass
+        print("  [PASS] Seg3 is all zeros (chirp 3000 samples < seg3 start 3072)")
     else:
-        pass
+        print(f"  [WARN] Seg3 has {nonzero_seg3} non-zero entries")
 
+    print()
+    print(f"Generated 10 .mem files in {os.path.abspath(MEM_DIR)}")
+    print("Run validate_mem_files.py to do full validation.")
+    print("=" * 60)
 
     return 0
 
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/gen_doppler_golden.py b/9_Firmware/9_2_FPGA/tb/cosim/gen_doppler_golden.py
index 61981a9..71e8093 100644
--- a/9_Firmware/9_2_FPGA/tb/cosim/gen_doppler_golden.py
+++ b/9_Firmware/9_2_FPGA/tb/cosim/gen_doppler_golden.py
@@ -51,6 +51,7 @@ def write_hex_32bit(filepath, samples):
         for (i_val, q_val) in samples:
             packed = ((q_val & 0xFFFF) << 16) | (i_val & 0xFFFF)
             f.write(f"{packed:08X}\n")
+    print(f"  Wrote {len(samples)} packed samples to {filepath}")
 
 
 def write_csv(filepath, headers, *columns):
@@ -60,6 +61,7 @@ def write_csv(filepath, headers, *columns):
         for i in range(len(columns[0])):
             row = ','.join(str(col[i]) for col in columns)
             f.write(row + '\n')
+    print(f"  Wrote {len(columns[0])} rows to {filepath}")
 
 
 def write_hex_16bit(filepath, data):
@@ -116,10 +118,15 @@ SCENARIOS = {
 
 def generate_scenario(name, targets, description, base_dir):
     """Generate input hex + golden output for one scenario."""
+    print(f"\n{'='*60}")
+    print(f"Scenario: {name} — {description}")
+    print("Model: CLEAN (dual 16-pt FFT)")
+    print(f"{'='*60}")
 
     # Generate Doppler frame (32 chirps x 64 range bins)
     frame_i, frame_q = generate_doppler_frame(targets, seed=42)
 
+    print(f"  Generated frame: {len(frame_i)} chirps x {len(frame_i[0])} range bins")
 
     # ---- Write input hex file (packed 32-bit: {Q, I}) ----
     # RTL expects data streamed chirp-by-chirp: chirp0[rb0..rb63], chirp1[rb0..rb63], ...
@@ -137,6 +144,8 @@ def generate_scenario(name, targets, description, base_dir):
     dp = DopplerProcessor()
     doppler_i, doppler_q = dp.process_frame(frame_i, frame_q)
 
+    print(f"  Doppler output: {len(doppler_i)} range bins x "
+          f"{len(doppler_i[0])} doppler bins (2 sub-frames x {DOPPLER_FFT_SIZE})")
 
     # ---- Write golden output CSV ----
     # Format: range_bin, doppler_bin, out_i, out_q
@@ -164,6 +173,7 @@ def generate_scenario(name, targets, description, base_dir):
     write_hex_32bit(golden_hex, list(zip(flat_i, flat_q, strict=False)))
 
     # ---- Find peak per range bin ----
+    print("\n  Peak Doppler bins per range bin (top 5 by magnitude):")
     peak_info = []
     for rbin in range(RANGE_BINS):
         mags = [abs(doppler_i[rbin][d]) + abs(doppler_q[rbin][d])
@@ -174,11 +184,13 @@ def generate_scenario(name, targets, description, base_dir):
 
     # Sort by magnitude descending, show top 5
     peak_info.sort(key=lambda x: -x[2])
-    for rbin, dbin, _mag in peak_info[:5]:
-        doppler_i[rbin][dbin]
-        doppler_q[rbin][dbin]
-        dbin // DOPPLER_FFT_SIZE
-        dbin % DOPPLER_FFT_SIZE
+    for rbin, dbin, mag in peak_info[:5]:
+        i_val = doppler_i[rbin][dbin]
+        q_val = doppler_q[rbin][dbin]
+        sf = dbin // DOPPLER_FFT_SIZE
+        bin_in_sf = dbin % DOPPLER_FFT_SIZE
+        print(f"    rbin={rbin:2d}, dbin={dbin:2d} (sf{sf}:{bin_in_sf:2d}), mag={mag:6d}, "
+              f"I={i_val:6d}, Q={q_val:6d}")
 
     return {
         'name': name,
@@ -190,6 +202,10 @@ def generate_scenario(name, targets, description, base_dir):
 def main():
     base_dir = os.path.dirname(os.path.abspath(__file__))
 
+    print("=" * 60)
+    print("Doppler Processor Co-Sim Golden Reference Generator")
+    print(f"Architecture: dual {DOPPLER_FFT_SIZE}-pt FFT ({DOPPLER_TOTAL_BINS} total bins)")
+    print("=" * 60)
 
     scenarios_to_run = list(SCENARIOS.keys())
 
@@ -207,9 +223,17 @@ def main():
         r = generate_scenario(name, targets, description, base_dir)
         results.append(r)
 
-    for _ in results:
-        pass
+    print(f"\n{'='*60}")
+    print("Summary:")
+    print(f"{'='*60}")
+    for r in results:
+        print(f"  {r['name']:<15s} top peak: "
+              f"rbin={r['peak_info'][0][0]}, dbin={r['peak_info'][0][1]}, "
+              f"mag={r['peak_info'][0][2]}")
 
+    print(f"\nGenerated {len(results)} scenarios.")
+    print(f"Files written to: {base_dir}")
+    print("=" * 60)
 
 
 if __name__ == '__main__':
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/gen_mf_cosim_golden.py b/9_Firmware/9_2_FPGA/tb/cosim/gen_mf_cosim_golden.py
index 2ac4de4..3971d9d 100644
--- a/9_Firmware/9_2_FPGA/tb/cosim/gen_mf_cosim_golden.py
+++ b/9_Firmware/9_2_FPGA/tb/cosim/gen_mf_cosim_golden.py
@@ -75,6 +75,7 @@ def generate_case(case_name, sig_i, sig_q, ref_i, ref_q, description, outdir,
 
     Returns dict with case info and results.
     """
+    print(f"\n--- {case_name}: {description} ---")
 
     assert len(sig_i) == FFT_SIZE, f"sig_i length {len(sig_i)} != {FFT_SIZE}"
     assert len(sig_q) == FFT_SIZE
@@ -87,6 +88,8 @@ def generate_case(case_name, sig_i, sig_q, ref_i, ref_q, description, outdir,
         write_hex_16bit(os.path.join(outdir, f"mf_sig_{case_name}_q.hex"), sig_q)
         write_hex_16bit(os.path.join(outdir, f"mf_ref_{case_name}_i.hex"), ref_i)
         write_hex_16bit(os.path.join(outdir, f"mf_ref_{case_name}_q.hex"), ref_q)
+        print(f"  Wrote input hex: mf_sig_{case_name}_{{i,q}}.hex, "
+              f"mf_ref_{case_name}_{{i,q}}.hex")
 
     # Run through bit-accurate Python model
     mf = MatchedFilterChain(fft_size=FFT_SIZE)
@@ -101,6 +104,9 @@ def generate_case(case_name, sig_i, sig_q, ref_i, ref_q, description, outdir,
             peak_mag = mag
             peak_bin = k
 
+    print(f"  Output: {len(out_i)} samples")
+    print(f"  Peak bin: {peak_bin}, magnitude: {peak_mag}")
+    print(f"  Peak I={out_i[peak_bin]}, Q={out_q[peak_bin]}")
 
     # Save golden output hex
     write_hex_16bit(os.path.join(outdir, f"mf_golden_py_i_{case_name}.hex"), out_i)
@@ -129,6 +135,10 @@ def generate_case(case_name, sig_i, sig_q, ref_i, ref_q, description, outdir,
 def main():
     base_dir = os.path.dirname(os.path.abspath(__file__))
 
+    print("=" * 60)
+    print("Matched Filter Co-Sim Golden Reference Generator")
+    print("Using bit-accurate Python model (fpga_model.py)")
+    print("=" * 60)
 
     results = []
 
@@ -148,7 +158,8 @@ def main():
                           base_dir)
         results.append(r)
     else:
-        pass
+        print("\nWARNING: bb_mf_test / ref_chirp hex files not found.")
+        print("Run radar_scene.py first.")
 
     # ---- Case 2: DC autocorrelation ----
     dc_val = 0x1000  # 4096
@@ -190,9 +201,16 @@ def main():
     results.append(r)
 
     # ---- Summary ----
-    for _ in results:
-        pass
+    print("\n" + "=" * 60)
+    print("Summary:")
+    print("=" * 60)
+    for r in results:
+        print(f"  {r['case_name']:10s}: peak at bin {r['peak_bin']}, "
+              f"mag={r['peak_mag']}, I={r['peak_i']}, Q={r['peak_q']}")
 
+    print(f"\nGenerated {len(results)} golden reference cases.")
+    print("Files written to:", base_dir)
+    print("=" * 60)
 
 
 if __name__ == '__main__':
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/radar_scene.py b/9_Firmware/9_2_FPGA/tb/cosim/radar_scene.py
index 205f9e3..920b120 100644
--- a/9_Firmware/9_2_FPGA/tb/cosim/radar_scene.py
+++ b/9_Firmware/9_2_FPGA/tb/cosim/radar_scene.py
@@ -163,7 +163,7 @@ def generate_if_chirp(n_samples, chirp_bw=CHIRP_BW, f_if=F_IF, fs=FS_ADC):
     return chirp_i, chirp_q
 
 
-def generate_reference_chirp_q15(n_fft=FFT_SIZE, chirp_bw=CHIRP_BW, _f_if=F_IF, _fs=FS_ADC):
+def generate_reference_chirp_q15(n_fft=FFT_SIZE, chirp_bw=CHIRP_BW, f_if=F_IF, fs=FS_ADC):
     """
     Generate a reference chirp in Q15 format for the matched filter.
 
@@ -398,6 +398,7 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
         for target in targets:
             # Which range bin does this target fall in?
             # After matched filter + range decimation:
+            # range_bin = target_delay_in_baseband_samples / decimation_factor
             delay_baseband_samples = target.delay_s * FS_SYS
             range_bin_float = delay_baseband_samples * n_range_bins / FFT_SIZE
             range_bin = round(range_bin_float)
@@ -405,6 +406,7 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
             if range_bin < 0 or range_bin >= n_range_bins:
                 continue
 
+            # Amplitude (simplified)
             amp = target.amplitude / 4.0
 
             # Doppler phase for this chirp.
@@ -472,6 +474,7 @@ def write_hex_file(filepath, samples, bits=8):
                 val = s & ((1 << bits) - 1)
             f.write(fmt.format(val) + "\n")
 
+    print(f"  Wrote {len(samples)} samples to {filepath}")
 
 
 def write_csv_file(filepath, columns, headers=None):
@@ -491,6 +494,7 @@ def write_csv_file(filepath, columns, headers=None):
             row = [str(col[i]) for col in columns]
             f.write(",".join(row) + "\n")
 
+    print(f"  Wrote {n_rows} rows to {filepath}")
 
 
 # =============================================================================
@@ -503,6 +507,10 @@ def scenario_single_target(range_m=500, velocity=0, rcs=0, n_adc_samples=16384):
     Good for validating matched filter range response.
     """
     target = Target(range_m=range_m, velocity_mps=velocity, rcs_dbsm=rcs)
+    print(f"Scenario: Single target at {range_m}m")
+    print(f"  {target}")
+    print(f"  Beat freq: {CHIRP_BW / T_LONG_CHIRP * target.delay_s:.0f} Hz")
+    print(f"  Delay: {target.delay_samples:.1f} ADC samples")
 
     adc = generate_adc_samples([target], n_adc_samples, noise_stddev=2.0)
     return adc, [target]
@@ -517,8 +525,9 @@ def scenario_two_targets(n_adc_samples=16384):
         Target(range_m=300, velocity_mps=0, rcs_dbsm=10, phase_deg=0),
         Target(range_m=315, velocity_mps=0, rcs_dbsm=10, phase_deg=45),
     ]
-    for _t in targets:
-        pass
+    print("Scenario: Two targets (range resolution test)")
+    for t in targets:
+        print(f"  {t}")
 
     adc = generate_adc_samples(targets, n_adc_samples, noise_stddev=2.0)
     return adc, targets
@@ -535,8 +544,9 @@ def scenario_multi_target(n_adc_samples=16384):
         Target(range_m=2000, velocity_mps=50, rcs_dbsm=0, phase_deg=45),
         Target(range_m=5000, velocity_mps=-5, rcs_dbsm=-5, phase_deg=270),
     ]
-    for _t in targets:
-        pass
+    print("Scenario: Multi-target (5 targets)")
+    for t in targets:
+        print(f"  {t}")
 
     adc = generate_adc_samples(targets, n_adc_samples, noise_stddev=3.0)
     return adc, targets
@@ -546,6 +556,7 @@ def scenario_noise_only(n_adc_samples=16384, noise_stddev=5.0):
     """
     Noise-only scene — baseline for false alarm characterization.
     """
+    print(f"Scenario: Noise only (stddev={noise_stddev})")
     adc = generate_adc_samples([], n_adc_samples, noise_stddev=noise_stddev)
     return adc, []
 
@@ -554,6 +565,7 @@ def scenario_dc_tone(n_adc_samples=16384, adc_value=128):
     """
     DC input — validates CIC decimation and DC response.
     """
+    print(f"Scenario: DC tone (ADC value={adc_value})")
     return [adc_value] * n_adc_samples, []
 
 
@@ -561,6 +573,7 @@ def scenario_sine_wave(n_adc_samples=16384, freq_hz=1e6, amplitude=50):
     """
     Pure sine wave at ADC input — validates NCO/mixer frequency response.
     """
+    print(f"Scenario: Sine wave at {freq_hz/1e6:.1f} MHz, amplitude={amplitude}")
     adc = []
     for n in range(n_adc_samples):
         t = n / FS_ADC
@@ -590,35 +603,46 @@ def generate_all_test_vectors(output_dir=None):
     if output_dir is None:
         output_dir = os.path.dirname(os.path.abspath(__file__))
 
+    print("=" * 60)
+    print("Generating AERIS-10 Test Vectors")
+    print(f"Output directory: {output_dir}")
+    print("=" * 60)
 
     n_adc = 16384  # ~41 us of ADC data
 
     # --- Scenario 1: Single target ---
+    print("\n--- Scenario 1: Single Target ---")
     adc1, targets1 = scenario_single_target(range_m=500, n_adc_samples=n_adc)
     write_hex_file(os.path.join(output_dir, "adc_single_target.hex"), adc1, bits=8)
 
     # --- Scenario 2: Multi-target ---
+    print("\n--- Scenario 2: Multi-Target ---")
     adc2, targets2 = scenario_multi_target(n_adc_samples=n_adc)
     write_hex_file(os.path.join(output_dir, "adc_multi_target.hex"), adc2, bits=8)
 
     # --- Scenario 3: Noise only ---
+    print("\n--- Scenario 3: Noise Only ---")
     adc3, _ = scenario_noise_only(n_adc_samples=n_adc)
     write_hex_file(os.path.join(output_dir, "adc_noise_only.hex"), adc3, bits=8)
 
     # --- Scenario 4: DC ---
+    print("\n--- Scenario 4: DC Input ---")
     adc4, _ = scenario_dc_tone(n_adc_samples=n_adc)
     write_hex_file(os.path.join(output_dir, "adc_dc.hex"), adc4, bits=8)
 
     # --- Scenario 5: Sine wave ---
+    print("\n--- Scenario 5: 1 MHz Sine ---")
     adc5, _ = scenario_sine_wave(n_adc_samples=n_adc, freq_hz=1e6, amplitude=50)
     write_hex_file(os.path.join(output_dir, "adc_sine_1mhz.hex"), adc5, bits=8)
 
     # --- Reference chirp for matched filter ---
+    print("\n--- Reference Chirp ---")
     ref_re, ref_im = generate_reference_chirp_q15()
     write_hex_file(os.path.join(output_dir, "ref_chirp_i.hex"), ref_re, bits=16)
     write_hex_file(os.path.join(output_dir, "ref_chirp_q.hex"), ref_im, bits=16)
 
     # --- Baseband samples for matched filter test (bypass DDC) ---
+    print("\n--- Baseband Samples (bypass DDC) ---")
     bb_targets = [
         Target(range_m=500, velocity_mps=0, rcs_dbsm=10),
         Target(range_m=1500, velocity_mps=20, rcs_dbsm=5),
@@ -628,6 +652,7 @@ def generate_all_test_vectors(output_dir=None):
     write_hex_file(os.path.join(output_dir, "bb_mf_test_q.hex"), bb_q, bits=16)
 
     # --- Scenario info CSV ---
+    print("\n--- Scenario Info ---")
     with open(os.path.join(output_dir, "scenario_info.txt"), 'w') as f:
         f.write("AERIS-10 Test Vector Scenarios\n")
         f.write("=" * 60 + "\n\n")
@@ -657,7 +682,11 @@ def generate_all_test_vectors(output_dir=None):
         for t in bb_targets:
             f.write(f"  {t}\n")
 
+    print(f"\n  Wrote scenario info to {os.path.join(output_dir, 'scenario_info.txt')}")
 
+    print("\n" + "=" * 60)
+    print("ALL TEST VECTORS GENERATED")
+    print("=" * 60)
 
     return {
         'adc_single': adc1,
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/real_data/golden_reference.py b/9_Firmware/9_2_FPGA/tb/cosim/real_data/golden_reference.py
index 9b0ca86..227006f 100644
--- a/9_Firmware/9_2_FPGA/tb/cosim/real_data/golden_reference.py
+++ b/9_Firmware/9_2_FPGA/tb/cosim/real_data/golden_reference.py
@@ -69,6 +69,7 @@ FIR_COEFFS_HEX = [
 # DDC output interface
 DDC_OUT_BITS = 16                # 18 → 16 bit with rounding + saturation
 
+# FFT (Range)
 FFT_SIZE = 1024
 FFT_DATA_W = 16
 FFT_INTERNAL_W = 32
@@ -147,15 +148,21 @@ def load_and_quantize_adi_data(data_path, config_path, frame_idx=0):
     4. Upconvert to 120 MHz IF (add I*cos - Q*sin) to create real signal
     5. Quantize to 8-bit unsigned (matching AD9484)
     """
+    print(f"[LOAD] Loading ADI dataset from {data_path}")
     data = np.load(data_path, allow_pickle=True)
     config = np.load(config_path, allow_pickle=True)
     
+    print(f"  Shape: {data.shape}, dtype: {data.dtype}")
+    print(f"  Config: sample_rate={config[0]:.0f}, IF={config[1]:.0f}, "
+          f"RF={config[2]:.0f}, chirps={config[3]:.0f}, BW={config[4]:.0f}, "
+          f"ramp={config[5]:.6f}s")
     
     # Extract one frame
     frame = data[frame_idx]  # (256, 1079) complex
     
     # Use first 32 chirps, first 1024 samples
     iq_block = frame[:DOPPLER_CHIRPS, :FFT_SIZE]  # (32, 1024) complex
+    print(f"  Using frame {frame_idx}: {DOPPLER_CHIRPS} chirps x {FFT_SIZE} samples")
     
     # The ADI data is baseband complex IQ at 4 MSPS.
     # AERIS-10 sees a real signal at 400 MSPS with 120 MHz IF.
@@ -190,6 +197,9 @@ def load_and_quantize_adi_data(data_path, config_path, frame_idx=0):
     iq_i = np.clip(iq_i, -32768, 32767)
     iq_q = np.clip(iq_q, -32768, 32767)
     
+    print(f"  Scaled to 16-bit (peak target {INPUT_PEAK_TARGET}): "
+          f"I range [{iq_i.min()}, {iq_i.max()}], "
+          f"Q range [{iq_q.min()}, {iq_q.max()}]")
     
     # Also create 8-bit ADC stimulus for DDC validation
     # Use just one chirp of real-valued data (I channel only, shifted to unsigned)
@@ -281,6 +291,7 @@ def run_ddc(adc_samples):
     # Build FIR coefficients as signed integers
     fir_coeffs = np.array([hex_to_signed(c, 18) for c in FIR_COEFFS_HEX], dtype=np.int64)
     
+    print(f"[DDC] Processing {n_samples} ADC samples at 400 MHz")
     
     # --- NCO + Mixer ---
     phase_accum = np.int64(0)
@@ -313,6 +324,7 @@ def run_ddc(adc_samples):
         # Phase accumulator update (ignore dithering for bit-accuracy)
         phase_accum = (phase_accum + NCO_PHASE_INC) & 0xFFFFFFFF
     
+    print(f"  Mixer output: I range [{mixed_i.min()}, {mixed_i.max()}]")
     
     # --- CIC Decimator (5-stage, decimate-by-4) ---
     # Integrator section (at 400 MHz rate)
@@ -320,9 +332,7 @@ def run_ddc(adc_samples):
     for n in range(n_samples):
         integrators[0][n + 1] = (integrators[0][n] + mixed_i[n]) & ((1 << CIC_ACC_WIDTH) - 1)
         for s in range(1, CIC_STAGES):
-            integrators[s][n + 1] = (
-                integrators[s][n] + integrators[s - 1][n + 1]
-            ) & ((1 << CIC_ACC_WIDTH) - 1)
+            integrators[s][n + 1] = (integrators[s][n] + integrators[s - 1][n + 1]) & ((1 << CIC_ACC_WIDTH) - 1)
     
     # Downsample by 4
     n_decimated = n_samples // CIC_DECIMATION
@@ -356,6 +366,7 @@ def run_ddc(adc_samples):
         scaled = comb[CIC_STAGES - 1][k] >> CIC_GAIN_SHIFT
         cic_output[k] = saturate(scaled, CIC_OUT_BITS)
     
+    print(f"  CIC output: {n_decimated} samples, range [{cic_output.min()}, {cic_output.max()}]")
     
     # --- FIR Filter (32-tap) ---
     delay_line = np.zeros(FIR_TAPS, dtype=np.int64)
@@ -377,6 +388,7 @@ def run_ddc(adc_samples):
         if fir_output[k] >= (1 << 17):
             fir_output[k] -= (1 << 18)
     
+    print(f"  FIR output: range [{fir_output.min()}, {fir_output.max()}]")
     
     # --- DDC Interface (18 → 16 bit) ---
     ddc_output = np.zeros(n_decimated, dtype=np.int64)
@@ -393,6 +405,7 @@ def run_ddc(adc_samples):
         else:
             ddc_output[k] = saturate(trunc + round_bit, 16)
     
+    print(f"  DDC output (16-bit): range [{ddc_output.min()}, {ddc_output.max()}]")
     
     return ddc_output
 
@@ -465,6 +478,7 @@ def run_range_fft(iq_i, iq_q, twiddle_file=None):
         # Generate twiddle factors if file not available
         cos_rom = np.round(32767 * np.cos(2 * np.pi * np.arange(N // 4) / N)).astype(np.int64)
     
+    print(f"[FFT] Running {N}-point range FFT (bit-accurate)")
     
     # Bit-reverse and sign-extend to 32-bit internal width
     def bit_reverse(val, bits):
@@ -502,6 +516,9 @@ def run_range_fft(iq_i, iq_q, twiddle_file=None):
             b_re = mem_re[addr_odd]
             b_im = mem_im[addr_odd]
             
+            # Twiddle multiply: forward FFT
+            #   prod_re = b_re * tw_cos + b_im * tw_sin
+            #   prod_im = b_im * tw_cos - b_re * tw_sin
             prod_re = b_re * tw_cos + b_im * tw_sin
             prod_im = b_im * tw_cos - b_re * tw_sin
             
@@ -524,6 +541,8 @@ def run_range_fft(iq_i, iq_q, twiddle_file=None):
         out_re[n] = saturate(mem_re[n], FFT_DATA_W)
         out_im[n] = saturate(mem_im[n], FFT_DATA_W)
     
+    print(f"  FFT output: re range [{out_re.min()}, {out_re.max()}], "
+          f"im range [{out_im.min()}, {out_im.max()}]")
     
     return out_re, out_im
 
@@ -558,6 +577,8 @@ def run_range_bin_decimator(range_fft_i, range_fft_q,
     decimated_i = np.zeros((n_chirps, output_bins), dtype=np.int64)
     decimated_q = np.zeros((n_chirps, output_bins), dtype=np.int64)
 
+    print(f"[DECIM] Decimating {n_in}→{output_bins} bins, mode={'peak' if mode==1 else 'avg' if mode==2 else 'simple'}, "
+          f"start_bin={start_bin}, {n_chirps} chirps")
 
     for c in range(n_chirps):
         # Index into input, skip start_bin
@@ -606,7 +627,7 @@ def run_range_bin_decimator(range_fft_i, range_fft_q,
                 # Averaging: sum group, then >> 4 (divide by 16)
                 sum_i = np.int64(0)
                 sum_q = np.int64(0)
-                for _ in range(decimation_factor):
+                for _s in range(decimation_factor):
                     if in_idx >= input_bins:
                         break
                     sum_i += int(range_fft_i[c, in_idx])
@@ -616,6 +637,9 @@ def run_range_bin_decimator(range_fft_i, range_fft_q,
                 decimated_i[c, obin] = int(sum_i) >> 4
                 decimated_q[c, obin] = int(sum_q) >> 4
 
+    print(f"  Decimated output: shape ({n_chirps}, {output_bins}), "
+          f"I range [{decimated_i.min()}, {decimated_i.max()}], "
+          f"Q range [{decimated_q.min()}, {decimated_q.max()}]")
 
     return decimated_i, decimated_q
 
@@ -641,6 +665,7 @@ def run_doppler_fft(range_data_i, range_data_q, twiddle_file_16=None):
     n_total = DOPPLER_TOTAL_BINS
     n_sf = CHIRPS_PER_SUBFRAME
 
+    print(f"[DOPPLER] Processing {n_range} range bins x {n_chirps} chirps → dual {n_fft}-point FFT")
 
     # Build 16-point Hamming window as signed 16-bit
     hamming = np.array([int(v) for v in HAMMING_Q15], dtype=np.int64)
@@ -650,9 +675,7 @@ def run_doppler_fft(range_data_i, range_data_q, twiddle_file_16=None):
     if twiddle_file_16 and os.path.exists(twiddle_file_16):
         cos_rom_16 = load_twiddle_rom(twiddle_file_16)
     else:
-        cos_rom_16 = np.round(
-            32767 * np.cos(2 * np.pi * np.arange(n_fft // 4) / n_fft)
-        ).astype(np.int64)
+        cos_rom_16 = np.round(32767 * np.cos(2 * np.pi * np.arange(n_fft // 4) / n_fft)).astype(np.int64)
 
     LOG2N_16 = 4
     doppler_map_i = np.zeros((n_range, n_total), dtype=np.int64)
@@ -724,6 +747,8 @@ def run_doppler_fft(range_data_i, range_data_q, twiddle_file_16=None):
                 doppler_map_i[rbin, bin_offset + n] = saturate(mem_re[n], 16)
                 doppler_map_q[rbin, bin_offset + n] = saturate(mem_im[n], 16)
 
+    print(f"  Doppler map: shape ({n_range}, {n_total}), "
+          f"I range [{doppler_map_i.min()}, {doppler_map_i.max()}]")
 
     return doppler_map_i, doppler_map_q
 
@@ -753,10 +778,12 @@ def run_mti_canceller(decim_i, decim_q, enable=True):
     mti_i = np.zeros_like(decim_i)
     mti_q = np.zeros_like(decim_q)
 
+    print(f"[MTI] 2-pulse canceller, enable={enable}, {n_chirps} chirps x {n_bins} bins")
 
     if not enable:
         mti_i[:] = decim_i
         mti_q[:] = decim_q
+        print("  Pass-through mode (MTI disabled)")
         return mti_i, mti_q
 
     for c in range(n_chirps):
@@ -772,6 +799,9 @@ def run_mti_canceller(decim_i, decim_q, enable=True):
                 mti_i[c, r] = saturate(diff_i, 16)
                 mti_q[c, r] = saturate(diff_q, 16)
 
+    print("  Chirp 0: muted (zeros)")
+    print(f"  Chirps 1-{n_chirps-1}: I range [{mti_i[1:].min()}, {mti_i[1:].max()}], "
+          f"Q range [{mti_q[1:].min()}, {mti_q[1:].max()}]")
     return mti_i, mti_q
 
 
@@ -798,12 +828,14 @@ def run_dc_notch(doppler_i, doppler_q, width=2):
       dc_notch_active = (width != 0) &&
                         (bin_within_sf < width || bin_within_sf > (15 - width + 1))
     """
-    _n_range, n_doppler = doppler_i.shape
+    n_range, n_doppler = doppler_i.shape
     notched_i = doppler_i.copy()
     notched_q = doppler_q.copy()
 
+    print(f"[DC NOTCH] width={width}, {n_range} range bins x {n_doppler} Doppler bins (dual sub-frame)")
 
     if width == 0:
+        print("  Pass-through (width=0)")
         return notched_i, notched_q
 
     zeroed_count = 0
@@ -815,6 +847,7 @@ def run_dc_notch(doppler_i, doppler_q, width=2):
             notched_q[:, dbin] = 0
             zeroed_count += 1
 
+    print(f"  Zeroed {zeroed_count} Doppler bin columns")
     return notched_i, notched_q
 
 
@@ -822,7 +855,7 @@ def run_dc_notch(doppler_i, doppler_q, width=2):
 # Stage 3e: CA-CFAR Detector (bit-accurate)
 # ===========================================================================
 def run_cfar_ca(doppler_i, doppler_q, guard=2, train=8,
-                alpha_q44=0x30, mode='CA', _simple_threshold=500):
+                alpha_q44=0x30, mode='CA', simple_threshold=500):
     """
     Bit-accurate model of cfar_ca.v — Cell-Averaging CFAR detector.
 
@@ -860,6 +893,9 @@ def run_cfar_ca(doppler_i, doppler_q, guard=2, train=8,
     if train == 0:
         train = 1
 
+    print(f"[CFAR] mode={mode}, guard={guard}, train={train}, "
+          f"alpha=0x{alpha_q44:02X} (Q4.4={alpha_q44/16:.2f}), "
+          f"{n_range} range x {n_doppler} Doppler")
 
     # Compute magnitudes: |I| + |Q| (17-bit unsigned, matching RTL L1 norm)
     # RTL: abs_i = I[15] ? (~I + 1) : I; abs_q = Q[15] ? (~Q + 1) : Q
@@ -927,6 +963,10 @@ def run_cfar_ca(doppler_i, doppler_q, guard=2, train=8,
             else:
                 noise_sum = leading_sum + lagging_sum  # Default to CA
 
+            # Threshold = (alpha * noise_sum) >> ALPHA_FRAC_BITS
+            # RTL: noise_product = r_alpha * noise_sum_reg (31-bit)
+            #      threshold = noise_product[ALPHA_FRAC_BITS +: MAG_WIDTH]
+            #      saturate if overflow
             noise_product = alpha_q44 * noise_sum
             threshold_raw = noise_product >> ALPHA_FRAC_BITS
 
@@ -934,12 +974,15 @@ def run_cfar_ca(doppler_i, doppler_q, guard=2, train=8,
             MAX_MAG = (1 << 17) - 1  # 131071
             threshold_val = MAX_MAG if threshold_raw > MAX_MAG else int(threshold_raw)
 
+            # Detection: magnitude > threshold
             if int(col[cut_idx]) > threshold_val:
                 detect_flags[cut_idx, dbin] = True
                 total_detections += 1
 
             thresholds[cut_idx, dbin] = threshold_val
 
+    print(f"  Total detections: {total_detections}")
+    print(f"  Magnitude range: [{magnitudes.min()}, {magnitudes.max()}]")
 
     return detect_flags, magnitudes, thresholds
 
@@ -953,16 +996,19 @@ def run_detection(doppler_i, doppler_q, threshold=10000):
     cfar_mag = |I| + |Q| (17-bit)
     detection if cfar_mag > threshold
     """
+    print(f"[DETECT] Running magnitude threshold detection (threshold={threshold})")
     
     mag = np.abs(doppler_i) + np.abs(doppler_q)  # L1 norm (|I| + |Q|)
     detections = np.argwhere(mag > threshold)
     
+    print(f"  {len(detections)} detections found")
     for d in detections[:20]:  # Print first 20
         rbin, dbin = d
-        mag[rbin, dbin]
+        m = mag[rbin, dbin]
+        print(f"    Range bin {rbin}, Doppler bin {dbin}: magnitude {m}")
     
     if len(detections) > 20:
-        pass
+        print(f"    ... and {len(detections) - 20} more")
     
     return mag, detections
 
@@ -976,6 +1022,7 @@ def run_float_reference(iq_i, iq_q):
     Uses the exact same RTL Hamming window coefficients (Q15) to isolate
     only the FFT fixed-point quantization error.
     """
+    print("\n[FLOAT REF] Running floating-point reference pipeline")
     
     n_chirps, n_samples = iq_i.shape[0], iq_i.shape[1] if iq_i.ndim == 2 else len(iq_i)
     
@@ -1023,6 +1070,8 @@ def write_hex_files(output_dir, iq_i, iq_q, prefix="stim"):
                 fi.write(signed_to_hex(int(iq_i[n]), 16) + '\n')
                 fq.write(signed_to_hex(int(iq_q[n]), 16) + '\n')
         
+        print(f"  Wrote {fn_i} ({n_samples} samples)")
+        print(f"  Wrote {fn_q} ({n_samples} samples)")
     
     elif iq_i.ndim == 2:
         n_rows, n_cols = iq_i.shape
@@ -1036,6 +1085,8 @@ def write_hex_files(output_dir, iq_i, iq_q, prefix="stim"):
                     fi.write(signed_to_hex(int(iq_i[r, c]), 16) + '\n')
                     fq.write(signed_to_hex(int(iq_q[r, c]), 16) + '\n')
         
+        print(f"  Wrote {fn_i} ({n_rows}x{n_cols} = {n_rows * n_cols} samples)")
+        print(f"  Wrote {fn_q} ({n_rows}x{n_cols} = {n_rows * n_cols} samples)")
 
 
 def write_adc_hex(output_dir, adc_data, prefix="adc_stim"):
@@ -1047,12 +1098,13 @@ def write_adc_hex(output_dir, adc_data, prefix="adc_stim"):
         for n in range(len(adc_data)):
             f.write(format(int(adc_data[n]) & 0xFF, '02X') + '\n')
     
+    print(f"  Wrote {fn} ({len(adc_data)} samples)")
 
 
 # ===========================================================================
 # Comparison metrics
 # ===========================================================================
-def compare_outputs(_name, fixed_i, fixed_q, float_i, float_q):
+def compare_outputs(name, fixed_i, fixed_q, float_i, float_q):
     """Compare fixed-point outputs against floating-point reference.
     
     Reports two metrics:
@@ -1068,7 +1120,7 @@ def compare_outputs(_name, fixed_i, fixed_q, float_i, float_q):
     
     # Count saturated bins
     sat_mask = (np.abs(fi) >= 32767) | (np.abs(fq) >= 32767)
-    np.sum(sat_mask)
+    n_saturated = np.sum(sat_mask)
     
     # Complex error — overall
     fixed_complex = fi + 1j * fq
@@ -1077,8 +1129,8 @@ def compare_outputs(_name, fixed_i, fixed_q, float_i, float_q):
     
     signal_power = np.mean(np.abs(ref_complex) ** 2) + 1e-30
     noise_power = np.mean(np.abs(error) ** 2) + 1e-30
-    10 * np.log10(signal_power / noise_power)
-    np.max(np.abs(error))
+    snr_db = 10 * np.log10(signal_power / noise_power)
+    max_error = np.max(np.abs(error))
     
     # Non-saturated comparison
     non_sat = ~sat_mask
@@ -1087,10 +1139,17 @@ def compare_outputs(_name, fixed_i, fixed_q, float_i, float_q):
         sig_ns = np.mean(np.abs(ref_complex[non_sat]) ** 2) + 1e-30
         noise_ns = np.mean(np.abs(error_ns) ** 2) + 1e-30
         snr_ns = 10 * np.log10(sig_ns / noise_ns)
-        np.max(np.abs(error_ns))
+        max_err_ns = np.max(np.abs(error_ns))
     else:
         snr_ns = 0.0
+        max_err_ns = 0.0
     
+    print(f"\n  [{name}] Comparison ({n} points):")
+    print(f"    Saturated:           {n_saturated}/{n} ({100.0*n_saturated/n:.2f}%)")
+    print(f"    Overall SNR:         {snr_db:.1f} dB")
+    print(f"    Overall max error:   {max_error:.1f}")
+    print(f"    Non-sat SNR:         {snr_ns:.1f} dB")
+    print(f"    Non-sat max error:   {max_err_ns:.1f}")
     
     return snr_ns  # Return the meaningful metric
 
@@ -1102,12 +1161,7 @@ def main():
     parser = argparse.ArgumentParser(description="AERIS-10 FPGA golden reference model")
     parser.add_argument('--frame', type=int, default=0, help='Frame index to process')
     parser.add_argument('--plot', action='store_true', help='Show plots')
-    parser.add_argument(
-        '--threshold',
-        type=int,
-        default=10000,
-        help='Detection threshold (L1 magnitude)'
-    )
+    parser.add_argument('--threshold', type=int, default=10000, help='Detection threshold (L1 magnitude)')
     args = parser.parse_args()
     
     # Paths
@@ -1115,14 +1169,14 @@ def main():
     fpga_dir = os.path.abspath(os.path.join(script_dir, '..', '..', '..'))
     data_base = os.path.expanduser("~/Downloads/adi_radar_data")
     amp_data = os.path.join(data_base, "amp_radar", "phaser_amp_4MSPS_500M_300u_256_m3dB.npy")
-    amp_config = os.path.join(
-        data_base,
-        "amp_radar",
-        "phaser_amp_4MSPS_500M_300u_256_m3dB_config.npy"
-    )
+    amp_config = os.path.join(data_base, "amp_radar", "phaser_amp_4MSPS_500M_300u_256_m3dB_config.npy")
     twiddle_1024 = os.path.join(fpga_dir, "fft_twiddle_1024.mem")
     output_dir = os.path.join(script_dir, "hex")
     
+    print("=" * 72)
+    print("AERIS-10 FPGA Golden Reference Model")
+    print("Using ADI CN0566 Phaser Radar Data (10.525 GHz X-band FMCW)")
+    print("=" * 72)
     
     # -----------------------------------------------------------------------
     # Load and quantize ADI data
@@ -1132,10 +1186,16 @@ def main():
     )
     
     # iq_i, iq_q: (32, 1024) int64, 16-bit range — post-DDC equivalent
+    print(f"\n{'=' * 72}")
+    print("Stage 0: Data loaded and quantized to 16-bit signed")
+    print(f"  IQ block shape: ({iq_i.shape[0]}, {iq_i.shape[1]})")
+    print(f"  ADC stimulus: {len(adc_8bit)} samples (8-bit unsigned)")
     
     # -----------------------------------------------------------------------
     # Write stimulus files
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Writing hex stimulus files for RTL testbenches")
     
     # Post-DDC IQ for each chirp (for FFT + Doppler validation)
     write_hex_files(output_dir, iq_i, iq_q, "post_ddc")
@@ -1149,6 +1209,8 @@ def main():
     # -----------------------------------------------------------------------
     # Run range FFT on first chirp (bit-accurate)
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Stage 2: Range FFT (1024-point, bit-accurate)")
     range_fft_i, range_fft_q = run_range_fft(iq_i[0], iq_q[0], twiddle_1024)
     write_hex_files(output_dir, range_fft_i, range_fft_q, "range_fft_chirp0")
     
@@ -1156,16 +1218,20 @@ def main():
     all_range_i = np.zeros((DOPPLER_CHIRPS, FFT_SIZE), dtype=np.int64)
     all_range_q = np.zeros((DOPPLER_CHIRPS, FFT_SIZE), dtype=np.int64)
     
+    print(f"\n  Running range FFT for all {DOPPLER_CHIRPS} chirps...")
     for c in range(DOPPLER_CHIRPS):
         ri, rq = run_range_fft(iq_i[c], iq_q[c], twiddle_1024)
         all_range_i[c] = ri
         all_range_q[c] = rq
         if (c + 1) % 8 == 0:
-            pass
+            print(f"    Chirp {c + 1}/{DOPPLER_CHIRPS} done")
     
     # -----------------------------------------------------------------------
     # Run Doppler FFT (bit-accurate) — "direct" path (first 64 bins)
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Stage 3: Doppler FFT (dual 16-point with Hamming window)")
+    print("  [direct path: first 64 range bins, no decimation]")
     twiddle_16 = os.path.join(fpga_dir, "fft_twiddle_16.mem")
     doppler_i, doppler_q = run_doppler_fft(all_range_i, all_range_q, twiddle_file_16=twiddle_16)
     write_hex_files(output_dir, doppler_i, doppler_q, "doppler_map")
@@ -1175,6 +1241,8 @@ def main():
     # This models the actual RTL data flow:
     #   range FFT → range_bin_decimator (peak detection) → Doppler
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Stage 2b: Range Bin Decimator (1024 → 64, peak detection)")
     
     decim_i, decim_q = run_range_bin_decimator(
         all_range_i, all_range_q,
@@ -1194,11 +1262,14 @@ def main():
                 q_val = int(all_range_q[c, b]) & 0xFFFF
                 packed = (q_val << 16) | i_val
                 f.write(f"{packed:08X}\n")
+    print(f"  Wrote {fc_input_file} ({DOPPLER_CHIRPS * FFT_SIZE} packed IQ words)")
     
     # Write decimated output reference for standalone decimator test
     write_hex_files(output_dir, decim_i, decim_q, "decimated_range")
     
     # Now run Doppler on the decimated data — this is the full-chain reference
+    print(f"\n{'=' * 72}")
+    print("Stage 3b: Doppler FFT on decimated data (full-chain path)")
     fc_doppler_i, fc_doppler_q = run_doppler_fft(
         decim_i, decim_q, twiddle_file_16=twiddle_16
     )
@@ -1213,6 +1284,7 @@ def main():
                 q_val = int(fc_doppler_q[rbin, dbin]) & 0xFFFF
                 packed = (q_val << 16) | i_val
                 f.write(f"{packed:08X}\n")
+    print(f"  Wrote {fc_doppler_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} packed IQ words)")
     
     # Save numpy arrays for the full-chain path
     np.save(os.path.join(output_dir, "decimated_range_i.npy"), decim_i)
@@ -1225,12 +1297,16 @@ def main():
     # This models the complete RTL data flow:
     #   range FFT → decimator → MTI canceller → Doppler → DC notch → CFAR
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Stage 3c: MTI Canceller (2-pulse, on decimated data)")
     mti_i, mti_q = run_mti_canceller(decim_i, decim_q, enable=True)
     write_hex_files(output_dir, mti_i, mti_q, "fullchain_mti_ref")
     np.save(os.path.join(output_dir, "fullchain_mti_i.npy"), mti_i)
     np.save(os.path.join(output_dir, "fullchain_mti_q.npy"), mti_q)
     
     # Doppler on MTI-filtered data
+    print(f"\n{'=' * 72}")
+    print("Stage 3b+c: Doppler FFT on MTI-filtered decimated data")
     mti_doppler_i, mti_doppler_q = run_doppler_fft(
         mti_i, mti_q, twiddle_file_16=twiddle_16
     )
@@ -1240,6 +1316,8 @@ def main():
     
     # DC notch on MTI-Doppler data
     DC_NOTCH_WIDTH = 2  # Default test value: zero bins {0, 1, 31}
+    print(f"\n{'=' * 72}")
+    print(f"Stage 3d: DC Notch Filter (width={DC_NOTCH_WIDTH})")
     notched_i, notched_q = run_dc_notch(mti_doppler_i, mti_doppler_q, width=DC_NOTCH_WIDTH)
     write_hex_files(output_dir, notched_i, notched_q, "fullchain_notched_ref")
     
@@ -1252,12 +1330,15 @@ def main():
                 q_val = int(notched_q[rbin, dbin]) & 0xFFFF
                 packed = (q_val << 16) | i_val
                 f.write(f"{packed:08X}\n")
+    print(f"  Wrote {fc_notched_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} packed IQ words)")
     
     # CFAR on DC-notched data
     CFAR_GUARD = 2
     CFAR_TRAIN = 8
     CFAR_ALPHA = 0x30  # Q4.4 = 3.0
     CFAR_MODE = 'CA'
+    print(f"\n{'=' * 72}")
+    print(f"Stage 3e: CA-CFAR (guard={CFAR_GUARD}, train={CFAR_TRAIN}, alpha=0x{CFAR_ALPHA:02X})")
     cfar_flags, cfar_mag, cfar_thr = run_cfar_ca(
         notched_i, notched_q,
         guard=CFAR_GUARD, train=CFAR_TRAIN,
@@ -1272,6 +1353,7 @@ def main():
             for dbin in range(DOPPLER_TOTAL_BINS):
                 m = int(cfar_mag[rbin, dbin]) & 0x1FFFF
                 f.write(f"{m:05X}\n")
+    print(f"  Wrote {cfar_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} mag values)")
     
     # 2. Threshold map (17-bit unsigned)
     cfar_thr_file = os.path.join(output_dir, "fullchain_cfar_thr.hex")
@@ -1280,6 +1362,7 @@ def main():
             for dbin in range(DOPPLER_TOTAL_BINS):
                 t = int(cfar_thr[rbin, dbin]) & 0x1FFFF
                 f.write(f"{t:05X}\n")
+    print(f"  Wrote {cfar_thr_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} threshold values)")
     
     # 3. Detection flags (1-bit per cell)
     cfar_det_file = os.path.join(output_dir, "fullchain_cfar_det.hex")
@@ -1288,6 +1371,7 @@ def main():
             for dbin in range(DOPPLER_TOTAL_BINS):
                 d = 1 if cfar_flags[rbin, dbin] else 0
                 f.write(f"{d:01X}\n")
+    print(f"  Wrote {cfar_det_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} detection flags)")
     
     # 4. Detection list (text)
     cfar_detections = np.argwhere(cfar_flags)
@@ -1295,14 +1379,12 @@ def main():
     with open(cfar_det_list_file, 'w') as f:
         f.write("# AERIS-10 Full-Chain CFAR Detection List\n")
         f.write(f"# Chain: decim -> MTI -> Doppler -> DC notch(w={DC_NOTCH_WIDTH}) -> CA-CFAR\n")
-        f.write(
-            f"# CFAR: guard={CFAR_GUARD}, train={CFAR_TRAIN}, "
-            f"alpha=0x{CFAR_ALPHA:02X}, mode={CFAR_MODE}\n"
-        )
+        f.write(f"# CFAR: guard={CFAR_GUARD}, train={CFAR_TRAIN}, alpha=0x{CFAR_ALPHA:02X}, mode={CFAR_MODE}\n")
         f.write("# Format: range_bin doppler_bin magnitude threshold\n")
         for det in cfar_detections:
             r, d = det
             f.write(f"{r} {d} {cfar_mag[r, d]} {cfar_thr[r, d]}\n")
+    print(f"  Wrote {cfar_det_list_file} ({len(cfar_detections)} detections)")
     
     # Save numpy arrays
     np.save(os.path.join(output_dir, "fullchain_cfar_mag.npy"), cfar_mag)
@@ -1310,6 +1392,8 @@ def main():
     np.save(os.path.join(output_dir, "fullchain_cfar_flags.npy"), cfar_flags)
     
     # Run detection on full-chain Doppler map
+    print(f"\n{'=' * 72}")
+    print("Stage 4: Detection on full-chain Doppler map")
     fc_mag, fc_detections = run_detection(fc_doppler_i, fc_doppler_q, threshold=args.threshold)
     
     # Save full-chain detection reference
@@ -1321,6 +1405,7 @@ def main():
         for d in fc_detections:
             rbin, dbin = d
             f.write(f"{rbin} {dbin} {fc_mag[rbin, dbin]}\n")
+    print(f"  Wrote {fc_det_file} ({len(fc_detections)} detections)")
     
     # Also write detection reference as hex for RTL comparison
     fc_det_mag_file = os.path.join(output_dir, "fullchain_detection_mag.hex")
@@ -1329,10 +1414,13 @@ def main():
             for dbin in range(DOPPLER_TOTAL_BINS):
                 m = int(fc_mag[rbin, dbin]) & 0x1FFFF  # 17-bit unsigned
                 f.write(f"{m:05X}\n")
+    print(f"  Wrote {fc_det_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} magnitude values)")
     
     # -----------------------------------------------------------------------
     # Run detection on direct-path Doppler map (for backward compatibility)
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Stage 4b: Detection on direct-path Doppler map")
     mag, detections = run_detection(doppler_i, doppler_q, threshold=args.threshold)
     
     # Save detection list
@@ -1344,23 +1432,26 @@ def main():
         for d in detections:
             rbin, dbin = d
             f.write(f"{rbin} {dbin} {mag[rbin, dbin]}\n")
+    print(f"  Wrote {det_file} ({len(detections)} detections)")
     
     # -----------------------------------------------------------------------
     # Float reference and comparison
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("Comparison: Fixed-point vs Float reference")
     
     range_fft_float, doppler_float = run_float_reference(iq_i, iq_q)
     
     # Compare range FFT (chirp 0)
     float_range_i = np.real(range_fft_float[0, :]).astype(np.float64)
     float_range_q = np.imag(range_fft_float[0, :]).astype(np.float64)
-    compare_outputs("Range FFT", range_fft_i, range_fft_q,
+    snr_range = compare_outputs("Range FFT", range_fft_i, range_fft_q,
                                 float_range_i, float_range_q)
     
     # Compare Doppler map
     float_doppler_i = np.real(doppler_float).flatten().astype(np.float64)
     float_doppler_q = np.imag(doppler_float).flatten().astype(np.float64)
-    compare_outputs("Doppler FFT", 
+    snr_doppler = compare_outputs("Doppler FFT", 
                                    doppler_i.flatten(), doppler_q.flatten(),
                                    float_doppler_i, float_doppler_q)
     
@@ -1372,10 +1463,26 @@ def main():
     np.save(os.path.join(output_dir, "doppler_map_i.npy"), doppler_i)
     np.save(os.path.join(output_dir, "doppler_map_q.npy"), doppler_q)
     np.save(os.path.join(output_dir, "detection_mag.npy"), mag)
+    print(f"\n  Saved numpy reference files to {output_dir}/")
     
     # -----------------------------------------------------------------------
     # Summary
     # -----------------------------------------------------------------------
+    print(f"\n{'=' * 72}")
+    print("SUMMARY")
+    print(f"{'=' * 72}")
+    print(f"  ADI dataset: frame {args.frame} of amp_radar (CN0566, 10.525 GHz)")
+    print(f"  Chirps processed: {DOPPLER_CHIRPS}")
+    print(f"  Samples/chirp: {FFT_SIZE}")
+    print(f"  Range FFT: {FFT_SIZE}-point → {snr_range:.1f} dB vs float")
+    print(f"  Doppler FFT (direct): {DOPPLER_FFT_SIZE}-point Hamming → {snr_doppler:.1f} dB vs float")
+    print(f"  Detections (direct): {len(detections)} (threshold={args.threshold})")
+    print("  Full-chain decimator: 1024→64 peak detection")
+    print(f"  Full-chain detections: {len(fc_detections)} (threshold={args.threshold})")
+    print(f"  MTI+CFAR chain: decim → MTI → Doppler → DC notch(w={DC_NOTCH_WIDTH}) → CA-CFAR")
+    print(f"  CFAR detections: {len(cfar_detections)} (guard={CFAR_GUARD}, train={CFAR_TRAIN}, alpha=0x{CFAR_ALPHA:02X})")
+    print(f"  Hex stimulus files: {output_dir}/")
+    print("  Ready for RTL co-simulation with Icarus Verilog")
     
     # -----------------------------------------------------------------------
     # Optional plots
@@ -1426,10 +1533,11 @@ def main():
             plt.tight_layout()
             plot_file = os.path.join(output_dir, "golden_reference_plots.png")
             plt.savefig(plot_file, dpi=150)
+            print(f"\n  Saved plots to {plot_file}")
             plt.show()
             
         except ImportError:
-            pass
+            print("\n  [WARN] matplotlib not available, skipping plots")
 
 
 if __name__ == "__main__":
diff --git a/9_Firmware/9_2_FPGA/tb/cosim/validate_mem_files.py b/9_Firmware/9_2_FPGA/tb/cosim/validate_mem_files.py
deleted file mode 100644
index 8b9d79e..0000000
--- a/9_Firmware/9_2_FPGA/tb/cosim/validate_mem_files.py
+++ /dev/null
@@ -1,569 +0,0 @@
-#!/usr/bin/env python3
-"""
-validate_mem_files.py — Validate all .mem files against AERIS-10 radar parameters.
-
-Checks:
-  1. Structural: line counts, hex format, value ranges for all 12 .mem files
-  2. FFT twiddle files: bit-exact match against cos(2*pi*k/N) in Q15
-  3. Long chirp .mem files: reverse-engineer parameters, check for chirp structure
-  4. Short chirp .mem files: check length, value range, spectral content
-   5. latency_buffer LATENCY=3187 parameter validation
-
-Usage:
-    python3 validate_mem_files.py
-"""
-
-import math
-import os
-import sys
-
-# ============================================================================
-# AERIS-10 System Parameters (from radar_scene.py)
-# ============================================================================
-F_CARRIER = 10.5e9        # 10.5 GHz carrier
-C_LIGHT = 3.0e8
-F_IF = 120e6              # IF frequency
-CHIRP_BW = 20e6           # 20 MHz sweep
-FS_ADC = 400e6            # ADC sample rate
-FS_SYS = 100e6            # System clock (100 MHz, after CIC 4x)
-T_LONG_CHIRP = 30e-6      # 30 us long chirp
-T_SHORT_CHIRP = 0.5e-6    # 0.5 us short chirp
-CIC_DECIMATION = 4
-FFT_SIZE = 1024
-DOPPLER_FFT_SIZE = 16
-LONG_CHIRP_SAMPLES = int(T_LONG_CHIRP * FS_SYS)  # 3000 at 100 MHz
-
-# Overlap-save parameters
-OVERLAP_SAMPLES = 128
-SEGMENT_ADVANCE = FFT_SIZE - OVERLAP_SAMPLES  # 896
-LONG_SEGMENTS = 4
-
-MEM_DIR = os.path.join(os.path.dirname(__file__), '..', '..')
-
-pass_count = 0
-fail_count = 0
-warn_count = 0
-
-def check(condition, _label):
-    global pass_count, fail_count
-    if condition:
-        pass_count += 1
-    else:
-        fail_count += 1
-
-def warn(_label):
-    global warn_count
-    warn_count += 1
-
-def read_mem_hex(filename):
-    """Read a .mem file, return list of integer values (16-bit signed)."""
-    path = os.path.join(MEM_DIR, filename)
-    values = []
-    with open(path) as f:
-        for line in f:
-            line = line.strip()
-            if not line or line.startswith('//'):
-                continue
-            val = int(line, 16)
-            # Interpret as 16-bit signed
-            if val >= 0x8000:
-                val -= 0x10000
-            values.append(val)
-    return values
-
-
-# ============================================================================
-# TEST 1: Structural validation of all .mem files
-# ============================================================================
-def test_structural():
-
-    expected = {
-        # FFT twiddle files (quarter-wave cosine ROMs)
-        'fft_twiddle_1024.mem': {'lines': 256, 'desc': '1024-pt FFT quarter-wave cos ROM'},
-        'fft_twiddle_16.mem':   {'lines': 4,   'desc': '16-pt FFT quarter-wave cos ROM'},
-        # Long chirp segments (4 segments x 1024 samples each)
-        'long_chirp_seg0_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 I'},
-        'long_chirp_seg0_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 Q'},
-        'long_chirp_seg1_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 1 I'},
-        'long_chirp_seg1_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 1 Q'},
-        'long_chirp_seg2_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 2 I'},
-        'long_chirp_seg2_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 2 Q'},
-        'long_chirp_seg3_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 3 I'},
-        'long_chirp_seg3_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 3 Q'},
-        # Short chirp (50 samples)
-        'short_chirp_i.mem': {'lines': 50, 'desc': 'Short chirp I'},
-        'short_chirp_q.mem': {'lines': 50, 'desc': 'Short chirp Q'},
-    }
-
-    for fname, info in expected.items():
-        path = os.path.join(MEM_DIR, fname)
-        exists = os.path.isfile(path)
-        check(exists, f"{fname} exists")
-        if not exists:
-            continue
-
-        vals = read_mem_hex(fname)
-        check(len(vals) == info['lines'],
-              f"{fname}: {len(vals)} data lines (expected {info['lines']})")
-
-        # Check all values are in 16-bit signed range
-        in_range = all(-32768 <= v <= 32767 for v in vals)
-        check(in_range, f"{fname}: all values in [-32768, 32767]")
-
-
-# ============================================================================
-# TEST 2: FFT Twiddle Factor Validation
-# ============================================================================
-def test_twiddle_1024():
-    vals = read_mem_hex('fft_twiddle_1024.mem')
-
-    max_err = 0
-    err_details = []
-    for k in range(min(256, len(vals))):
-        angle = 2.0 * math.pi * k / 1024.0
-        expected = round(math.cos(angle) * 32767.0)
-        expected = max(-32768, min(32767, expected))
-        actual = vals[k]
-        err = abs(actual - expected)
-        if err > max_err:
-            max_err = err
-        if err > 1:
-            err_details.append((k, actual, expected, err))
-
-    check(max_err <= 1,
-          f"fft_twiddle_1024.mem: max twiddle error = {max_err} LSB (tolerance: 1)")
-    if err_details:
-        for _, _act, _exp, _e in err_details[:5]:
-            pass
-
-
-def test_twiddle_16():
-    vals = read_mem_hex('fft_twiddle_16.mem')
-
-    max_err = 0
-    for k in range(min(4, len(vals))):
-        angle = 2.0 * math.pi * k / 16.0
-        expected = round(math.cos(angle) * 32767.0)
-        expected = max(-32768, min(32767, expected))
-        actual = vals[k]
-        err = abs(actual - expected)
-        if err > max_err:
-            max_err = err
-
-    check(max_err <= 1,
-          f"fft_twiddle_16.mem: max twiddle error = {max_err} LSB (tolerance: 1)")
-
-    # Print all 4 entries for reference
-    for k in range(min(4, len(vals))):
-        angle = 2.0 * math.pi * k / 16.0
-        expected = round(math.cos(angle) * 32767.0)
-
-
-# ============================================================================
-# TEST 3: Long Chirp .mem File Analysis
-# ============================================================================
-def test_long_chirp():
-
-    # Load all 4 segments
-    all_i = []
-    all_q = []
-    for seg in range(4):
-        seg_i = read_mem_hex(f'long_chirp_seg{seg}_i.mem')
-        seg_q = read_mem_hex(f'long_chirp_seg{seg}_q.mem')
-        all_i.extend(seg_i)
-        all_q.extend(seg_q)
-
-    total_samples = len(all_i)
-    check(total_samples == 4096,
-          f"Total long chirp samples: {total_samples} (expected 4096 = 4 segs x 1024)")
-
-    # Compute magnitude envelope
-    magnitudes = [math.sqrt(i*i + q*q) for i, q in zip(all_i, all_q, strict=False)]
-    max_mag = max(magnitudes)
-    min(magnitudes)
-    sum(magnitudes) / len(magnitudes)
-
-
-    # Check if this looks like it came from generate_reference_chirp_q15
-    # That function uses 32767 * 0.9 scaling => max magnitude ~29490
-    expected_max_from_model = 32767 * 0.9
-    uses_model_scaling = max_mag > expected_max_from_model * 0.8
-    if uses_model_scaling:
-        pass
-    else:
-        warn(f"Magnitude ({max_mag:.0f}) is much lower than expected from Python model "
-             f"({expected_max_from_model:.0f}). .mem files may have unknown provenance.")
-
-    # Check non-zero content: how many samples are non-zero?
-    sum(1 for v in all_i if v != 0)
-    sum(1 for v in all_q if v != 0)
-
-    # Analyze instantaneous frequency via phase differences
-    phases = []
-    for i_val, q_val in zip(all_i, all_q, strict=False):
-        if abs(i_val) > 5 or abs(q_val) > 5:  # Skip near-zero samples
-            phases.append(math.atan2(q_val, i_val))
-        else:
-            phases.append(None)
-
-    # Compute phase differences (instantaneous frequency)
-    freq_estimates = []
-    for n in range(1, len(phases)):
-        if phases[n] is not None and phases[n-1] is not None:
-            dp = phases[n] - phases[n-1]
-            # Unwrap
-            while dp > math.pi:
-                dp -= 2 * math.pi
-            while dp < -math.pi:
-                dp += 2 * math.pi
-            # Frequency in Hz (at 100 MHz sample rate, since these are post-DDC)
-            f_inst = dp * FS_SYS / (2 * math.pi)
-            freq_estimates.append(f_inst)
-
-    if freq_estimates:
-        sum(freq_estimates[:50]) / 50 if len(freq_estimates) > 50 else freq_estimates[0]
-        sum(freq_estimates[-50:]) / 50 if len(freq_estimates) > 50 else freq_estimates[-1]
-        f_min = min(freq_estimates)
-        f_max = max(freq_estimates)
-        f_range = f_max - f_min
-
-
-        # A chirp should show frequency sweep
-        is_chirp = f_range > 0.5e6  # At least 0.5 MHz sweep
-        check(is_chirp,
-              f"Long chirp shows frequency sweep ({f_range/1e6:.2f} MHz > 0.5 MHz)")
-
-        # Check if bandwidth roughly matches expected
-        bw_match = abs(f_range - CHIRP_BW) / CHIRP_BW < 0.5  # within 50%
-        if bw_match:
-            pass
-        else:
-            warn(f"Bandwidth {f_range/1e6:.2f} MHz does NOT match expected {CHIRP_BW/1e6:.2f} MHz")
-
-    # Compare segment boundaries for overlap-save consistency
-    # In proper overlap-save, the chirp data should be segmented at 896-sample boundaries
-    # with segments being 1024-sample FFT blocks
-    for seg in range(4):
-        seg_i = read_mem_hex(f'long_chirp_seg{seg}_i.mem')
-        seg_q = read_mem_hex(f'long_chirp_seg{seg}_q.mem')
-        seg_mags = [math.sqrt(i*i + q*q) for i, q in zip(seg_i, seg_q, strict=False)]
-        sum(seg_mags) / len(seg_mags)
-        max(seg_mags)
-
-        # Check segment 3 zero-padding (chirp is 3000 samples, seg3 starts at 3072)
-        # Samples 3000-4095 should be zero (or near-zero) if chirp is exactly 3000 samples
-        if seg == 3:
-            # Seg3 covers chirp samples 3072..4095
-            # If chirp is only 3000 samples, then only samples 0..(3000-3072) = NONE are valid
-            # Actually chirp has 3000 samples total. Seg3 starts at index 3*1024=3072.
-            # So seg3 should only have 3000-3072 = -72 -> no valid chirp data!
-            # Wait, but the .mem files have 1024 lines with non-trivial data...
-            # Let's check if seg3 has significant data
-            zero_count = sum(1 for m in seg_mags if m < 2)
-            if zero_count > 500:
-                pass
-            else:
-                pass
-        else:
-            pass
-
-
-# ============================================================================
-# TEST 4: Short Chirp .mem File Analysis
-# ============================================================================
-def test_short_chirp():
-
-    short_i = read_mem_hex('short_chirp_i.mem')
-    short_q = read_mem_hex('short_chirp_q.mem')
-
-    check(len(short_i) == 50, f"Short chirp I: {len(short_i)} samples (expected 50)")
-    check(len(short_q) == 50, f"Short chirp Q: {len(short_q)} samples (expected 50)")
-
-    # Expected: 0.5 us chirp at 100 MHz = 50 samples
-    expected_samples = int(T_SHORT_CHIRP * FS_SYS)
-    check(len(short_i) == expected_samples,
-          f"Short chirp length matches T_SHORT_CHIRP * FS_SYS = {expected_samples}")
-
-    magnitudes = [math.sqrt(i*i + q*q) for i, q in zip(short_i, short_q, strict=False)]
-    max(magnitudes)
-    sum(magnitudes) / len(magnitudes)
-
-
-    # Check non-zero
-    nonzero = sum(1 for m in magnitudes if m > 1)
-    check(nonzero == len(short_i), f"All {nonzero}/{len(short_i)} samples non-zero")
-
-    # Check it looks like a chirp (phase should be quadratic)
-    phases = [math.atan2(q, i) for i, q in zip(short_i, short_q, strict=False)]
-    freq_est = []
-    for n in range(1, len(phases)):
-        dp = phases[n] - phases[n-1]
-        while dp > math.pi:
-            dp -= 2 * math.pi
-        while dp < -math.pi:
-            dp += 2 * math.pi
-        freq_est.append(dp * FS_SYS / (2 * math.pi))
-
-    if freq_est:
-        freq_est[0]
-        freq_est[-1]
-
-
-# ============================================================================
-# TEST 5: Generate Expected Chirp .mem and Compare
-# ============================================================================
-def test_chirp_vs_model():
-
-    # Generate reference using the same method as radar_scene.py
-    chirp_rate = CHIRP_BW / T_LONG_CHIRP  # Hz/s
-
-    model_i = []
-    model_q = []
-    n_chirp = min(FFT_SIZE, LONG_CHIRP_SAMPLES)  # 1024
-
-    for n in range(n_chirp):
-        t = n / FS_SYS
-        phase = math.pi * chirp_rate * t * t
-        re_val = round(32767 * 0.9 * math.cos(phase))
-        im_val = round(32767 * 0.9 * math.sin(phase))
-        model_i.append(max(-32768, min(32767, re_val)))
-        model_q.append(max(-32768, min(32767, im_val)))
-
-    # Read seg0 from .mem
-    mem_i = read_mem_hex('long_chirp_seg0_i.mem')
-    mem_q = read_mem_hex('long_chirp_seg0_q.mem')
-
-    # Compare magnitudes
-    model_mags = [math.sqrt(i*i + q*q) for i, q in zip(model_i, model_q, strict=False)]
-    mem_mags = [math.sqrt(i*i + q*q) for i, q in zip(mem_i, mem_q, strict=False)]
-
-    model_max = max(model_mags)
-    mem_max = max(mem_mags)
-
-
-    # Check if they match (they almost certainly won't based on magnitude analysis)
-    matches = sum(1 for a, b in zip(model_i, mem_i, strict=False) if a == b)
-
-    if matches > len(model_i) * 0.9:
-        pass
-    else:
-        warn(".mem files do NOT match Python model. They likely have different provenance.")
-        # Try to detect scaling
-        if mem_max > 0:
-            model_max / mem_max
-
-    # Check phase correlation (shape match regardless of scaling)
-    model_phases = [math.atan2(q, i) for i, q in zip(model_i, model_q, strict=False)]
-    mem_phases = [math.atan2(q, i) for i, q in zip(mem_i, mem_q, strict=False)]
-
-    # Compute phase differences
-    phase_diffs = []
-    for mp, fp in zip(model_phases, mem_phases, strict=False):
-        d = mp - fp
-        while d > math.pi:
-            d -= 2 * math.pi
-        while d < -math.pi:
-            d += 2 * math.pi
-        phase_diffs.append(d)
-
-    sum(phase_diffs) / len(phase_diffs)
-    max_phase_diff = max(abs(d) for d in phase_diffs)
-
-
-    phase_match = max_phase_diff < 0.5  # within 0.5 rad
-    check(
-        phase_match,
-        f"Phase shape match: max diff = {math.degrees(max_phase_diff):.1f} deg "
-        f"(tolerance: 28.6 deg)",
-    )
-
-
-# ============================================================================
-# TEST 6: Latency Buffer LATENCY=3187 Validation
-# ============================================================================
-def test_latency_buffer():
-
-    # The latency buffer delays the reference chirp data to align with
-    # the matched filter processing chain output.
-    #
-    # The total latency through the processing chain depends on the branch:
-    #
-    # SYNTHESIS branch (fft_engine.v):
-    #   - Load: 1024 cycles (input)
-    #   - Forward FFT: LOG2N=10 stages x N/2=512 butterflies x 5-cycle pipeline = variable
-    #   - Reference FFT: same
-    #   - Conjugate multiply: 1024 cycles (4-stage pipeline in frequency_matched_filter)
-    #   - Inverse FFT: same as forward
-    #   - Output: 1024 cycles
-    #   Total: roughly 3000-4000 cycles depending on pipeline fill
-    #
-    # The LATENCY=3187 value was likely determined empirically to align
-    # the reference chirp arriving at the processing chain with the
-    # correct time-domain position.
-    #
-    # Key constraint: LATENCY must be < 4096 (BRAM buffer size)
-    LATENCY = 3187
-    BRAM_SIZE = 4096
-
-    check(LATENCY < BRAM_SIZE,
-          f"LATENCY ({LATENCY}) < BRAM size ({BRAM_SIZE})")
-
-    # The fft_engine processes in stages:
-    # - LOAD: 1024 clocks (accepts input)
-    # - Per butterfly stage: 512 butterflies x 5 pipeline stages = ~2560 clocks + overhead
-    #   Actually: 512 butterflies, each takes 5 cycles = 2560 per stage, 10 stages
-    #   Total compute: 10 * 2560 = 25600 clocks
-    # But this is just for ONE FFT. The chain does 3 FFTs + multiply.
-    #
-    # For the SIMULATION branch, it's 1 clock per operation (behavioral).
-    # LATENCY=3187 doesn't apply to simulation branch behavior —
-    # it's the physical hardware pipeline latency.
-    #
-    # For synthesis: the latency_buffer feeds ref data to the chain via
-    # chirp_memory_loader_param → latency_buffer → chain.
-    # But wait — looking at radar_receiver_final.v:
-    #   - mem_request drives valid_in on the latency buffer
-    #   - The buffer delays {ref_i, ref_q} by LATENCY valid_in cycles
-    #   - The delayed output feeds long_chirp_real/imag → chain
-    #
-    # The purpose: the chain in the SYNTHESIS branch reads reference data
-    # via the long_chirp_real/imag ports DURING ST_FWD_FFT (while collecting
-    # input samples). The reference data needs to arrive LATENCY cycles
-    # after the first mem_request, where LATENCY accounts for:
-    #   - The fft_engine pipeline latency from input to output
-    #   - Specifically, the chain processes: load 1024 → FFT → FFT → multiply → IFFT → output
-    #     The reference is consumed during the second FFT (ST_REF_BITREV/BUTTERFLY)
-    #     which starts after the first FFT completes.
-
-    # For now, validate that LATENCY is reasonable (between 1000 and 4095)
-    check(1000 < LATENCY < 4095,
-          f"LATENCY={LATENCY} in reasonable range [1000, 4095]")
-
-    # Check that the module name vs parameter is consistent
-    # Module name was renamed from latency_buffer_2159 to latency_buffer
-    # to match the actual parameterized LATENCY value. No warning needed.
-
-    # Validate address arithmetic won't overflow
-    min_read_ptr = 4096 + 0 - LATENCY
-    check(min_read_ptr >= 0 and min_read_ptr < 4096,
-          f"Min read_ptr after wrap = {min_read_ptr} (valid: 0..4095)")
-
-    # The latency buffer uses valid_in gated reads, so it only counts
-    # valid samples. The number of valid_in pulses between first write
-    # and first read is LATENCY.
-
-
-# ============================================================================
-# TEST 7: Cross-check chirp memory loader addressing
-# ============================================================================
-def test_memory_addressing():
-
-    # chirp_memory_loader_param uses: long_addr = {segment_select[1:0], sample_addr[9:0]}
-    # This creates a 12-bit address: seg[1:0] ++ addr[9:0]
-    # Segment 0: addresses 0x000..0x3FF (0..1023)
-    # Segment 1: addresses 0x400..0x7FF (1024..2047)
-    # Segment 2: addresses 0x800..0xBFF (2048..3071)
-    # Segment 3: addresses 0xC00..0xFFF (3072..4095)
-
-    for seg in range(4):
-        base = seg * 1024
-        end = base + 1023
-        addr_from_concat = (seg << 10) | 0  # {seg[1:0], 10'b0}
-        addr_end = (seg << 10) | 1023
-
-        check(
-            addr_from_concat == base,
-            f"Seg {seg} base address: {{{seg}[1:0], 10'b0}} = {addr_from_concat} "
-            f"(expected {base})",
-        )
-        check(addr_end == end,
-              f"Seg {seg} end address: {{{seg}[1:0], 10'h3FF}} = {addr_end} (expected {end})")
-
-    # Memory is declared as: reg [15:0] long_chirp_i [0:4095]
-    # $readmemh loads seg0 to [0:1023], seg1 to [1024:2047], etc.
-    # Addressing via {segment_select, sample_addr} maps correctly.
-
-
-# ============================================================================
-# TEST 8: Seg3 zero-padding analysis
-# ============================================================================
-def test_seg3_padding():
-
-    # The long chirp has 3000 samples (30 us at 100 MHz).
-    # With 4 segments of 1024 samples = 4096 total memory slots.
-    # Segments are loaded contiguously into memory:
-    #   Seg0: chirp samples 0..1023
-    #   Seg1: chirp samples 1024..2047
-    #   Seg2: chirp samples 2048..3071
-    #   Seg3: chirp samples 3072..4095
-    #
-    # But the chirp only has 3000 samples! So seg3 should have:
-    #   Valid chirp data at indices 0..(3000-3072-1) = NEGATIVE
-    #   Wait — 3072 > 3000, so seg3 has NO valid chirp samples if chirp is exactly 3000.
-    #
-    # However, the overlap-save algorithm in matched_filter_multi_segment.v
-    # collects data differently:
-    #   Seg0: collect 896 DDC samples, buffer[0:895], zero-pad [896:1023]
-    #   Seg1: overlap from seg0[768:895] → buffer[0:127], collect 896 → buffer[128:1023]
-    #   ...
-    # The chirp reference is indexed by segment_select + sample_addr,
-    # so it reads ALL 1024 values for each segment regardless.
-    #
-    # If the chirp is 3000 samples but only 4*1024=4096 slots exist,
-    # the question is: do the .mem files contain 3000 samples of real chirp
-    # data spread across 4096 slots, or something else?
-
-    seg3_i = read_mem_hex('long_chirp_seg3_i.mem')
-    seg3_q = read_mem_hex('long_chirp_seg3_q.mem')
-
-    mags = [math.sqrt(i*i + q*q) for i, q in zip(seg3_i, seg3_q, strict=False)]
-
-    # Count trailing zeros (samples after chirp ends)
-    trailing_zeros = 0
-    for m in reversed(mags):
-        if m < 2:
-            trailing_zeros += 1
-        else:
-            break
-
-    nonzero = sum(1 for m in mags if m > 2)
-
-
-    if nonzero == 1024:
-        # This means the .mem files encode 4096 chirp samples, not 3000
-        # The chirp duration used for .mem generation was different from T_LONG_CHIRP
-        actual_chirp_samples = 4 * 1024  # = 4096
-        actual_duration = actual_chirp_samples / FS_SYS
-        warn(f"Chirp in .mem files appears to be {actual_chirp_samples} samples "
-             f"({actual_duration*1e6:.1f} us), not {LONG_CHIRP_SAMPLES} samples "
-             f"({T_LONG_CHIRP*1e6:.1f} us)")
-    elif trailing_zeros > 100:
-        # Some padding at end
-        3072 + (1024 - trailing_zeros)
-
-
-# ============================================================================
-# MAIN
-# ============================================================================
-def main():
-
-    test_structural()
-    test_twiddle_1024()
-    test_twiddle_16()
-    test_long_chirp()
-    test_short_chirp()
-    test_chirp_vs_model()
-    test_latency_buffer()
-    test_memory_addressing()
-    test_seg3_padding()
-
-    if fail_count == 0:
-        pass
-    else:
-        pass
-
-    return 0 if fail_count == 0 else 1
-
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/9_Firmware/9_2_FPGA/tb/gen_mf_golden_ref.py b/9_Firmware/9_2_FPGA/tb/gen_mf_golden_ref.py
index 161e9d9..7454f15 100644
--- a/9_Firmware/9_2_FPGA/tb/gen_mf_golden_ref.py
+++ b/9_Firmware/9_2_FPGA/tb/gen_mf_golden_ref.py
@@ -147,6 +147,7 @@ def main():
     # =========================================================================
     # Case 2: Tone autocorrelation at bin 5
     # Signal and reference: complex tone at bin 5, amplitude 8000 (Q15)
+    # sig[n] = 8000 * exp(j * 2*pi*5*n/N)
     # Autocorrelation of a tone => peak at bin 0 (lag 0)
     # =========================================================================
     amp = 8000.0
@@ -240,12 +241,28 @@ def main():
     # =========================================================================
     # Print summary to stdout
     # =========================================================================
+    print("=" * 72)
+    print("Matched Filter Golden Reference Generator")
+    print(f"Output directory: {outdir}")
+    print(f"FFT length: {N}")
+    print("=" * 72)
 
-    for _ in summaries:
-        pass
+    for s in summaries:
+        print()
+        print(f"Case {s['case']}: {s['description']}")
+        print(f"  Peak bin:              {s['peak_bin']}")
+        print(f"  Peak magnitude (float):{s['peak_mag_float']:.6f}")
+        print(f"  Peak I (float):        {s['peak_i_float']:.6f}")
+        print(f"  Peak Q (float):        {s['peak_q_float']:.6f}")
+        print(f"  Peak I (quantized):    {s['peak_i_quant']}")
+        print(f"  Peak Q (quantized):    {s['peak_q_quant']}")
 
-    for _ in all_files:
-        pass
+    print()
+    print(f"Generated {len(all_files)} files:")
+    for fname in all_files:
+        print(f"  {fname}")
+    print()
+    print("Done.")
 
 
 if __name__ == "__main__":
diff --git a/9_Firmware/tests/cross_layer/test_mem_validation.py b/9_Firmware/tests/cross_layer/test_mem_validation.py
new file mode 100644
index 0000000..fd55e1d
--- /dev/null
+++ b/9_Firmware/tests/cross_layer/test_mem_validation.py
@@ -0,0 +1,444 @@
+"""
+test_mem_validation.py — Validate FPGA .mem files against AERIS-10 radar parameters.
+
+Migrated from tb/cosim/validate_mem_files.py into CI-friendly pytest tests.
+
+Checks:
+  1. Structural: line counts, hex format, value ranges for all 12+ .mem files
+  2. FFT twiddle files: bit-exact match against cos(2*pi*k/N) in Q15
+  3. Long chirp .mem files: frequency sweep, magnitude envelope, segment count
+  4. Short chirp .mem files: length, value range, non-zero content
+  5. Chirp vs independent model: phase shape agreement
+  6. Latency buffer LATENCY=3187 parameter validation
+  7. Chirp memory loader addressing: {segment_select, sample_addr} arithmetic
+  8. Seg3 zero-padding analysis
+"""
+
+import math
+import os
+import warnings
+
+import pytest
+
+# ============================================================================
+# AERIS-10 System Parameters (independently derived from hardware specs)
+# ============================================================================
+F_CARRIER = 10.5e9        # 10.5 GHz carrier
+C_LIGHT = 3.0e8
+F_IF = 120e6              # IF frequency
+CHIRP_BW = 20e6           # 20 MHz sweep bandwidth
+FS_ADC = 400e6            # ADC sample rate
+FS_SYS = 100e6            # System clock (100 MHz, after CIC 4x decimation)
+T_LONG_CHIRP = 30e-6      # 30 us long chirp
+T_SHORT_CHIRP = 0.5e-6    # 0.5 us short chirp
+CIC_DECIMATION = 4
+FFT_SIZE = 1024
+DOPPLER_FFT_SIZE = 16
+LONG_CHIRP_SAMPLES = int(T_LONG_CHIRP * FS_SYS)  # 3000 at 100 MHz
+
+# Overlap-save parameters
+OVERLAP_SAMPLES = 128
+SEGMENT_ADVANCE = FFT_SIZE - OVERLAP_SAMPLES  # 896
+LONG_SEGMENTS = 4
+
+# Path to FPGA RTL directory containing .mem files
+MEM_DIR = os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..', '9_2_FPGA'))
+
+# Expected .mem file inventory
+EXPECTED_MEM_FILES = {
+    'fft_twiddle_1024.mem': {'lines': 256, 'desc': '1024-pt FFT quarter-wave cos ROM'},
+    'fft_twiddle_16.mem':   {'lines': 4,   'desc': '16-pt FFT quarter-wave cos ROM'},
+    'long_chirp_seg0_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 I'},
+    'long_chirp_seg0_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 Q'},
+    'long_chirp_seg1_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 1 I'},
+    'long_chirp_seg1_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 1 Q'},
+    'long_chirp_seg2_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 2 I'},
+    'long_chirp_seg2_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 2 Q'},
+    'long_chirp_seg3_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 3 I'},
+    'long_chirp_seg3_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 3 Q'},
+    'short_chirp_i.mem': {'lines': 50, 'desc': 'Short chirp I'},
+    'short_chirp_q.mem': {'lines': 50, 'desc': 'Short chirp Q'},
+}
+
+
+def read_mem_hex(filename: str) -> list[int]:
+    """Read a .mem file, return list of integer values (16-bit signed)."""
+    path = os.path.join(MEM_DIR, filename)
+    values = []
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith('//'):
+                continue
+            val = int(line, 16)
+            if val >= 0x8000:
+                val -= 0x10000
+            values.append(val)
+    return values
+
+
+def compute_magnitudes(i_vals: list[int], q_vals: list[int]) -> list[float]:
+    """Compute magnitude envelope from I/Q sample lists."""
+    return [math.sqrt(i * i + q * q) for i, q in zip(i_vals, q_vals, strict=False)]
+
+
+def compute_inst_freq(i_vals: list[int], q_vals: list[int],
+                      fs: float, mag_thresh: float = 5.0) -> list[float]:
+    """Compute instantaneous frequency from I/Q via phase differencing."""
+    phases = []
+    for i_val, q_val in zip(i_vals, q_vals, strict=False):
+        if abs(i_val) > mag_thresh or abs(q_val) > mag_thresh:
+            phases.append(math.atan2(q_val, i_val))
+        else:
+            phases.append(None)
+
+    freq_estimates = []
+    for n in range(1, len(phases)):
+        if phases[n] is not None and phases[n - 1] is not None:
+            dp = phases[n] - phases[n - 1]
+            while dp > math.pi:
+                dp -= 2 * math.pi
+            while dp < -math.pi:
+                dp += 2 * math.pi
+            freq_estimates.append(dp * fs / (2 * math.pi))
+    return freq_estimates
+
+
+# ============================================================================
+# TEST 1: Structural validation — all .mem files exist with correct sizes
+# ============================================================================
+class TestStructural:
+    """Verify every expected .mem file exists, has the right line count, and valid values."""
+
+    @pytest.mark.parametrize("fname,info", EXPECTED_MEM_FILES.items(),
+                             ids=EXPECTED_MEM_FILES.keys())
+    def test_file_exists(self, fname, info):
+        path = os.path.join(MEM_DIR, fname)
+        assert os.path.isfile(path), f"{fname} missing from {MEM_DIR}"
+
+    @pytest.mark.parametrize("fname,info", EXPECTED_MEM_FILES.items(),
+                             ids=EXPECTED_MEM_FILES.keys())
+    def test_line_count(self, fname, info):
+        vals = read_mem_hex(fname)
+        assert len(vals) == info['lines'], (
+            f"{fname}: got {len(vals)} data lines, expected {info['lines']}"
+        )
+
+    @pytest.mark.parametrize("fname,info", EXPECTED_MEM_FILES.items(),
+                             ids=EXPECTED_MEM_FILES.keys())
+    def test_value_range(self, fname, info):
+        vals = read_mem_hex(fname)
+        for i, v in enumerate(vals):
+            assert -32768 <= v <= 32767, (
+                f"{fname}[{i}]: value {v} out of 16-bit signed range"
+            )
+
+
+# ============================================================================
+# TEST 2: FFT Twiddle Factor Validation (bit-exact against cos formula)
+# ============================================================================
+class TestTwiddle:
+    """Verify FFT twiddle .mem files match cos(2*pi*k/N) in Q15 to <=1 LSB."""
+
+    def test_twiddle_1024_bit_exact(self):
+        vals = read_mem_hex('fft_twiddle_1024.mem')
+        assert len(vals) == 256, f"Expected 256 quarter-wave entries, got {len(vals)}"
+
+        max_err = 0
+        worst_k = -1
+        for k in range(256):
+            angle = 2.0 * math.pi * k / 1024.0
+            expected = max(-32768, min(32767, round(math.cos(angle) * 32767.0)))
+            err = abs(vals[k] - expected)
+            if err > max_err:
+                max_err = err
+                worst_k = k
+
+        assert max_err <= 1, (
+            f"fft_twiddle_1024.mem: max error {max_err} LSB at k={worst_k} "
+            f"(got {vals[worst_k]}, expected "
+            f"{max(-32768, min(32767, round(math.cos(2*math.pi*worst_k/1024)*32767)))})"
+        )
+
+    def test_twiddle_16_bit_exact(self):
+        vals = read_mem_hex('fft_twiddle_16.mem')
+        assert len(vals) == 4, f"Expected 4 quarter-wave entries, got {len(vals)}"
+
+        max_err = 0
+        for k in range(4):
+            angle = 2.0 * math.pi * k / 16.0
+            expected = max(-32768, min(32767, round(math.cos(angle) * 32767.0)))
+            err = abs(vals[k] - expected)
+            if err > max_err:
+                max_err = err
+
+        assert max_err <= 1, f"fft_twiddle_16.mem: max error {max_err} LSB (tolerance: 1)"
+
+    def test_twiddle_1024_known_values(self):
+        """Spot-check specific twiddle values against hand-calculated results."""
+        vals = read_mem_hex('fft_twiddle_1024.mem')
+        # k=0: cos(0) = 1.0 -> 32767
+        assert vals[0] == 32767, f"k=0: expected 32767, got {vals[0]}"
+        # k=128: cos(pi/4) = sqrt(2)/2 -> round(32767 * 0.7071) = 23170
+        expected_128 = round(math.cos(2 * math.pi * 128 / 1024) * 32767)
+        assert abs(vals[128] - expected_128) <= 1, (
+            f"k=128: expected ~{expected_128}, got {vals[128]}"
+        )
+        # k=255: last entry in quarter-wave table
+        expected_255 = round(math.cos(2 * math.pi * 255 / 1024) * 32767)
+        assert abs(vals[255] - expected_255) <= 1, (
+            f"k=255: expected ~{expected_255}, got {vals[255]}"
+        )
+
+
+# ============================================================================
+# TEST 3: Long Chirp .mem File Analysis
+# ============================================================================
+class TestLongChirp:
+    """Validate long chirp .mem files show correct chirp characteristics."""
+
+    def test_total_sample_count(self):
+        """4 segments x 1024 samples = 4096 total."""
+        all_i, all_q = [], []
+        for seg in range(4):
+            all_i.extend(read_mem_hex(f'long_chirp_seg{seg}_i.mem'))
+            all_q.extend(read_mem_hex(f'long_chirp_seg{seg}_q.mem'))
+        assert len(all_i) == 4096, f"Total I samples: {len(all_i)}, expected 4096"
+        assert len(all_q) == 4096, f"Total Q samples: {len(all_q)}, expected 4096"
+
+    def test_nonzero_magnitude(self):
+        """Chirp should have significant non-zero content."""
+        all_i, all_q = [], []
+        for seg in range(4):
+            all_i.extend(read_mem_hex(f'long_chirp_seg{seg}_i.mem'))
+            all_q.extend(read_mem_hex(f'long_chirp_seg{seg}_q.mem'))
+        mags = compute_magnitudes(all_i, all_q)
+        max_mag = max(mags)
+        # Should use substantial dynamic range (at least 1000 out of 32767)
+        assert max_mag > 1000, f"Max magnitude {max_mag:.0f} is suspiciously low"
+
+    def test_frequency_sweep(self):
+        """Chirp should show at least 0.5 MHz frequency sweep."""
+        all_i, all_q = [], []
+        for seg in range(4):
+            all_i.extend(read_mem_hex(f'long_chirp_seg{seg}_i.mem'))
+            all_q.extend(read_mem_hex(f'long_chirp_seg{seg}_q.mem'))
+
+        freq_est = compute_inst_freq(all_i, all_q, FS_SYS)
+        assert len(freq_est) > 100, "Not enough valid phase samples for frequency analysis"
+
+        f_range = max(freq_est) - min(freq_est)
+        assert f_range > 0.5e6, (
+            f"Frequency sweep {f_range / 1e6:.2f} MHz is too narrow "
+            f"(expected > 0.5 MHz for a chirp)"
+        )
+
+    def test_bandwidth_reasonable(self):
+        """Chirp bandwidth should be within 50% of expected 20 MHz."""
+        all_i, all_q = [], []
+        for seg in range(4):
+            all_i.extend(read_mem_hex(f'long_chirp_seg{seg}_i.mem'))
+            all_q.extend(read_mem_hex(f'long_chirp_seg{seg}_q.mem'))
+
+        freq_est = compute_inst_freq(all_i, all_q, FS_SYS)
+        if not freq_est:
+            pytest.skip("No valid frequency estimates")
+
+        f_range = max(freq_est) - min(freq_est)
+        bw_error = abs(f_range - CHIRP_BW) / CHIRP_BW
+        if bw_error >= 0.5:
+            warnings.warn(
+                f"Bandwidth {f_range / 1e6:.2f} MHz differs from expected "
+                f"{CHIRP_BW / 1e6:.2f} MHz by {bw_error:.0%}",
+                stacklevel=1,
+            )
+
+
+# ============================================================================
+# TEST 4: Short Chirp .mem File Analysis
+# ============================================================================
+class TestShortChirp:
+    """Validate short chirp .mem files."""
+
+    def test_sample_count_matches_duration(self):
+        """0.5 us at 100 MHz = 50 samples."""
+        short_i = read_mem_hex('short_chirp_i.mem')
+        short_q = read_mem_hex('short_chirp_q.mem')
+        expected = int(T_SHORT_CHIRP * FS_SYS)
+        assert len(short_i) == expected, f"Short chirp I: {len(short_i)} != {expected}"
+        assert len(short_q) == expected, f"Short chirp Q: {len(short_q)} != {expected}"
+
+    def test_all_samples_nonzero(self):
+        """Every sample in the short chirp should have non-trivial magnitude."""
+        short_i = read_mem_hex('short_chirp_i.mem')
+        short_q = read_mem_hex('short_chirp_q.mem')
+        mags = compute_magnitudes(short_i, short_q)
+        nonzero = sum(1 for m in mags if m > 1)
+        assert nonzero == len(short_i), (
+            f"Only {nonzero}/{len(short_i)} samples are non-zero"
+        )
+
+
+# ============================================================================
+# TEST 5: Chirp vs Independent Model (phase shape agreement)
+# ============================================================================
+class TestChirpVsModel:
+    """Compare seg0 against independently generated chirp reference."""
+
+    def test_phase_shape_match(self):
+        """Phase trajectory of .mem seg0 should match model within 0.5 rad."""
+        # Generate reference chirp independently from first principles
+        chirp_rate = CHIRP_BW / T_LONG_CHIRP  # Hz/s
+        n_samples = FFT_SIZE  # 1024
+
+        model_i, model_q = [], []
+        for n in range(n_samples):
+            t = n / FS_SYS
+            phase = math.pi * chirp_rate * t * t
+            re_val = max(-32768, min(32767, round(32767 * 0.9 * math.cos(phase))))
+            im_val = max(-32768, min(32767, round(32767 * 0.9 * math.sin(phase))))
+            model_i.append(re_val)
+            model_q.append(im_val)
+
+        # Read seg0 from .mem
+        mem_i = read_mem_hex('long_chirp_seg0_i.mem')
+        mem_q = read_mem_hex('long_chirp_seg0_q.mem')
+
+        # Compare phase trajectories (shape match regardless of scaling)
+        model_phases = [math.atan2(q, i) for i, q in zip(model_i, model_q, strict=False)]
+        mem_phases = [math.atan2(q, i) for i, q in zip(mem_i, mem_q, strict=False)]
+
+        phase_diffs = []
+        for mp, fp in zip(model_phases, mem_phases, strict=False):
+            d = mp - fp
+            while d > math.pi:
+                d -= 2 * math.pi
+            while d < -math.pi:
+                d += 2 * math.pi
+            phase_diffs.append(d)
+
+        max_phase_diff = max(abs(d) for d in phase_diffs)
+        assert max_phase_diff < 0.5, (
+            f"Max phase difference {math.degrees(max_phase_diff):.1f} deg "
+            f"exceeds 28.6 deg tolerance"
+        )
+
+    def test_magnitude_scaling(self):
+        """Seg0 magnitude should be consistent with Q15 * 0.9 scaling."""
+        mem_i = read_mem_hex('long_chirp_seg0_i.mem')
+        mem_q = read_mem_hex('long_chirp_seg0_q.mem')
+        mags = compute_magnitudes(mem_i, mem_q)
+        max_mag = max(mags)
+
+        # Expected from 32767 * 0.9 scaling = ~29490
+        expected_max = 32767 * 0.9
+        # Should be at least 80% of expected (allows for different provenance)
+        if max_mag < expected_max * 0.8:
+            warnings.warn(
+                f"Seg0 max magnitude {max_mag:.0f} is below expected "
+                f"{expected_max:.0f} * 0.8 = {expected_max * 0.8:.0f}. "
+                f"The .mem files may have different provenance.",
+                stacklevel=1,
+            )
+
+
+# ============================================================================
+# TEST 6: Latency Buffer LATENCY=3187 Validation
+# ============================================================================
+class TestLatencyBuffer:
+    """Validate latency buffer parameter constraints."""
+
+    LATENCY = 3187
+    BRAM_SIZE = 4096
+
+    def test_latency_within_bram(self):
+        assert self.LATENCY < self.BRAM_SIZE, (
+            f"LATENCY ({self.LATENCY}) must be < BRAM size ({self.BRAM_SIZE})"
+        )
+
+    def test_latency_in_reasonable_range(self):
+        """LATENCY should be between 1000 and 4095 (empirically determined)."""
+        assert 1000 < self.LATENCY < 4095, (
+            f"LATENCY={self.LATENCY} outside reasonable range [1000, 4095]"
+        )
+
+    def test_read_ptr_no_overflow(self):
+        """Address arithmetic for read_ptr after initial wrap must stay valid."""
+        min_read_ptr = self.BRAM_SIZE + 0 - self.LATENCY
+        assert 0 <= min_read_ptr < self.BRAM_SIZE, (
+            f"min_read_ptr after wrap = {min_read_ptr}, must be in [0, {self.BRAM_SIZE})"
+        )
+
+
+# ============================================================================
+# TEST 7: Chirp Memory Loader Addressing
+# ============================================================================
+class TestMemoryAddressing:
+    """Validate {segment_select[1:0], sample_addr[9:0]} address mapping."""
+
+    @pytest.mark.parametrize("seg", range(4), ids=[f"seg{s}" for s in range(4)])
+    def test_segment_base_address(self, seg):
+        """Concatenated address {seg, 10'b0} should equal seg * 1024."""
+        addr = (seg << 10) | 0
+        expected = seg * 1024
+        assert addr == expected, (
+            f"Seg {seg}: {{seg[1:0], 10'b0}} = {addr}, expected {expected}"
+        )
+
+    @pytest.mark.parametrize("seg", range(4), ids=[f"seg{s}" for s in range(4)])
+    def test_segment_end_address(self, seg):
+        """Concatenated address {seg, 10'h3FF} should equal seg * 1024 + 1023."""
+        addr = (seg << 10) | 1023
+        expected = seg * 1024 + 1023
+        assert addr == expected, (
+            f"Seg {seg}: {{seg[1:0], 10'h3FF}} = {addr}, expected {expected}"
+        )
+
+    def test_full_address_space(self):
+        """4 segments x 1024 = 4096 addresses, covering full 12-bit range."""
+        all_addrs = set()
+        for seg in range(4):
+            for sample in range(1024):
+                all_addrs.add((seg << 10) | sample)
+        assert len(all_addrs) == 4096
+        assert min(all_addrs) == 0
+        assert max(all_addrs) == 4095
+
+
+# ============================================================================
+# TEST 8: Seg3 Zero-Padding Analysis
+# ============================================================================
+class TestSeg3Padding:
+    """Analyze seg3 content — chirp is 3000 samples but 4 segs x 1024 = 4096 slots."""
+
+    def test_seg3_content_analysis(self):
+        """Seg3 should either be full (4096-sample chirp) or have trailing zeros."""
+        seg3_i = read_mem_hex('long_chirp_seg3_i.mem')
+        seg3_q = read_mem_hex('long_chirp_seg3_q.mem')
+        mags = compute_magnitudes(seg3_i, seg3_q)
+
+        # Count trailing zeros
+        trailing_zeros = 0
+        for m in reversed(mags):
+            if m < 2:
+                trailing_zeros += 1
+            else:
+                break
+
+        nonzero = sum(1 for m in mags if m > 2)
+
+        if nonzero == 1024:
+            # .mem files encode 4096 chirp samples, not 3000
+            # This means the chirp duration used for .mem generation differs
+            actual_samples = 4 * 1024
+            actual_us = actual_samples / FS_SYS * 1e6
+            warnings.warn(
+                f"Chirp in .mem files is {actual_samples} samples ({actual_us:.1f} us), "
+                f"not {LONG_CHIRP_SAMPLES} samples ({T_LONG_CHIRP * 1e6:.1f} us). "
+                f"The .mem files use a different chirp duration than the system parameter.",
+                stacklevel=1,
+            )
+        elif trailing_zeros > 100:
+            # Some zero-padding at end — chirp ends partway through seg3
+            effective_chirp_end = 3072 + (1024 - trailing_zeros)
+            assert effective_chirp_end <= 4096, "Chirp end calculation overflow"
diff --git a/pyproject.toml b/pyproject.toml
index 9790436..841400c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,5 +49,7 @@ select = [
 "**/test_*.py" = ["ARG", "T20", "ERA"]
 # Re-export modules: unused imports are intentional
 "**/v7/hardware.py" = ["F401"]
-# CLI tool: print() is the intentional output mechanism
+# CLI tools & cosim scripts: print() is the intentional output mechanism
 "**/uart_capture.py" = ["T20"]
+"**/tb/cosim/**" = ["T20", "ERA", "ARG", "E501"]
+"**/tb/gen_mf_golden_ref.py" = ["T20", "ERA"]