fix(test,docs): remove dead xfft_32 files, update test infra for dual-16 FFT, add regression guide

- Remove xfft_32.v, tb_xfft_32.v, and fft_twiddle_32.mem (dead code since PR #33 moved Doppler to dual 16-pt FFT architecture)
- Update run_regression.sh: xfft_16 in PROD_RTL, remove xfft_32 from EXTRA_RTL and all compile commands
- Update tb_fft_engine.v to test with N=16 / fft_twiddle_16.mem
- Update validate_mem_files.py: validate fft_twiddle_16.mem instead of fft_twiddle_32.mem
- Update testbenches and golden data from main_cleanup branch to match dual-16 architecture (tb_doppler_cosim, tb_doppler_realdata, tb_fullchain_realdata, tb_fullchain_mti_cfar_realdata, tb_system_e2e, radar_receiver_final, golden_doppler.mem)
- Update CONTRIBUTING.md with full regression test instructions covering FPGA, MCU, GUI, co-simulation, and formal verification

Regression: 23/23 FPGA, 20/20 MCU, 57/58 GUI, 56/56 mem validation, all co-sim scenarios PASS.
2026-04-07 02:51:48 +03:00
parent 04982a3176
commit 1e284767cd
15 changed files with 2265 additions and 2806 deletions
@@ -1,11 +0,0 @@
-// Quarter-wave cosine ROM for 32-point FFT
-// 8 entries, 16-bit signed Q15 ($readmemh format)
-// cos(2*pi*k/32) for k = 0..7
-7FFF
-7D89
-7641
-6A6D
-5A82
-471C
-30FB
-18F9
@@ -403,11 +403,12 @@ assign range_data_32bit = {mti_range_q, mti_range_i};
 assign range_data_valid = mti_range_valid;

 // ========== DOPPLER PROCESSOR ==========
-doppler_processor_optimized #(
-    .DOPPLER_FFT_SIZE(32),
-    .RANGE_BINS(64),
-    .CHIRPS_PER_FRAME(32)  // MUST MATCH YOUR ACTUAL FRAME SIZE!
-) doppler_proc (
+doppler_processor_optimized #(
+    .DOPPLER_FFT_SIZE(16),
+    .RANGE_BINS(64),
+    .CHIRPS_PER_FRAME(32),
+    .CHIRPS_PER_SUBFRAME(16)
+) doppler_proc (
    .clk(clk),
    .reset_n(reset_n),
    .range_data(range_data_32bit),
@@ -473,4 +474,4 @@ assign dbg_adc_i     = adc_i_scaled;
 assign dbg_adc_q     = adc_q_scaled;
 assign dbg_adc_valid = adc_valid_sync;

-endmodule
+endmodule
@@ -67,7 +67,7 @@ PROD_RTL=(
    matched_filter_processing_chain.v
    range_bin_decimator.v
    doppler_processor.v
-    xfft_32.v
+    xfft_16.v
    fft_engine.v
    usb_data_interface.v
    edge_detector.v
@@ -369,7 +369,7 @@ run_test "Chirp Contract" \

 run_test "Doppler Processor (DSP48)" \
    tb/tb_doppler_reg.vvp \
-    tb/tb_doppler_cosim.v doppler_processor.v xfft_32.v fft_engine.v
+    tb/tb_doppler_cosim.v doppler_processor.v xfft_16.v fft_engine.v

 run_test "Threshold Detector (detection bugs)" \
    tb/tb_threshold_detector.vvp \
@@ -414,7 +414,7 @@ if [[ "$QUICK" -eq 0 ]]; then
        cdc_modules.v fir_lowpass.v ddc_input_interface.v \
        chirp_memory_loader_param.v latency_buffer.v \
        matched_filter_multi_segment.v matched_filter_processing_chain.v \
-        range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v \
+        range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
        rx_gain_control.v mti_canceller.v

    # Golden compare
@@ -426,7 +426,7 @@ if [[ "$QUICK" -eq 0 ]]; then
        cdc_modules.v fir_lowpass.v ddc_input_interface.v \
        chirp_memory_loader_param.v latency_buffer.v \
        matched_filter_multi_segment.v matched_filter_processing_chain.v \
-        range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v \
+        range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
        rx_gain_control.v mti_canceller.v

    # Full system top (monitoring-only, legacy)
@@ -439,7 +439,7 @@ if [[ "$QUICK" -eq 0 ]]; then
        cdc_modules.v fir_lowpass.v ddc_input_interface.v \
        chirp_memory_loader_param.v latency_buffer.v \
        matched_filter_multi_segment.v matched_filter_processing_chain.v \
-        range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v \
+        range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
        usb_data_interface.v edge_detector.v radar_mode_controller.v \
        rx_gain_control.v cfar_ca.v mti_canceller.v fpga_self_test.v

@@ -453,7 +453,7 @@ if [[ "$QUICK" -eq 0 ]]; then
        cdc_modules.v fir_lowpass.v ddc_input_interface.v \
        chirp_memory_loader_param.v latency_buffer.v \
        matched_filter_multi_segment.v matched_filter_processing_chain.v \
-        range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v \
+        range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
        usb_data_interface.v edge_detector.v radar_mode_controller.v \
        rx_gain_control.v cfar_ca.v mti_canceller.v fpga_self_test.v
 else
@@ -472,10 +472,6 @@ run_test "FFT Engine" \
    tb/tb_fft_reg.vvp \
    tb/tb_fft_engine.v fft_engine.v

-run_test "XFFT-32 Wrapper" \
-    tb/tb_xfft_reg.vvp \
-    tb/tb_xfft_32.v xfft_32.v fft_engine.v
-
 run_test "NCO 400MHz" \
    tb/tb_nco_reg.vvp \
    tb/tb_nco_400m.v nco_400m_enhanced.v
@@ -487,7 +483,7 @@ run_test "FIR Lowpass" \
 run_test "Matched Filter Chain" \
    tb/tb_mf_reg.vvp \
    tb/tb_matched_filter_processing_chain.v matched_filter_processing_chain.v \
-    xfft_32.v fft_engine.v chirp_memory_loader_param.v
+    fft_engine.v chirp_memory_loader_param.v

 echo ""

@@ -30,7 +30,7 @@ T_LONG_CHIRP = 30e-6      # 30 us long chirp
 T_SHORT_CHIRP = 0.5e-6    # 0.5 us short chirp
 CIC_DECIMATION = 4
 FFT_SIZE = 1024
-DOPPLER_FFT_SIZE = 32
+DOPPLER_FFT_SIZE = 16
 LONG_CHIRP_SAMPLES = int(T_LONG_CHIRP * FS_SYS)  # 3000 at 100 MHz

 # Overlap-save parameters
@@ -84,7 +84,7 @@ def test_structural():
    expected = {
        # FFT twiddle files (quarter-wave cosine ROMs)
        'fft_twiddle_1024.mem': {'lines': 256, 'desc': '1024-pt FFT quarter-wave cos ROM'},
-        'fft_twiddle_32.mem':   {'lines': 8,   'desc': '32-pt FFT quarter-wave cos ROM'},
+        'fft_twiddle_16.mem':   {'lines': 4,   'desc': '16-pt FFT quarter-wave cos ROM'},
        # Long chirp segments (4 segments x 1024 samples each)
        'long_chirp_seg0_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 I'},
        'long_chirp_seg0_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 Q'},
@@ -145,13 +145,13 @@ def test_twiddle_1024():
    print(f"  Max twiddle error: {max_err} LSB across {len(vals)} entries")


-def test_twiddle_32():
-    print("\n=== TEST 2b: FFT Twiddle 32 Validation ===")
-    vals = read_mem_hex('fft_twiddle_32.mem')
+def test_twiddle_16():
+    print("\n=== TEST 2b: FFT Twiddle 16 Validation ===")
+    vals = read_mem_hex('fft_twiddle_16.mem')

    max_err = 0
-    for k in range(min(8, len(vals))):
-        angle = 2.0 * math.pi * k / 32.0
+    for k in range(min(4, len(vals))):
+        angle = 2.0 * math.pi * k / 16.0
        expected = int(round(math.cos(angle) * 32767.0))
        expected = max(-32768, min(32767, expected))
        actual = vals[k]
@@ -160,13 +160,13 @@ def test_twiddle_32():
            max_err = err

    check(max_err <= 1,
-          f"fft_twiddle_32.mem: max twiddle error = {max_err} LSB (tolerance: 1)")
+          f"fft_twiddle_16.mem: max twiddle error = {max_err} LSB (tolerance: 1)")
    print(f"  Max twiddle error: {max_err} LSB across {len(vals)} entries")

-    # Print all 8 entries for reference
-    print("  Twiddle 32 entries:")
-    for k in range(min(8, len(vals))):
-        angle = 2.0 * math.pi * k / 32.0
+    # Print all 4 entries for reference
+    print("  Twiddle 16 entries:")
+    for k in range(min(4, len(vals))):
+        angle = 2.0 * math.pi * k / 16.0
        expected = int(round(math.cos(angle) * 32767.0))
        print(f"    k={k}: file=0x{vals[k] & 0xFFFF:04x} ({vals[k]:6d}), "
              f"expected=0x{expected & 0xFFFF:04x} ({expected:6d}), "
@@ -605,7 +605,7 @@ def main():

    test_structural()
    test_twiddle_1024()
-    test_twiddle_32()
+    test_twiddle_16()
    test_long_chirp()
    test_short_chirp()
    test_chirp_vs_model()
@@ -6,8 +6,8 @@
 *
 * Tests the complete Doppler processing pipeline:
 *   - Accumulates 32 chirps x 64 range bins into BRAM
- *   - Processes each range bin: Hamming window -> 32-pt FFT
- *   - Outputs 2048 samples (64 range bins x 32 Doppler bins)
+ *   - Processes each range bin: Hamming window -> dual 16-pt FFT (staggered PRF)
+ *   - Outputs 2048 samples (64 range bins x 32 packed Doppler bins)
 *
 * Validates:
 *   1. FSM state transitions (IDLE -> ACCUMULATE -> LOAD_FFT -> ... -> OUTPUT)
@@ -20,10 +20,10 @@
 * RTL output written to:  tb/cosim/rtl_doppler_<scenario>.csv
 * RTL FFT inputs written:  tb/cosim/rtl_doppler_fft_in_<scenario>.csv
 *
- * Compile (SIMULATION branch — uses behavioral xfft_32/fft_engine):
+ * Compile (SIMULATION branch — uses behavioral xfft_16/fft_engine):
 *   iverilog -g2001 -DSIMULATION \
 *     -o tb/tb_doppler_cosim.vvp \
- *     tb/tb_doppler_cosim.v doppler_processor.v xfft_32.v fft_engine.v
+ *     tb/tb_doppler_cosim.v doppler_processor.v xfft_16.v fft_engine.v
 *
 * Scenarios (use -D flags):
 *   default:              stationary target
@@ -37,7 +37,7 @@ module tb_doppler_cosim;
 // Parameters
 // ============================================================================
 localparam CLK_PERIOD    = 10.0;           // 100 MHz
-localparam DOPPLER_FFT   = 32;
+localparam DOPPLER_FFT   = 32;             // Total packed Doppler bins (2 sub-frames x 16-pt FFT)
 localparam RANGE_BINS    = 64;
 localparam CHIRPS        = 32;
 localparam TOTAL_INPUTS  = CHIRPS * RANGE_BINS;  // 2048
@@ -193,7 +193,7 @@ initial begin
    $display("Doppler Processor Co-Sim Testbench");
    $display("Scenario: %0s", SCENARIO);
    $display("Input samples: %0d  (32 chirps x 64 range bins)", TOTAL_INPUTS);
-    $display("Expected outputs: %0d (64 range bins x 32 doppler bins)",
+    $display("Expected outputs: %0d (64 range bins x 32 packed Doppler bins, dual 16-pt FFT)",
             TOTAL_OUTPUTS);
    $display("============================================================");

@@ -17,7 +17,7 @@
 * Compile:
 *   iverilog -Wall -DSIMULATION -g2012 \
 *     -o tb/tb_doppler_realdata.vvp \
- *     tb/tb_doppler_realdata.v doppler_processor.v xfft_32.v fft_engine.v
+ *     tb/tb_doppler_realdata.v doppler_processor.v xfft_16.v fft_engine.v
 *
 * Run from: 9_Firmware/9_2_FPGA/
 *   vvp tb/tb_doppler_realdata.vvp
@@ -29,7 +29,7 @@ module tb_doppler_realdata;
 // PARAMETERS
 // ============================================================================
 localparam CLK_PERIOD    = 10.0;           // 100 MHz
-localparam DOPPLER_FFT   = 32;
+localparam DOPPLER_FFT   = 32;             // Total packed Doppler bins (2 sub-frames x 16-pt FFT)
 localparam RANGE_BINS    = 64;
 localparam CHIRPS        = 32;
 localparam TOTAL_INPUTS  = CHIRPS * RANGE_BINS;  // 2048
@@ -4,7 +4,7 @@
 * tb_fft_engine.v
 *
 * Testbench for the synthesizable FFT engine.
- * Tests with N=32 first (fast), then validates key properties.
+ * Tests with N=16 (matching the dual-16 Doppler architecture).
 *
 * Test Groups:
 *   1. Impulse response: FFT of delta[0] should be all 1s
@@ -19,10 +19,10 @@
 module tb_fft_engine;

 // ============================================================================
-// PARAMETERS — test with 32-pt for speed
+// PARAMETERS — test with 16-pt to match dual-FFT Doppler architecture
 // ============================================================================
-localparam N      = 32;
-localparam LOG2N  = 5;
+localparam N      = 16;
+localparam LOG2N  = 4;
 localparam DATA_W = 16;
 localparam INT_W  = 32;
 localparam TW_W   = 16;
@@ -47,7 +47,7 @@ fft_engine #(
    .DATA_W(DATA_W),
    .INTERNAL_W(INT_W),
    .TWIDDLE_W(TW_W),
-    .TWIDDLE_FILE("fft_twiddle_32.mem")
+    .TWIDDLE_FILE("fft_twiddle_16.mem")
 ) dut (
    .clk(clk),
    .reset_n(reset_n),
@@ -9,7 +9,7 @@
 *
 *   range_bin_decimator (peak detection, 1024->64)
 *     -> mti_canceller (2-pulse, mti_enable=1)
- *       -> doppler_processor_optimized (Hamming + 32-pt FFT)
+ *       -> doppler_processor_optimized (Hamming + dual 16-pt FFT)
 *         -> DC notch filter (width=2, inline logic)
 *           -> cfar_ca (CA mode, guard=2, train=8, alpha=0x30)
 *
@@ -41,7 +41,7 @@
 *     -o tb/tb_fullchain_mti_cfar_realdata.vvp \
 *     tb/tb_fullchain_mti_cfar_realdata.v \
 *     range_bin_decimator.v mti_canceller.v doppler_processor.v \
- *     xfft_32.v fft_engine.v cfar_ca.v
+ *     xfft_16.v fft_engine.v cfar_ca.v
 *
 * Run from: 9_Firmware/9_2_FPGA/
 *   vvp tb/tb_fullchain_mti_cfar_realdata.vvp
@@ -375,7 +375,7 @@ initial begin
    $display("  Full-Chain Real-Data Co-Simulation (MTI + CFAR)");
    $display("  range_bin_decimator (peak, 1024->64)");
    $display("    -> mti_canceller (2-pulse, enable=1)");
-    $display("      -> doppler_processor_optimized (Hamming + 32-pt FFT)");
+    $display("      -> doppler_processor_optimized (Hamming + dual 16-pt FFT)");
    $display("        -> DC notch filter (width=%0d)", DC_NOTCH_WIDTH);
    $display("          -> cfar_ca (CA, guard=2, train=8, alpha=0x30)");
    $display("  ADI CN0566 Phaser 10.525 GHz X-band FMCW");
@@ -7,7 +7,7 @@
 * (post-range-FFT, 32 chirps x 1024 bins) through:
 *
 *   range_bin_decimator (peak detection, 1024→64)
- *     → doppler_processor_optimized (Hamming + 32-pt FFT)
+ *     → doppler_processor_optimized (Hamming + dual 16-pt FFT)
 *
 * and compares the Doppler output bit-for-bit against the Python golden
 * reference that models the same chain (golden_reference.py).
@@ -27,7 +27,7 @@
 *   iverilog -Wall -DSIMULATION -g2012 \
 *     -o tb/tb_fullchain_realdata.vvp \
 *     tb/tb_fullchain_realdata.v \
- *     range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v
+ *     range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v
 *
 * Run from: 9_Firmware/9_2_FPGA/
 *   vvp tb/tb_fullchain_realdata.vvp
@@ -243,7 +243,7 @@ initial begin
    $display("============================================================");
    $display("  Full-Chain Real-Data Co-Simulation");
    $display("  range_bin_decimator (peak, 1024->64)");
-    $display("    -> doppler_processor_optimized (Hamming + 32-pt FFT)");
+    $display("    -> doppler_processor_optimized (Hamming + dual 16-pt FFT)");
    $display("  ADI CN0566 Phaser 10.525 GHz X-band FMCW");
    $display("  Input:    %0d chirps x %0d range FFT bins = %0d samples",
             CHIRPS, INPUT_BINS, TOTAL_INPUT_SAMPLES);
@@ -34,7 +34,7 @@
 *     cdc_modules.v fir_lowpass.v ddc_input_interface.v \
 *     chirp_memory_loader_param.v latency_buffer.v \
 *     matched_filter_multi_segment.v matched_filter_processing_chain.v \
- *     range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v \
+ *     range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
 *     usb_data_interface.v edge_detector.v radar_mode_controller.v
 *
 * Run:
@@ -1,355 +0,0 @@
-`timescale 1ns / 1ps
-
-/**
- * tb_xfft_32.v
- *
- * Testbench for xfft_32 AXI-Stream FFT wrapper.
- * Verifies the wrapper correctly interfaces with fft_engine via AXI-Stream.
- *
- * Test Groups:
- *   1. Impulse response (all output bins = input amplitude)
- *   2. DC input (bin 0 = A*N, rest ~= 0)
- *   3. Single tone detection
- *   4. AXI-Stream handshake correctness (tvalid, tlast, tready)
- *   5. Back-to-back transforms (no state leakage)
- */
-
-module tb_xfft_32;
-
-// ============================================================================
-// PARAMETERS
-// ============================================================================
-localparam N         = 32;
-localparam CLK_PERIOD = 10;
-
-// ============================================================================
-// SIGNALS
-// ============================================================================
-reg         aclk, aresetn;
-reg  [7:0]  cfg_tdata;
-reg         cfg_tvalid;
-wire        cfg_tready;
-reg  [31:0] din_tdata;
-reg         din_tvalid;
-reg         din_tlast;
-wire [31:0] dout_tdata;
-wire        dout_tvalid;
-wire        dout_tlast;
-reg         dout_tready;
-
-// ============================================================================
-// DUT
-// ============================================================================
-xfft_32 dut (
-    .aclk(aclk),
-    .aresetn(aresetn),
-    .s_axis_config_tdata(cfg_tdata),
-    .s_axis_config_tvalid(cfg_tvalid),
-    .s_axis_config_tready(cfg_tready),
-    .s_axis_data_tdata(din_tdata),
-    .s_axis_data_tvalid(din_tvalid),
-    .s_axis_data_tlast(din_tlast),
-    .m_axis_data_tdata(dout_tdata),
-    .m_axis_data_tvalid(dout_tvalid),
-    .m_axis_data_tlast(dout_tlast),
-    .m_axis_data_tready(dout_tready)
-);
-
-// ============================================================================
-// CLOCK
-// ============================================================================
-initial aclk = 0;
-always #(CLK_PERIOD/2) aclk = ~aclk;
-
-// ============================================================================
-// PASS/FAIL TRACKING
-// ============================================================================
-integer pass_count, fail_count;
-
-task check;
-    input cond;
-    input [512*8-1:0] label;
-    begin
-        if (cond) begin
-            $display("  [PASS] %0s", label);
-            pass_count = pass_count + 1;
-        end else begin
-            $display("  [FAIL] %0s", label);
-            fail_count = fail_count + 1;
-        end
-    end
-endtask
-
-// ============================================================================
-// OUTPUT CAPTURE
-// ============================================================================
-reg signed [15:0] out_re [0:N-1];
-reg signed [15:0] out_im [0:N-1];
-integer out_idx;
-reg got_tlast;
-integer tlast_count;
-
-// ============================================================================
-// HELPER TASKS
-// ============================================================================
-
-task do_reset;
-    begin
-        aresetn    = 0;
-        cfg_tdata  = 0;
-        cfg_tvalid = 0;
-        din_tdata  = 0;
-        din_tvalid = 0;
-        din_tlast  = 0;
-        dout_tready = 1;
-        repeat(5) @(posedge aclk);
-        aresetn = 1;
-        repeat(2) @(posedge aclk);
-    end
-endtask
-
-// Send config (forward FFT: tdata[0]=1)
-// Waits for cfg_tready (wrapper in S_IDLE) before sending
-task send_config;
-    input [7:0] cfg;
-    integer wait_cnt;
-    begin
-        // Wait for wrapper to be ready (S_IDLE)
-        wait_cnt = 0;
-        while (!cfg_tready && wait_cnt < 5000) begin
-            @(posedge aclk);
-            wait_cnt = wait_cnt + 1;
-        end
-        cfg_tdata  = cfg;
-        cfg_tvalid = 1;
-        @(posedge aclk);
-        cfg_tvalid = 0;
-        cfg_tdata  = 0;
-    end
-endtask
-
-// Feed N samples: each sample is {im[15:0], re[15:0]}
-// in_re_arr and in_im_arr must be pre-loaded
-reg signed [15:0] feed_re [0:N-1];
-reg signed [15:0] feed_im [0:N-1];
-
-task feed_data;
-    integer i;
-    begin
-        for (i = 0; i < N; i = i + 1) begin
-            din_tdata  = {feed_im[i], feed_re[i]};
-            din_tvalid = 1;
-            din_tlast  = (i == N - 1) ? 1 : 0;
-            @(posedge aclk);
-        end
-        din_tvalid = 0;
-        din_tlast  = 0;
-        din_tdata  = 0;
-    end
-endtask
-
-// Capture N output samples
-task capture_output;
-    integer timeout;
-    begin
-        out_idx    = 0;
-        got_tlast  = 0;
-        tlast_count = 0;
-        timeout    = 0;
-        while (out_idx < N && timeout < 5000) begin
-            @(posedge aclk);
-            if (dout_tvalid && dout_tready) begin
-                out_re[out_idx] = dout_tdata[15:0];
-                out_im[out_idx] = dout_tdata[31:16];
-                if (dout_tlast) begin
-                    got_tlast = 1;
-                    tlast_count = tlast_count + 1;
-                end
-                out_idx = out_idx + 1;
-            end
-            timeout = timeout + 1;
-        end
-    end
-endtask
-
-// ============================================================================
-// VCD
-// ============================================================================
-initial begin
-    $dumpfile("tb_xfft_32.vcd");
-    $dumpvars(0, tb_xfft_32);
-end
-
-// ============================================================================
-// MAIN TEST
-// ============================================================================
-integer i;
-reg signed [31:0] err;
-integer max_err;
-integer max_mag_bin;
-reg signed [31:0] max_mag, mag;
-real angle;
-
-initial begin
-    pass_count = 0;
-    fail_count = 0;
-
-    $display("============================================================");
-    $display("  xfft_32 AXI-Stream Wrapper Testbench");
-    $display("============================================================");
-
-    do_reset;
-
-    // ================================================================
-    // TEST 1: Impulse Response
-    // ================================================================
-    $display("");
-    $display("--- Test 1: Impulse Response ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = (i == 0) ? 16'sd1000 : 16'sd0;
-        feed_im[i] = 16'sd0;
-    end
-
-    send_config(8'h01);  // Forward FFT
-    feed_data;
-    capture_output;
-
-    check(out_idx == N, "Received N output samples");
-    check(got_tlast == 1, "Got tlast on output");
-
-    max_err = 0;
-    for (i = 0; i < N; i = i + 1) begin
-        err = out_re[i] - 1000;
-        if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-        err = out_im[i];
-        if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-    end
-    $display("  Impulse max error: %0d", max_err);
-    check(max_err < 10, "Impulse: all bins ~= 1000");
-
-    // ================================================================
-    // TEST 2: DC Input
-    // ================================================================
-    $display("");
-    $display("--- Test 2: DC Input ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = 16'sd100;
-        feed_im[i] = 16'sd0;
-    end
-
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-
-    $display("  DC bin[0] = %0d + j%0d (expect ~3200)", out_re[0], out_im[0]);
-    check(out_re[0] >= 3100 && out_re[0] <= 3300, "DC: bin 0 ~= 3200 (5% tol)");
-
-    max_err = 0;
-    for (i = 1; i < N; i = i + 1) begin
-        err = out_re[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-        err = out_im[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-    end
-    $display("  DC max non-DC: %0d", max_err);
-    check(max_err < 25, "DC: non-DC bins ~= 0");
-
-    // ================================================================
-    // TEST 3: Single Tone (bin 4)
-    // ================================================================
-    $display("");
-    $display("--- Test 3: Single Tone (bin 4) ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        angle = 6.28318530718 * 4.0 * i / 32.0;
-        feed_re[i] = $rtoi($cos(angle) * 1000.0);
-        feed_im[i] = 16'sd0;
-    end
-
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-
-    max_mag = 0;
-    max_mag_bin = 0;
-    for (i = 0; i < N; i = i + 1) begin
-        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
-        if (mag > max_mag) begin
-            max_mag = mag;
-            max_mag_bin = i;
-        end
-    end
-    $display("  Tone peak bin: %0d (expect 4 or 28)", max_mag_bin);
-    check(max_mag_bin == 4 || max_mag_bin == 28, "Tone: peak at bin 4 or 28");
-
-    // ================================================================
-    // TEST 4: Back-to-back transforms
-    // ================================================================
-    $display("");
-    $display("--- Test 4: Back-to-Back Transforms ---");
-
-    // First: impulse
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = (i == 0) ? 16'sd500 : 16'sd0;
-        feed_im[i] = 16'sd0;
-    end
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-    check(out_idx == N, "Back-to-back 1st: got N outputs");
-
-    // Second: DC immediately after
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = 16'sd50;
-        feed_im[i] = 16'sd0;
-    end
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-    check(out_idx == N, "Back-to-back 2nd: got N outputs");
-    $display("  2nd transform bin[0] = %0d (expect ~1600)", out_re[0]);
-    check(out_re[0] >= 1500 && out_re[0] <= 1700, "Back-to-back 2nd: bin 0 ~= 1600");
-
-    // ================================================================
-    // TEST 5: Zero input
-    // ================================================================
-    $display("");
-    $display("--- Test 5: Zero Input ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = 16'sd0;
-        feed_im[i] = 16'sd0;
-    end
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-
-    max_err = 0;
-    for (i = 0; i < N; i = i + 1) begin
-        err = out_re[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-        err = out_im[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-    end
-    check(max_err == 0, "Zero input: all outputs = 0");
-
-    // ================================================================
-    // SUMMARY
-    // ================================================================
-    $display("");
-    $display("============================================================");
-    $display("  RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
-    if (fail_count == 0)
-        $display("  ALL TESTS PASSED");
-    else
-        $display("  SOME TESTS FAILED");
-    $display("============================================================");
-
-    $finish;
-end
-
-endmodule
@@ -5,7 +5,7 @@
 // Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
 // interface expected by the doppler_processor dual-FFT architecture.
 //
-// Identical interface to xfft_32.v but with N=16.
+// Used by the doppler_processor dual-FFT architecture (2 x 16-pt sub-frames).
 //
 // Data format: {Q[15:0], I[15:0]} packed 32-bit.
 // Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
@@ -1,278 +0,0 @@
-`timescale 1ns / 1ps
-// ============================================================================
-// xfft_32.v — 32-point FFT with AXI-Stream interface
-// ============================================================================
-// Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
-// interface expected by doppler_processor.v.
-//
-// Port interface matches the Xilinx LogiCORE IP Fast Fourier Transform
-// (AXI-Stream variant) as instantiated in doppler_processor.v.
-//
-// Data format: {Q[15:0], I[15:0]} packed 32-bit.
-// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
-// ============================================================================
-
-module xfft_32 (
-    input  wire        aclk,
-    input  wire        aresetn,
-
-    // Configuration channel (AXI-Stream slave)
-    input  wire [7:0]  s_axis_config_tdata,
-    input  wire        s_axis_config_tvalid,
-    output wire        s_axis_config_tready,
-
-    // Data input channel (AXI-Stream slave)
-    input  wire [31:0] s_axis_data_tdata,
-    input  wire        s_axis_data_tvalid,
-    input  wire        s_axis_data_tlast,
-
-    // Data output channel (AXI-Stream master)
-    output wire [31:0] m_axis_data_tdata,
-    output wire        m_axis_data_tvalid,
-    output wire        m_axis_data_tlast,
-    input  wire        m_axis_data_tready
-);
-
-// ============================================================================
-// PARAMETERS
-// ============================================================================
-localparam N     = 32;
-localparam LOG2N = 5;
-
-// ============================================================================
-// INTERNAL SIGNALS
-// ============================================================================
-
-// FSM states
-localparam [2:0] S_IDLE    = 3'd0,
-                 S_CONFIG  = 3'd1,  // Latch config (fwd/inv)
-                 S_FEED    = 3'd2,  // Feed input to FFT engine
-                 S_WAIT    = 3'd3,  // Wait for FFT to complete
-                 S_OUTPUT  = 3'd4;  // Stream output
-
-reg [2:0] state;
-
-// Configuration
-reg inverse_reg;
-
-// Input buffering
-reg signed [15:0] in_buf_re [0:N-1];
-reg signed [15:0] in_buf_im [0:N-1];
-reg [5:0] in_count;    // 0..31 for loading, extra bit for overflow check
-
-// Output buffering
-reg signed [15:0] out_buf_re [0:N-1];
-reg signed [15:0] out_buf_im [0:N-1];
-reg [5:0] out_count;
-reg [5:0] out_total;   // counts how many outputs captured from engine
-
-// FFT engine interface
-reg fft_start;
-reg fft_inverse;
-reg signed [15:0] fft_din_re, fft_din_im;
-reg fft_din_valid;
-wire signed [15:0] fft_dout_re, fft_dout_im;
-wire fft_dout_valid;
-wire fft_busy;
-wire fft_done;
-
-// Feed counter for streaming into engine
-reg [5:0] feed_count;
-
-// ============================================================================
-// FFT ENGINE INSTANCE
-// ============================================================================
-fft_engine #(
-    .N(N),
-    .LOG2N(LOG2N),
-    .DATA_W(16),
-    .INTERNAL_W(32),
-    .TWIDDLE_W(16),
-    .TWIDDLE_FILE("fft_twiddle_32.mem")
-) fft_core (
-    .clk(aclk),
-    .reset_n(aresetn),
-    .start(fft_start),
-    .inverse(fft_inverse),
-    .din_re(fft_din_re),
-    .din_im(fft_din_im),
-    .din_valid(fft_din_valid),
-    .dout_re(fft_dout_re),
-    .dout_im(fft_dout_im),
-    .dout_valid(fft_dout_valid),
-    .busy(fft_busy),
-    .done(fft_done)
-);
-
-// ============================================================================
-// AXI-STREAM OUTPUTS
-// ============================================================================
-
-// Config is accepted when idle
-assign s_axis_config_tready = (state == S_IDLE);
-
-// Output data: {Q, I} packed
-assign m_axis_data_tdata  = {out_buf_im[out_count[4:0]], out_buf_re[out_count[4:0]]};
-assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
-assign m_axis_data_tlast  = (state == S_OUTPUT) && (out_count == N - 1);
-
-// ============================================================================
-// BUFFER WRITE LOGIC — separate always block, NO async reset
-// Allows Vivado to infer distributed RAM instead of dissolving into registers.
-// ============================================================================
-// Input buffer write enable
-reg in_buf_we;
-reg [4:0] in_buf_waddr;
-reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
-
-// Output buffer write enable
-reg out_buf_we;
-reg [4:0] out_buf_waddr;
-reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
-
-always @(posedge aclk) begin
-    if (in_buf_we) begin
-        in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
-        in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
-    end
-    if (out_buf_we) begin
-        out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
-        out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
-    end
-end
-
-// ============================================================================
-// MAIN FSM
-// ============================================================================
-always @(posedge aclk or negedge aresetn) begin
-    if (!aresetn) begin
-        state        <= S_IDLE;
-        inverse_reg  <= 1'b0;
-        in_count     <= 0;
-        out_count    <= 0;
-        out_total    <= 0;
-        feed_count   <= 0;
-        fft_start    <= 1'b0;
-        fft_inverse  <= 1'b0;
-        fft_din_re   <= 0;
-        fft_din_im   <= 0;
-        fft_din_valid <= 1'b0;
-        in_buf_we    <= 1'b0;
-        in_buf_waddr <= 0;
-        in_buf_wdata_re <= 0;
-        in_buf_wdata_im <= 0;
-        out_buf_we   <= 1'b0;
-        out_buf_waddr <= 0;
-        out_buf_wdata_re <= 0;
-        out_buf_wdata_im <= 0;
-    end else begin
-        // Defaults
-        fft_start     <= 1'b0;
-        fft_din_valid <= 1'b0;
-        in_buf_we     <= 1'b0;
-        out_buf_we    <= 1'b0;
-
-        case (state)
-
-        // ================================================================
-        S_IDLE: begin
-            in_count <= 0;
-            if (s_axis_config_tvalid) begin
-                // Config tdata[0]: 1=forward, 0=inverse
-                // fft_engine: inverse=0 means forward, inverse=1 means inverse
-                inverse_reg <= ~s_axis_config_tdata[0];
-                state       <= S_FEED;
-                in_count    <= 0;
-                feed_count  <= 0;
-            end
-        end
-
-        // ================================================================
-        // S_FEED: Buffer all N inputs first, then start engine.
-        // ================================================================
-        S_FEED: begin
-            if (in_count < N) begin
-                // Still accepting input data
-                if (s_axis_data_tvalid) begin
-                    in_buf_we       <= 1'b1;
-                    in_buf_waddr    <= in_count[4:0];
-                    in_buf_wdata_re <= s_axis_data_tdata[15:0];
-                    in_buf_wdata_im <= s_axis_data_tdata[31:16];
-                    in_count <= in_count + 1;
-                end
-            end else if (feed_count == 0) begin
-                // All N inputs buffered, start the FFT engine
-                fft_start   <= 1'b1;
-                fft_inverse <= inverse_reg;
-                feed_count  <= 0;
-                state       <= S_WAIT;
-                out_total   <= 0;
-            end
-        end
-
-        // ================================================================
-        // S_WAIT: Feed buffered data to engine, then wait for output
-        // ================================================================
-        S_WAIT: begin
-            if (feed_count < N) begin
-                fft_din_re   <= in_buf_re[feed_count[4:0]];
-                fft_din_im   <= in_buf_im[feed_count[4:0]];
-                fft_din_valid <= 1'b1;
-                feed_count   <= feed_count + 1;
-            end
-
-            // Capture engine outputs
-            if (fft_dout_valid && out_total < N) begin
-                out_buf_we       <= 1'b1;
-                out_buf_waddr    <= out_total[4:0];
-                out_buf_wdata_re <= fft_dout_re;
-                out_buf_wdata_im <= fft_dout_im;
-                out_total <= out_total + 1;
-            end
-
-            // Engine done
-            if (fft_done) begin
-                state     <= S_OUTPUT;
-                out_count <= 0;
-            end
-        end
-
-        // ================================================================
-        // S_OUTPUT: Stream buffered results via AXI-Stream master
-        // ================================================================
-        S_OUTPUT: begin
-            if (m_axis_data_tready || !m_axis_data_tvalid) begin
-                if (out_count < N) begin
-                    // m_axis_data_tdata driven combinationally from out_buf
-                    if (m_axis_data_tready) begin
-                        out_count <= out_count + 1;
-                    end
-                end
-                if (out_count >= N - 1 && m_axis_data_tready) begin
-                    state <= S_IDLE;
-                end
-            end
-        end
-
-        default: state <= S_IDLE;
-
-        endcase
-    end
-end
-
-// ============================================================================
-// MEMORY INIT (simulation only)
-// ============================================================================
-`ifdef SIMULATION
-integer init_k;
-initial begin
-    for (init_k = 0; init_k < N; init_k = init_k + 1) begin
-        in_buf_re[init_k]  = 0;
-        in_buf_im[init_k]  = 0;
-        out_buf_re[init_k] = 0;
-        out_buf_im[init_k] = 0;
-    end
-end
-`endif
-
-endmodule
@@ -28,6 +28,112 @@ for getting a change reviewed and merged.
  if not, note which scripts your change affects
 - **Whitespace** — `git diff --check` should be clean
 - Keep PRs focused: one logical change per PR is easier to review
+- **Run the regression tests** (see below)
+
+## Running regression tests
+
+After any change, run the relevant test suites to verify nothing is
+broken. All commands assume you are at the repository root.
+
+### Prerequisites
+
+| Tool | Used by | Install |
+|------|---------|---------|
+| [Icarus Verilog](http://iverilog.icarus.com/) (`iverilog`) | FPGA regression | `brew install icarus-verilog` / `apt install iverilog` |
+| Python 3.8+ | GUI tests, co-sim | Usually pre-installed |
+| GNU Make | MCU tests | Usually pre-installed |
+| [SymbiYosys](https://symbiyosys.readthedocs.io/) (`sby`) | Formal verification | Optional — see SymbiYosys docs |
+
+### FPGA regression (RTL lint + unit/integration/signal-processing tests)
+
+```bash
+cd 9_Firmware/9_2_FPGA
+bash run_regression.sh
+```
+
+This runs five phases:
+
+| Phase | What it checks |
+|-------|----------------|
+| 0 — Lint | `iverilog -Wall` on all production RTL + static regex checks |
+| 1 — Changed Modules | Unit tests for individual blocks (CIC, Doppler, CFAR, etc.) |
+| 2 — Integration | DDC chain, receiver golden-compare, system-top, end-to-end |
+| 3 — Signal Processing | FFT engine, NCO, FIR, matched filter chain |
+| 4 — Infrastructure | CDC modules, edge detector, USB interface, range-bin decimator, mode controller |
+
+All tests must pass (exit code 0). Advisory lint warnings (e.g., `case
+without default`) are non-blocking.
+
+### MCU unit tests
+
+```bash
+cd 9_Firmware/9_1_Microcontroller/tests
+make clean && make all
+```
+
+Runs 20 C-based unit tests covering the safety, bug-fix, and gap-3 categories.
+Every test binary must exit 0.
+
+### GUI / dashboard tests
+
+```bash
+cd 9_Firmware/9_3_GUI
+python3 -m pytest test_radar_dashboard.py -v
+# or without pytest:
+python3 -m unittest test_radar_dashboard -v
+```
+
+57+ protocol and rendering tests. The `test_record_and_stop` test
+requires `h5py` and will be skipped if it is not installed.
+
+### Co-simulation (Python vs RTL golden comparison)
+
+Run from the co-sim directory after a successful FPGA regression (the
+regression generates the RTL CSV outputs that the co-sim scripts compare
+against):
+
+```bash
+cd 9_Firmware/9_2_FPGA/tb/cosim
+
+# Validate all .mem files (twiddles, chirp ROMs, addressing)
+python3 validate_mem_files.py
+
+# DDC chain: RTL vs Python model (5 scenarios)
+python3 compare.py dc
+python3 compare.py single_target
+python3 compare.py multi_target
+python3 compare.py noise_only
+python3 compare.py sine_1mhz
+
+# Doppler processor: RTL vs golden reference
+python3 compare_doppler.py stationary
+
+# Matched filter: RTL vs Python model (4 scenarios)
+python3 compare_mf.py all
+```
+
+Each script prints PASS/FAIL per scenario and exits non-zero on failure.
+
+### Formal verification (optional)
+
+Requires SymbiYosys (`sby`), Yosys, and a solver (z3 or boolector):
+
+```bash
+cd 9_Firmware/9_2_FPGA/formal
+sby -f fv_doppler_processor.sby
+sby -f fv_radar_mode_controller.sby
+```
+
+### Quick checklist
+
+Before pushing, confirm:
+
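+1. `bash run_regression.sh` (from `9_Firmware/9_2_FPGA`) — all phases pass
+2. `make all` (MCU tests, from `9_Firmware/9_1_Microcontroller/tests`) — 20/20 pass
+3. `python3 -m unittest test_radar_dashboard -v` (from `9_Firmware/9_3_GUI`) — all pass
+4. `python3 validate_mem_files.py` (from `9_Firmware/9_2_FPGA/tb/cosim`) — all checks pass
+5. `python3 compare.py dc && python3 compare_doppler.py stationary && python3 compare_mf.py all` (from `9_Firmware/9_2_FPGA/tb/cosim`) — all scenarios PASS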
+6. `git diff --check` — no whitespace issues

 ## Areas where help is especially welcome