fix(test,docs): remove dead xfft_32 files, update test infra for dual-16 FFT, add regression guide

- Remove xfft_32.v, tb_xfft_32.v, and fft_twiddle_32.mem (dead code since PR #33 moved Doppler to the dual 16-pt FFT architecture)
- Update run_regression.sh: add xfft_16 to PROD_RTL, remove xfft_32 from EXTRA_RTL and from all compile commands
- Update tb_fft_engine.v to test with N=16 / fft_twiddle_16.mem
- Update validate_mem_files.py: validate fft_twiddle_16.mem instead of fft_twiddle_32.mem
- Update testbenches and golden data from the main_cleanup branch to match the dual-16 architecture (tb_doppler_cosim, tb_doppler_realdata, tb_fullchain_realdata, tb_fullchain_mti_cfar_realdata, tb_system_e2e, radar_receiver_final, golden_doppler.mem)
- Update CONTRIBUTING.md with full regression test instructions covering FPGA, MCU, GUI, co-simulation, and formal verification

Regression: 23/23 FPGA, 20/20 MCU, 57/58 GUI, 56/56 mem validation, all co-sim scenarios PASS.
2026-04-07 02:51:48 +03:00
parent 04982a3176
commit 1e284767cd
15 changed files with 2265 additions and 2806 deletions
@@ -30,7 +30,7 @@ T_LONG_CHIRP = 30e-6      # 30 us long chirp
 T_SHORT_CHIRP = 0.5e-6    # 0.5 us short chirp
 CIC_DECIMATION = 4
 FFT_SIZE = 1024
-DOPPLER_FFT_SIZE = 32
+DOPPLER_FFT_SIZE = 16
 LONG_CHIRP_SAMPLES = int(T_LONG_CHIRP * FS_SYS)  # 3000 at 100 MHz

 # Overlap-save parameters
@@ -84,7 +84,7 @@ def test_structural():
    expected = {
        # FFT twiddle files (quarter-wave cosine ROMs)
        'fft_twiddle_1024.mem': {'lines': 256, 'desc': '1024-pt FFT quarter-wave cos ROM'},
-        'fft_twiddle_32.mem':   {'lines': 8,   'desc': '32-pt FFT quarter-wave cos ROM'},
+        'fft_twiddle_16.mem':   {'lines': 4,   'desc': '16-pt FFT quarter-wave cos ROM'},
        # Long chirp segments (4 segments x 1024 samples each)
        'long_chirp_seg0_i.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 I'},
        'long_chirp_seg0_q.mem': {'lines': 1024, 'desc': 'Long chirp seg 0 Q'},
@@ -145,13 +145,13 @@ def test_twiddle_1024():
    print(f"  Max twiddle error: {max_err} LSB across {len(vals)} entries")


-def test_twiddle_32():
-    print("\n=== TEST 2b: FFT Twiddle 32 Validation ===")
-    vals = read_mem_hex('fft_twiddle_32.mem')
+def test_twiddle_16():
+    print("\n=== TEST 2b: FFT Twiddle 16 Validation ===")
+    vals = read_mem_hex('fft_twiddle_16.mem')

    max_err = 0
-    for k in range(min(8, len(vals))):
-        angle = 2.0 * math.pi * k / 32.0
+    for k in range(min(4, len(vals))):
+        angle = 2.0 * math.pi * k / 16.0
        expected = int(round(math.cos(angle) * 32767.0))
        expected = max(-32768, min(32767, expected))
        actual = vals[k]
@@ -160,13 +160,13 @@ def test_twiddle_32():
            max_err = err

    check(max_err <= 1,
-          f"fft_twiddle_32.mem: max twiddle error = {max_err} LSB (tolerance: 1)")
+          f"fft_twiddle_16.mem: max twiddle error = {max_err} LSB (tolerance: 1)")
    print(f"  Max twiddle error: {max_err} LSB across {len(vals)} entries")

-    # Print all 8 entries for reference
-    print("  Twiddle 32 entries:")
-    for k in range(min(8, len(vals))):
-        angle = 2.0 * math.pi * k / 32.0
+    # Print all 4 entries for reference
+    print("  Twiddle 16 entries:")
+    for k in range(min(4, len(vals))):
+        angle = 2.0 * math.pi * k / 16.0
        expected = int(round(math.cos(angle) * 32767.0))
        print(f"    k={k}: file=0x{vals[k] & 0xFFFF:04x} ({vals[k]:6d}), "
              f"expected=0x{expected & 0xFFFF:04x} ({expected:6d}), "
@@ -605,7 +605,7 @@ def main():

    test_structural()
    test_twiddle_1024()
-    test_twiddle_32()
+    test_twiddle_16()
    test_long_chirp()
    test_short_chirp()
    test_chirp_vs_model()
@@ -6,8 +6,8 @@
 *
 * Tests the complete Doppler processing pipeline:
 *   - Accumulates 32 chirps x 64 range bins into BRAM
- *   - Processes each range bin: Hamming window -> 32-pt FFT
- *   - Outputs 2048 samples (64 range bins x 32 Doppler bins)
+ *   - Processes each range bin: Hamming window -> dual 16-pt FFT (staggered PRF)
+ *   - Outputs 2048 samples (64 range bins x 32 packed Doppler bins)
 *
 * Validates:
 *   1. FSM state transitions (IDLE -> ACCUMULATE -> LOAD_FFT -> ... -> OUTPUT)
@@ -20,10 +20,10 @@
 * RTL output written to:  tb/cosim/rtl_doppler_<scenario>.csv
 * RTL FFT inputs written:  tb/cosim/rtl_doppler_fft_in_<scenario>.csv
 *
- * Compile (SIMULATION branch — uses behavioral xfft_32/fft_engine):
+ * Compile (SIMULATION branch — uses behavioral xfft_16/fft_engine):
 *   iverilog -g2001 -DSIMULATION \
 *     -o tb/tb_doppler_cosim.vvp \
- *     tb/tb_doppler_cosim.v doppler_processor.v xfft_32.v fft_engine.v
+ *     tb/tb_doppler_cosim.v doppler_processor.v xfft_16.v fft_engine.v
 *
 * Scenarios (use -D flags):
 *   default:              stationary target
@@ -37,7 +37,7 @@ module tb_doppler_cosim;
 // Parameters
 // ============================================================================
 localparam CLK_PERIOD    = 10.0;           // 100 MHz
-localparam DOPPLER_FFT   = 32;
+localparam DOPPLER_FFT   = 32;             // Total packed Doppler bins (2 sub-frames x 16-pt FFT)
 localparam RANGE_BINS    = 64;
 localparam CHIRPS        = 32;
 localparam TOTAL_INPUTS  = CHIRPS * RANGE_BINS;  // 2048
@@ -193,7 +193,7 @@ initial begin
    $display("Doppler Processor Co-Sim Testbench");
    $display("Scenario: %0s", SCENARIO);
    $display("Input samples: %0d  (32 chirps x 64 range bins)", TOTAL_INPUTS);
-    $display("Expected outputs: %0d (64 range bins x 32 doppler bins)",
+    $display("Expected outputs: %0d (64 range bins x 32 packed Doppler bins, dual 16-pt FFT)",
             TOTAL_OUTPUTS);
    $display("============================================================");

@@ -17,7 +17,7 @@
 * Compile:
 *   iverilog -Wall -DSIMULATION -g2012 \
 *     -o tb/tb_doppler_realdata.vvp \
- *     tb/tb_doppler_realdata.v doppler_processor.v xfft_32.v fft_engine.v
+ *     tb/tb_doppler_realdata.v doppler_processor.v xfft_16.v fft_engine.v
 *
 * Run from: 9_Firmware/9_2_FPGA/
 *   vvp tb/tb_doppler_realdata.vvp
@@ -29,7 +29,7 @@ module tb_doppler_realdata;
 // PARAMETERS
 // ============================================================================
 localparam CLK_PERIOD    = 10.0;           // 100 MHz
-localparam DOPPLER_FFT   = 32;
+localparam DOPPLER_FFT   = 32;             // Total packed Doppler bins (2 sub-frames x 16-pt FFT)
 localparam RANGE_BINS    = 64;
 localparam CHIRPS        = 32;
 localparam TOTAL_INPUTS  = CHIRPS * RANGE_BINS;  // 2048
@@ -4,7 +4,7 @@
 * tb_fft_engine.v
 *
 * Testbench for the synthesizable FFT engine.
- * Tests with N=32 first (fast), then validates key properties.
+ * Tests with N=16 (matching the dual-16 Doppler architecture).
 *
 * Test Groups:
 *   1. Impulse response: FFT of delta[0] should be all 1s
@@ -19,10 +19,10 @@
 module tb_fft_engine;

 // ============================================================================
-// PARAMETERS — test with 32-pt for speed
+// PARAMETERS — test with 16-pt to match dual-FFT Doppler architecture
 // ============================================================================
-localparam N      = 32;
-localparam LOG2N  = 5;
+localparam N      = 16;
+localparam LOG2N  = 4;
 localparam DATA_W = 16;
 localparam INT_W  = 32;
 localparam TW_W   = 16;
@@ -47,7 +47,7 @@ fft_engine #(
    .DATA_W(DATA_W),
    .INTERNAL_W(INT_W),
    .TWIDDLE_W(TW_W),
-    .TWIDDLE_FILE("fft_twiddle_32.mem")
+    .TWIDDLE_FILE("fft_twiddle_16.mem")
 ) dut (
    .clk(clk),
    .reset_n(reset_n),
@@ -9,7 +9,7 @@
 *
 *   range_bin_decimator (peak detection, 1024->64)
 *     -> mti_canceller (2-pulse, mti_enable=1)
- *       -> doppler_processor_optimized (Hamming + 32-pt FFT)
+ *       -> doppler_processor_optimized (Hamming + dual 16-pt FFT)
 *         -> DC notch filter (width=2, inline logic)
 *           -> cfar_ca (CA mode, guard=2, train=8, alpha=0x30)
 *
@@ -41,7 +41,7 @@
 *     -o tb/tb_fullchain_mti_cfar_realdata.vvp \
 *     tb/tb_fullchain_mti_cfar_realdata.v \
 *     range_bin_decimator.v mti_canceller.v doppler_processor.v \
- *     xfft_32.v fft_engine.v cfar_ca.v
+ *     xfft_16.v fft_engine.v cfar_ca.v
 *
 * Run from: 9_Firmware/9_2_FPGA/
 *   vvp tb/tb_fullchain_mti_cfar_realdata.vvp
@@ -375,7 +375,7 @@ initial begin
    $display("  Full-Chain Real-Data Co-Simulation (MTI + CFAR)");
    $display("  range_bin_decimator (peak, 1024->64)");
    $display("    -> mti_canceller (2-pulse, enable=1)");
-    $display("      -> doppler_processor_optimized (Hamming + 32-pt FFT)");
+    $display("      -> doppler_processor_optimized (Hamming + dual 16-pt FFT)");
    $display("        -> DC notch filter (width=%0d)", DC_NOTCH_WIDTH);
    $display("          -> cfar_ca (CA, guard=2, train=8, alpha=0x30)");
    $display("  ADI CN0566 Phaser 10.525 GHz X-band FMCW");
@@ -7,7 +7,7 @@
 * (post-range-FFT, 32 chirps x 1024 bins) through:
 *
 *   range_bin_decimator (peak detection, 1024→64)
- *     → doppler_processor_optimized (Hamming + 32-pt FFT)
+ *     → doppler_processor_optimized (Hamming + dual 16-pt FFT)
 *
 * and compares the Doppler output bit-for-bit against the Python golden
 * reference that models the same chain (golden_reference.py).
@@ -27,7 +27,7 @@
 *   iverilog -Wall -DSIMULATION -g2012 \
 *     -o tb/tb_fullchain_realdata.vvp \
 *     tb/tb_fullchain_realdata.v \
- *     range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v
+ *     range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v
 *
 * Run from: 9_Firmware/9_2_FPGA/
 *   vvp tb/tb_fullchain_realdata.vvp
@@ -243,7 +243,7 @@ initial begin
    $display("============================================================");
    $display("  Full-Chain Real-Data Co-Simulation");
    $display("  range_bin_decimator (peak, 1024->64)");
-    $display("    -> doppler_processor_optimized (Hamming + 32-pt FFT)");
+    $display("    -> doppler_processor_optimized (Hamming + dual 16-pt FFT)");
    $display("  ADI CN0566 Phaser 10.525 GHz X-band FMCW");
    $display("  Input:    %0d chirps x %0d range FFT bins = %0d samples",
             CHIRPS, INPUT_BINS, TOTAL_INPUT_SAMPLES);
@@ -34,7 +34,7 @@
 *     cdc_modules.v fir_lowpass.v ddc_input_interface.v \
 *     chirp_memory_loader_param.v latency_buffer.v \
 *     matched_filter_multi_segment.v matched_filter_processing_chain.v \
- *     range_bin_decimator.v doppler_processor.v xfft_32.v fft_engine.v \
+ *     range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
 *     usb_data_interface.v edge_detector.v radar_mode_controller.v
 *
 * Run:
@@ -1,355 +0,0 @@
-`timescale 1ns / 1ps
-
-/**
- * tb_xfft_32.v
- *
- * Testbench for xfft_32 AXI-Stream FFT wrapper.
- * Verifies the wrapper correctly interfaces with fft_engine via AXI-Stream.
- *
- * Test Groups:
- *   1. Impulse response (all output bins = input amplitude)
- *   2. DC input (bin 0 = A*N, rest ~= 0)
- *   3. Single tone detection
- *   4. AXI-Stream handshake correctness (tvalid, tlast, tready)
- *   5. Back-to-back transforms (no state leakage)
- */
-
-module tb_xfft_32;
-
-// ============================================================================
-// PARAMETERS
-// ============================================================================
-localparam N         = 32;
-localparam CLK_PERIOD = 10;
-
-// ============================================================================
-// SIGNALS
-// ============================================================================
-reg         aclk, aresetn;
-reg  [7:0]  cfg_tdata;
-reg         cfg_tvalid;
-wire        cfg_tready;
-reg  [31:0] din_tdata;
-reg         din_tvalid;
-reg         din_tlast;
-wire [31:0] dout_tdata;
-wire        dout_tvalid;
-wire        dout_tlast;
-reg         dout_tready;
-
-// ============================================================================
-// DUT
-// ============================================================================
-xfft_32 dut (
-    .aclk(aclk),
-    .aresetn(aresetn),
-    .s_axis_config_tdata(cfg_tdata),
-    .s_axis_config_tvalid(cfg_tvalid),
-    .s_axis_config_tready(cfg_tready),
-    .s_axis_data_tdata(din_tdata),
-    .s_axis_data_tvalid(din_tvalid),
-    .s_axis_data_tlast(din_tlast),
-    .m_axis_data_tdata(dout_tdata),
-    .m_axis_data_tvalid(dout_tvalid),
-    .m_axis_data_tlast(dout_tlast),
-    .m_axis_data_tready(dout_tready)
-);
-
-// ============================================================================
-// CLOCK
-// ============================================================================
-initial aclk = 0;
-always #(CLK_PERIOD/2) aclk = ~aclk;
-
-// ============================================================================
-// PASS/FAIL TRACKING
-// ============================================================================
-integer pass_count, fail_count;
-
-task check;
-    input cond;
-    input [512*8-1:0] label;
-    begin
-        if (cond) begin
-            $display("  [PASS] %0s", label);
-            pass_count = pass_count + 1;
-        end else begin
-            $display("  [FAIL] %0s", label);
-            fail_count = fail_count + 1;
-        end
-    end
-endtask
-
-// ============================================================================
-// OUTPUT CAPTURE
-// ============================================================================
-reg signed [15:0] out_re [0:N-1];
-reg signed [15:0] out_im [0:N-1];
-integer out_idx;
-reg got_tlast;
-integer tlast_count;
-
-// ============================================================================
-// HELPER TASKS
-// ============================================================================
-
-task do_reset;
-    begin
-        aresetn    = 0;
-        cfg_tdata  = 0;
-        cfg_tvalid = 0;
-        din_tdata  = 0;
-        din_tvalid = 0;
-        din_tlast  = 0;
-        dout_tready = 1;
-        repeat(5) @(posedge aclk);
-        aresetn = 1;
-        repeat(2) @(posedge aclk);
-    end
-endtask
-
-// Send config (forward FFT: tdata[0]=1)
-// Waits for cfg_tready (wrapper in S_IDLE) before sending
-task send_config;
-    input [7:0] cfg;
-    integer wait_cnt;
-    begin
-        // Wait for wrapper to be ready (S_IDLE)
-        wait_cnt = 0;
-        while (!cfg_tready && wait_cnt < 5000) begin
-            @(posedge aclk);
-            wait_cnt = wait_cnt + 1;
-        end
-        cfg_tdata  = cfg;
-        cfg_tvalid = 1;
-        @(posedge aclk);
-        cfg_tvalid = 0;
-        cfg_tdata  = 0;
-    end
-endtask
-
-// Feed N samples: each sample is {im[15:0], re[15:0]}
-// in_re_arr and in_im_arr must be pre-loaded
-reg signed [15:0] feed_re [0:N-1];
-reg signed [15:0] feed_im [0:N-1];
-
-task feed_data;
-    integer i;
-    begin
-        for (i = 0; i < N; i = i + 1) begin
-            din_tdata  = {feed_im[i], feed_re[i]};
-            din_tvalid = 1;
-            din_tlast  = (i == N - 1) ? 1 : 0;
-            @(posedge aclk);
-        end
-        din_tvalid = 0;
-        din_tlast  = 0;
-        din_tdata  = 0;
-    end
-endtask
-
-// Capture N output samples
-task capture_output;
-    integer timeout;
-    begin
-        out_idx    = 0;
-        got_tlast  = 0;
-        tlast_count = 0;
-        timeout    = 0;
-        while (out_idx < N && timeout < 5000) begin
-            @(posedge aclk);
-            if (dout_tvalid && dout_tready) begin
-                out_re[out_idx] = dout_tdata[15:0];
-                out_im[out_idx] = dout_tdata[31:16];
-                if (dout_tlast) begin
-                    got_tlast = 1;
-                    tlast_count = tlast_count + 1;
-                end
-                out_idx = out_idx + 1;
-            end
-            timeout = timeout + 1;
-        end
-    end
-endtask
-
-// ============================================================================
-// VCD
-// ============================================================================
-initial begin
-    $dumpfile("tb_xfft_32.vcd");
-    $dumpvars(0, tb_xfft_32);
-end
-
-// ============================================================================
-// MAIN TEST
-// ============================================================================
-integer i;
-reg signed [31:0] err;
-integer max_err;
-integer max_mag_bin;
-reg signed [31:0] max_mag, mag;
-real angle;
-
-initial begin
-    pass_count = 0;
-    fail_count = 0;
-
-    $display("============================================================");
-    $display("  xfft_32 AXI-Stream Wrapper Testbench");
-    $display("============================================================");
-
-    do_reset;
-
-    // ================================================================
-    // TEST 1: Impulse Response
-    // ================================================================
-    $display("");
-    $display("--- Test 1: Impulse Response ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = (i == 0) ? 16'sd1000 : 16'sd0;
-        feed_im[i] = 16'sd0;
-    end
-
-    send_config(8'h01);  // Forward FFT
-    feed_data;
-    capture_output;
-
-    check(out_idx == N, "Received N output samples");
-    check(got_tlast == 1, "Got tlast on output");
-
-    max_err = 0;
-    for (i = 0; i < N; i = i + 1) begin
-        err = out_re[i] - 1000;
-        if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-        err = out_im[i];
-        if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-    end
-    $display("  Impulse max error: %0d", max_err);
-    check(max_err < 10, "Impulse: all bins ~= 1000");
-
-    // ================================================================
-    // TEST 2: DC Input
-    // ================================================================
-    $display("");
-    $display("--- Test 2: DC Input ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = 16'sd100;
-        feed_im[i] = 16'sd0;
-    end
-
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-
-    $display("  DC bin[0] = %0d + j%0d (expect ~3200)", out_re[0], out_im[0]);
-    check(out_re[0] >= 3100 && out_re[0] <= 3300, "DC: bin 0 ~= 3200 (5% tol)");
-
-    max_err = 0;
-    for (i = 1; i < N; i = i + 1) begin
-        err = out_re[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-        err = out_im[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-    end
-    $display("  DC max non-DC: %0d", max_err);
-    check(max_err < 25, "DC: non-DC bins ~= 0");
-
-    // ================================================================
-    // TEST 3: Single Tone (bin 4)
-    // ================================================================
-    $display("");
-    $display("--- Test 3: Single Tone (bin 4) ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        angle = 6.28318530718 * 4.0 * i / 32.0;
-        feed_re[i] = $rtoi($cos(angle) * 1000.0);
-        feed_im[i] = 16'sd0;
-    end
-
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-
-    max_mag = 0;
-    max_mag_bin = 0;
-    for (i = 0; i < N; i = i + 1) begin
-        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
-        if (mag > max_mag) begin
-            max_mag = mag;
-            max_mag_bin = i;
-        end
-    end
-    $display("  Tone peak bin: %0d (expect 4 or 28)", max_mag_bin);
-    check(max_mag_bin == 4 || max_mag_bin == 28, "Tone: peak at bin 4 or 28");
-
-    // ================================================================
-    // TEST 4: Back-to-back transforms
-    // ================================================================
-    $display("");
-    $display("--- Test 4: Back-to-Back Transforms ---");
-
-    // First: impulse
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = (i == 0) ? 16'sd500 : 16'sd0;
-        feed_im[i] = 16'sd0;
-    end
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-    check(out_idx == N, "Back-to-back 1st: got N outputs");
-
-    // Second: DC immediately after
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = 16'sd50;
-        feed_im[i] = 16'sd0;
-    end
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-    check(out_idx == N, "Back-to-back 2nd: got N outputs");
-    $display("  2nd transform bin[0] = %0d (expect ~1600)", out_re[0]);
-    check(out_re[0] >= 1500 && out_re[0] <= 1700, "Back-to-back 2nd: bin 0 ~= 1600");
-
-    // ================================================================
-    // TEST 5: Zero input
-    // ================================================================
-    $display("");
-    $display("--- Test 5: Zero Input ---");
-
-    for (i = 0; i < N; i = i + 1) begin
-        feed_re[i] = 16'sd0;
-        feed_im[i] = 16'sd0;
-    end
-    send_config(8'h01);
-    feed_data;
-    capture_output;
-
-    max_err = 0;
-    for (i = 0; i < N; i = i + 1) begin
-        err = out_re[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-        err = out_im[i]; if (err < 0) err = -err;
-        if (err > max_err) max_err = err;
-    end
-    check(max_err == 0, "Zero input: all outputs = 0");
-
-    // ================================================================
-    // SUMMARY
-    // ================================================================
-    $display("");
-    $display("============================================================");
-    $display("  RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
-    if (fail_count == 0)
-        $display("  ALL TESTS PASSED");
-    else
-        $display("  SOME TESTS FAILED");
-    $display("============================================================");
-
-    $finish;
-end
-
-endmodule