fix(rtl,gui,cosim,formal): adapt surrounding files for dual 16-pt FFT (follow-up to PR #33)

- radar_system_top.v: DC notch now masks to dop_bin[3:0] per sub-frame so both sub-frames get their DC zeroed correctly; rename DOPPLER_FFT_SIZE → DOPPLER_FRAME_CHIRPS to avoid confusion with the per-FFT size (now 16) - radar_dashboard.py: remove fftshift (crosses sub-frame boundary), display raw Doppler bins, remove dead velocity constants - golden_reference.py: model dual 16-pt FFT with per-sub-frame Hamming window, update DC notch and CFAR to match RTL - fv_doppler_processor.sby: reference xfft_16.v / fft_twiddle_16.mem, raise BMC depth to 512 and cover to 1024 - fv_radar_mode_controller.sby: raise cover depth to 600 - fv_radar_mode_controller.v: pin cfg_* to reduced constants (documented as single-config proof), fix Property 5 mode guard, strengthen Cover 1 - STALE_NOTICE.md: document that real-data hex files are stale and need regeneration with external dataset Closes #39
2026-04-06 23:15:50 +03:00
parent 22758fa370
commit ffc89f0bbd
8 changed files with 218 additions and 215 deletions
@@ -4,24 +4,24 @@ cover

 [options]
 bmc: mode bmc
-bmc: depth 150
+bmc: depth 512
 cover: mode cover
-cover: depth 150
+cover: depth 1024

 [engines]
 smtbmc z3

 [script]
 read_verilog -formal doppler_processor.v
-read_verilog -formal xfft_32.v
+read_verilog -formal xfft_16.v
 read_verilog -formal fft_engine.v
 read_verilog -formal fv_doppler_processor.v
 prep -top fv_doppler_processor

 [files]
 ../doppler_processor.v
-../xfft_32.v
+../xfft_16.v
 ../fft_engine.v
-../fft_twiddle_32.mem
+../fft_twiddle_16.mem
 ../fft_twiddle_1024.mem
 fv_doppler_processor.v
@@ -8,7 +8,8 @@
 // Single-clock design: clk is an input wire, async2sync handles async reset.
 // Each formal step = one clock edge.
 //
-// Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, CHIRPS_PER_SUBFRAME=2, DOPPLER_FFT_SIZE=2.
+// Parameters: RANGE_BINS reduced for tractability, but the FFT/sub-frame size
+// remains 16 so the wrapper matches the real xfft_16 interface.
 // Includes full xfft_16 and fft_engine sub-modules.
 //
 // Focus: memory address bounds (highest-value finding) and state encoding.
@@ -17,12 +18,12 @@ module fv_doppler_processor (
    input wire clk
 );

-    // Reduced parameters for tractable BMC
-    localparam RANGE_BINS       = 4;
-    localparam CHIRPS_PER_FRAME = 4;
-    localparam CHIRPS_PER_SUBFRAME = 2;  // Dual sub-frame: 2 chirps per sub-frame
-    localparam DOPPLER_FFT_SIZE = 2;     // FFT size matches sub-frame size
-    localparam MEM_DEPTH        = RANGE_BINS * CHIRPS_PER_FRAME;  // 16
+    // Only RANGE_BINS is reduced; the FFT wrapper still expects 16 samples.
+    localparam RANGE_BINS       = 2;
+    localparam CHIRPS_PER_FRAME = 32;
+    localparam CHIRPS_PER_SUBFRAME = 16;
+    localparam DOPPLER_FFT_SIZE = 16;
+    localparam MEM_DEPTH        = RANGE_BINS * CHIRPS_PER_FRAME;

    // State encoding (mirrors DUT localparams)
    localparam S_IDLE       = 3'b000;
@@ -130,9 +131,11 @@ module fv_doppler_processor (
            assume(!data_valid);
    end

-    // new_chirp_frame must be a clean pulse (not during active processing)
+    // new_chirp_frame may assert during accumulation at the 16-chirp boundary.
+    // Only suppress it during FFT-processing states.
    always @(posedge clk) begin
-        if (reset_n && state != S_IDLE)
+        if (reset_n && (state == S_PRE_READ || state == S_LOAD_FFT ||
+                        state == S_FFT_WAIT || state == S_OUTPUT))
            assume(!new_chirp_frame);
    end

@@ -201,11 +204,17 @@ module fv_doppler_processor (
    end

    // ================================================================
-    // COVER 1: Complete processing of all range bins
+    // COVER 1: Complete processing of all range bins after a full frame was
+    // actually accumulated.
    // ================================================================
+    reg f_seen_full;
+    initial f_seen_full = 1'b0;
+    always @(posedge clk)
+        if (frame_buffer_full) f_seen_full <= 1'b1;
+
    always @(posedge clk) begin
        if (reset_n)
-            cover(frame_complete && f_past_valid);
+            cover(frame_complete && f_seen_full);
    end

    // ================================================================
@@ -6,7 +6,7 @@ cover
 bmc: mode bmc
 bmc: depth 200
 cover: mode cover
-cover: depth 200
+cover: depth 600

 [engines]
 smtbmc z3
@@ -66,13 +66,15 @@ module fv_radar_mode_controller (
    (* anyseq *) wire       stm32_new_azimuth;
    (* anyseq *) wire       trigger;

-    // Gap 2: Formal cfg_* inputs — solver-driven for exhaustive coverage
-    (* anyseq *) wire [15:0] cfg_long_chirp_cycles;
-    (* anyseq *) wire [15:0] cfg_long_listen_cycles;
-    (* anyseq *) wire [15:0] cfg_guard_cycles;
-    (* anyseq *) wire [15:0] cfg_short_chirp_cycles;
-    (* anyseq *) wire [15:0] cfg_short_listen_cycles;
-    (* anyseq *) wire [5:0]  cfg_chirps_per_elev;
+    // Runtime config inputs are pinned to the reduced localparams so this
+    // wrapper proves one tractable configuration. It does not sweep the full
+    // runtime-configurable cfg_* space.
+    wire [15:0] cfg_long_chirp_cycles   = LONG_CHIRP_CYCLES;
+    wire [15:0] cfg_long_listen_cycles  = LONG_LISTEN_CYCLES;
+    wire [15:0] cfg_guard_cycles        = GUARD_CYCLES;
+    wire [15:0] cfg_short_chirp_cycles  = SHORT_CHIRP_CYCLES;
+    wire [15:0] cfg_short_listen_cycles = SHORT_LISTEN_CYCLES;
+    wire [5:0]  cfg_chirps_per_elev     = CHIRPS_PER_ELEVATION;

    // ================================================================
    // DUT outputs
@@ -109,7 +111,8 @@ module fv_radar_mode_controller (
        .stm32_new_elevation(stm32_new_elevation),
        .stm32_new_azimuth  (stm32_new_azimuth),
        .trigger            (trigger),
-        // Gap 2: Runtime-configurable timing inputs
+        // Runtime-configurable timing ports, bound here to the reduced wrapper
+        // constants for tractable proof depth.
        .cfg_long_chirp_cycles  (cfg_long_chirp_cycles),
        .cfg_long_listen_cycles (cfg_long_listen_cycles),
        .cfg_guard_cycles       (cfg_guard_cycles),
@@ -181,7 +184,7 @@ module fv_radar_mode_controller (
    // ================================================================
    always @(posedge clk) begin
        if (reset_n && f_past_valid) begin
-            if ($past(mode) == 2'b10 &&
+            if ($past(mode) == 2'b10 && mode == 2'b10 &&
                $past(scan_state) == S_LONG_LISTEN &&
                $past(timer) == LONG_LISTEN_CYCLES - 1)
                assert(scan_state == S_IDLE);
@@ -202,11 +205,11 @@ module fv_radar_mode_controller (
    end

    // ================================================================
-    // COVER 1: Full scan completes (scan_complete pulses)
+    // COVER 1: Full auto-scan completes
    // ================================================================
    always @(posedge clk) begin
        if (reset_n)
-            cover(scan_complete);
+            cover(scan_complete && mode == 2'b01);
    end

    // ================================================================
@@ -217,11 +217,12 @@ reg [5:0]  host_chirps_per_elev;      // Opcode 0x15 (default 32)
 reg        host_status_request;       // Opcode 0xFF (self-clearing pulse)

 // Fix 4: Doppler/chirps mismatch protection
-// DOPPLER_FFT_SIZE is compile-time (32). If host sets chirps_per_elev to a
-// different value, Doppler accumulation is corrupted. Clamp at command decode
-// and flag the mismatch so the host knows.
-localparam DOPPLER_FFT_SIZE = 32;     // Must match doppler_processor parameter
-reg        chirps_mismatch_error;     // Set if host tried to set chirps != FFT size
+// DOPPLER_FRAME_CHIRPS is the fixed chirp count expected by the staggered-PRI
+// Doppler path (16 long + 16 short). If host sets chirps_per_elev to a
+// different value, Doppler accumulation is corrupted. Clamp at command decode
+// and flag the mismatch so the host knows.
+localparam DOPPLER_FRAME_CHIRPS = 32; // Total chirps per Doppler frame
+reg        chirps_mismatch_error;     // Set if host tried to set chirps != FFT size

 // Fix 7: Range-mode register (opcode 0x20)
 // Future-proofing for 3km/10km antenna switching.
@@ -532,16 +533,21 @@ assign rx_doppler_data_valid = rx_doppler_valid;
 // ============================================================================
 // DC NOTCH FILTER (post-Doppler-FFT, pre-CFAR)
 // ============================================================================
-// Zeros out Doppler bins within ±host_dc_notch_width of DC (bin 0).
-// In a 32-point FFT, DC is bin 0; negative Doppler wraps to bins 31,30,...
-// notch_width=1 → zero bins {0}. notch_width=2 → zero bins {0,1,31}. etc.
-// When host_dc_notch_width=0: pass-through (no zeroing).
-
-wire dc_notch_active;
-wire [4:0] dop_bin_unsigned = rx_doppler_bin;
-assign dc_notch_active = (host_dc_notch_width != 3'd0) &&
-                          (dop_bin_unsigned < {2'b0, host_dc_notch_width} ||
-                           dop_bin_unsigned > (5'd31 - {2'b0, host_dc_notch_width} + 5'd1));
+// Zeros out Doppler bins within ±host_dc_notch_width of DC for BOTH
+// sub-frames in the dual 16-pt FFT architecture.
+// doppler_bin[4:0] = {sub_frame, bin[3:0]}:
+//   Sub-frame 0: bins 0-15,  DC = bin 0,  wrap = bin 15
+//   Sub-frame 1: bins 16-31, DC = bin 16, wrap = bin 31
+// notch_width=1 → zero bins {0,16}. notch_width=2 → zero bins
+// {0,1,15,16,17,31}. etc.
+// When host_dc_notch_width=0: pass-through (no zeroing).
+
+wire dc_notch_active;
+wire [4:0] dop_bin_unsigned = rx_doppler_bin;
+wire [3:0] bin_within_sf = dop_bin_unsigned[3:0];
+assign dc_notch_active = (host_dc_notch_width != 3'd0) &&
+                          (bin_within_sf < {1'b0, host_dc_notch_width} ||
+                           bin_within_sf > (4'd15 - {1'b0, host_dc_notch_width} + 4'd1));

 // Notched Doppler data: zero I/Q when in notch zone, pass through otherwise
 wire [31:0] notched_doppler_data  = dc_notch_active ? 32'd0 : rx_doppler_output;
@@ -814,19 +820,19 @@ always @(posedge clk_100m_buf or negedge sys_reset_n) begin
                8'h13: host_short_chirp_cycles <= usb_cmd_value;
                8'h14: host_short_listen_cycles <= usb_cmd_value;
                8'h15: begin
-                    // Fix 4: Clamp chirps_per_elev to DOPPLER_FFT_SIZE.
-                    // If host requests a different value, clamp and set error flag.
-                    if (usb_cmd_value[5:0] > DOPPLER_FFT_SIZE[5:0]) begin
-                        host_chirps_per_elev  <= DOPPLER_FFT_SIZE[5:0];
-                        chirps_mismatch_error <= 1'b1;
-                    end else if (usb_cmd_value[5:0] == 6'd0) begin
-                        host_chirps_per_elev  <= DOPPLER_FFT_SIZE[5:0];
-                        chirps_mismatch_error <= 1'b1;
-                    end else begin
-                        host_chirps_per_elev  <= usb_cmd_value[5:0];
-                        // Clear error only if value matches FFT size exactly
-                        chirps_mismatch_error <= (usb_cmd_value[5:0] != DOPPLER_FFT_SIZE[5:0]);
-                    end
+                    // Fix 4: Clamp chirps_per_elev to the fixed Doppler frame size.
+                    // If host requests a different value, clamp and set error flag.
+                    if (usb_cmd_value[5:0] > DOPPLER_FRAME_CHIRPS[5:0]) begin
+                        host_chirps_per_elev  <= DOPPLER_FRAME_CHIRPS[5:0];
+                        chirps_mismatch_error <= 1'b1;
+                    end else if (usb_cmd_value[5:0] == 6'd0) begin
+                        host_chirps_per_elev  <= DOPPLER_FRAME_CHIRPS[5:0];
+                        chirps_mismatch_error <= 1'b1;
+                    end else begin
+                        host_chirps_per_elev  <= usb_cmd_value[5:0];
+                        // Clear error only if value matches FFT size exactly
+                        chirps_mismatch_error <= (usb_cmd_value[5:0] != DOPPLER_FRAME_CHIRPS[5:0]);
+                    end
                end
                8'h16: host_gain_shift         <= usb_cmd_value[3:0];  // Fix 3: digital gain
                8'h20: host_range_mode         <= usb_cmd_value[1:0];  // Fix 7: range mode
@@ -912,4 +918,4 @@ always @(posedge clk_100m_buf) begin
 end
 `endif

-endmodule
+endmodule
@@ -76,23 +76,20 @@ FFT_DATA_W = 16
 FFT_INTERNAL_W = 32
 FFT_TWIDDLE_W = 16

-# Doppler
-DOPPLER_FFT_SIZE = 32
+# Doppler — dual 16-pt FFT architecture
+DOPPLER_FFT_SIZE = 16            # per sub-frame
+DOPPLER_TOTAL_BINS = 32          # total output (2 sub-frames x 16)
 DOPPLER_RANGE_BINS = 64
 DOPPLER_CHIRPS = 32
+CHIRPS_PER_SUBFRAME = 16
 DOPPLER_WINDOW_TYPE = 0          # Hamming

-# Hamming window coefficients from doppler_processor.v (Q15)
+# 16-point Hamming window coefficients from doppler_processor.v (Q15)
 HAMMING_Q15 = [
-    0x0800, 0x0862, 0x09CB, 0x0C3B,
-    0x0FB2, 0x142F, 0x19B2, 0x2039,
-    0x27C4, 0x3050, 0x39DB, 0x4462,
-    0x4FE3, 0x5C5A, 0x69C4, 0x781D,
-    0x7FFF,  # Peak
-    0x781D, 0x69C4, 0x5C5A, 0x4FE3,
-    0x4462, 0x39DB, 0x3050, 0x27C4,
-    0x2039, 0x19B2, 0x142F, 0x0FB2,
-    0x0C3B, 0x09CB, 0x0862,
+    0x0A3D, 0x0E5C, 0x1B6D, 0x3088,
+    0x4B33, 0x6573, 0x7642, 0x7F62,
+    0x7F62, 0x7642, 0x6573, 0x4B33,
+    0x3088, 0x1B6D, 0x0E5C, 0x0A3D,
 ]

 # ADI dataset parameters
@@ -652,110 +649,111 @@ def run_range_bin_decimator(range_fft_i, range_fft_q,


 # ===========================================================================
-# Stage 3: Doppler FFT (32-point with Hamming window, bit-accurate)
+# Stage 3: Doppler FFT (dual 16-point with Hamming window, bit-accurate)
 # ===========================================================================
-def run_doppler_fft(range_data_i, range_data_q, twiddle_file_32=None):
+def run_doppler_fft(range_data_i, range_data_q, twiddle_file_16=None):
    """
-    Bit-accurate Doppler processor matching doppler_processor.v.
-    
+    Bit-accurate Doppler processor matching doppler_processor.v (dual 16-pt FFT).
+
    Input: range_data_i/q shape (DOPPLER_CHIRPS, FFT_SIZE) — 16-bit signed
           Only first DOPPLER_RANGE_BINS columns are processed.
-    Output: doppler_map_i/q shape (DOPPLER_RANGE_BINS, DOPPLER_FFT_SIZE) — 16-bit signed
-    
-    Pipeline per range bin:
-    1. Read 32 chirps for this range bin
-    2. Apply Hamming window (Q15 multiply + round >>> 15)
-    3. 32-point FFT
+    Output: doppler_map_i/q shape (DOPPLER_RANGE_BINS, DOPPLER_TOTAL_BINS) — 16-bit signed
+
+    Architecture per range bin:
+      Sub-frame 0 (long PRI):  chirps 0..15  → 16-pt Hamming → 16-pt FFT → bins 0-15
+      Sub-frame 1 (short PRI): chirps 16..31 → 16-pt Hamming → 16-pt FFT → bins 16-31
    """
    n_chirps = DOPPLER_CHIRPS
    n_range = DOPPLER_RANGE_BINS
    n_fft = DOPPLER_FFT_SIZE
-    
-    print(f"[DOPPLER] Processing {n_range} range bins x {n_chirps} chirps → {n_fft}-point FFT")
-    
-    # Build Hamming window as signed 16-bit
+    n_total = DOPPLER_TOTAL_BINS
+    n_sf = CHIRPS_PER_SUBFRAME
+
+    print(f"[DOPPLER] Processing {n_range} range bins x {n_chirps} chirps → dual {n_fft}-point FFT")
+
+    # Build 16-point Hamming window as signed 16-bit
    hamming = np.array([int(v) for v in HAMMING_Q15], dtype=np.int64)
    assert len(hamming) == n_fft, f"Hamming length {len(hamming)} != {n_fft}"
-    
-    # Build 32-point twiddle factors
-    if twiddle_file_32 and os.path.exists(twiddle_file_32):
-        cos_rom_32 = load_twiddle_rom(twiddle_file_32)
+
+    # Build 16-point twiddle factors
+    if twiddle_file_16 and os.path.exists(twiddle_file_16):
+        cos_rom_16 = load_twiddle_rom(twiddle_file_16)
    else:
-        cos_rom_32 = np.round(32767 * np.cos(2 * np.pi * np.arange(n_fft // 4) / n_fft)).astype(np.int64)
-    
-    doppler_map_i = np.zeros((n_range, n_fft), dtype=np.int64)
-    doppler_map_q = np.zeros((n_range, n_fft), dtype=np.int64)
-    
+        cos_rom_16 = np.round(32767 * np.cos(2 * np.pi * np.arange(n_fft // 4) / n_fft)).astype(np.int64)
+
+    LOG2N_16 = 4
+    doppler_map_i = np.zeros((n_range, n_total), dtype=np.int64)
+    doppler_map_q = np.zeros((n_range, n_total), dtype=np.int64)
+
    for rbin in range(n_range):
-        # Extract chirp stack for this range bin
        chirp_i = np.zeros(n_chirps, dtype=np.int64)
        chirp_q = np.zeros(n_chirps, dtype=np.int64)
        for c in range(n_chirps):
            chirp_i[c] = int(range_data_i[c, rbin])
            chirp_q[c] = int(range_data_q[c, rbin])
-        
-        # Apply Hamming window (Q15 multiply with rounding)
-        windowed_i = np.zeros(n_fft, dtype=np.int64)
-        windowed_q = np.zeros(n_fft, dtype=np.int64)
-        for k in range(n_fft):
-            # 16-bit x 16-bit = 32-bit, then round and shift >>> 15
-            mult_i = chirp_i[k] * hamming[k]
-            mult_q = chirp_q[k] * hamming[k]
-            windowed_i[k] = saturate((mult_i + (1 << 14)) >> 15, 16)
-            windowed_q[k] = saturate((mult_q + (1 << 14)) >> 15, 16)
-        
-        # 32-point FFT (same algorithm as range FFT, different N)
-        LOG2N_32 = 5
-        mem_re = np.zeros(n_fft, dtype=np.int64)
-        mem_im = np.zeros(n_fft, dtype=np.int64)
-        
-        # Bit-reversed loading, sign-extend to 32-bit
-        for n in range(n_fft):
-            br = 0
-            for b in range(LOG2N_32):
-                if n & (1 << b):
-                    br |= (1 << (LOG2N_32 - 1 - b))
-            mem_re[br] = windowed_i[n]
-            mem_im[br] = windowed_q[n]
-        
-        # Butterfly stages
-        half = 1
-        for stg in range(LOG2N_32):
-            for bfly in range(n_fft // 2):
-                idx = bfly & (half - 1)
-                grp = bfly - idx
-                addr_even = (grp << 1) | idx
-                addr_odd = addr_even + half
-                
-                tw_idx = (idx << (LOG2N_32 - 1 - stg)) % (n_fft // 2)
-                tw_cos, tw_sin = fft_twiddle_lookup(tw_idx, n_fft, cos_rom_32)
-                
-                a_re = mem_re[addr_even]
-                a_im = mem_im[addr_even]
-                b_re = mem_re[addr_odd]
-                b_im = mem_im[addr_odd]
-                
-                prod_re = b_re * tw_cos + b_im * tw_sin
-                prod_im = b_im * tw_cos - b_re * tw_sin
-                
-                prod_re_shifted = prod_re >> 15
-                prod_im_shifted = prod_im >> 15
-                
-                mem_re[addr_even] = a_re + prod_re_shifted
-                mem_im[addr_even] = a_im + prod_im_shifted
-                mem_re[addr_odd] = a_re - prod_re_shifted
-                mem_im[addr_odd] = a_im - prod_im_shifted
-            
-            half <<= 1
-        
-        # Saturate 32-bit → 16-bit
-        for n in range(n_fft):
-            doppler_map_i[rbin, n] = saturate(mem_re[n], 16)
-            doppler_map_q[rbin, n] = saturate(mem_im[n], 16)
-    
-    print(f"  Doppler map: shape ({n_range}, {n_fft}), "
+
+        # Process each sub-frame independently
+        for sf in range(2):
+            chirp_start = sf * n_sf
+            bin_offset = sf * n_fft
+
+            windowed_i = np.zeros(n_fft, dtype=np.int64)
+            windowed_q = np.zeros(n_fft, dtype=np.int64)
+            for k in range(n_fft):
+                ci = chirp_i[chirp_start + k]
+                cq = chirp_q[chirp_start + k]
+                mult_i = ci * hamming[k]
+                mult_q = cq * hamming[k]
+                windowed_i[k] = saturate((mult_i + (1 << 14)) >> 15, 16)
+                windowed_q[k] = saturate((mult_q + (1 << 14)) >> 15, 16)
+
+            mem_re = np.zeros(n_fft, dtype=np.int64)
+            mem_im = np.zeros(n_fft, dtype=np.int64)
+
+            for n in range(n_fft):
+                br = 0
+                for b in range(LOG2N_16):
+                    if n & (1 << b):
+                        br |= (1 << (LOG2N_16 - 1 - b))
+                mem_re[br] = windowed_i[n]
+                mem_im[br] = windowed_q[n]
+
+            half = 1
+            for stg in range(LOG2N_16):
+                for bfly in range(n_fft // 2):
+                    idx = bfly & (half - 1)
+                    grp = bfly - idx
+                    addr_even = (grp << 1) | idx
+                    addr_odd = addr_even + half
+
+                    tw_idx = (idx << (LOG2N_16 - 1 - stg)) % (n_fft // 2)
+                    tw_cos, tw_sin = fft_twiddle_lookup(tw_idx, n_fft, cos_rom_16)
+
+                    a_re = mem_re[addr_even]
+                    a_im = mem_im[addr_even]
+                    b_re = mem_re[addr_odd]
+                    b_im = mem_im[addr_odd]
+
+                    prod_re = b_re * tw_cos + b_im * tw_sin
+                    prod_im = b_im * tw_cos - b_re * tw_sin
+
+                    prod_re_shifted = prod_re >> 15
+                    prod_im_shifted = prod_im >> 15
+
+                    mem_re[addr_even] = a_re + prod_re_shifted
+                    mem_im[addr_even] = a_im + prod_im_shifted
+                    mem_re[addr_odd] = a_re - prod_re_shifted
+                    mem_im[addr_odd] = a_im - prod_im_shifted
+
+                half <<= 1
+
+            for n in range(n_fft):
+                doppler_map_i[rbin, bin_offset + n] = saturate(mem_re[n], 16)
+                doppler_map_q[rbin, bin_offset + n] = saturate(mem_im[n], 16)
+
+    print(f"  Doppler map: shape ({n_range}, {n_total}), "
          f"I range [{doppler_map_i.min()}, {doppler_map_i.max()}]")
-    
+
    return doppler_map_i, doppler_map_q


@@ -821,23 +819,24 @@ def run_dc_notch(doppler_i, doppler_q, width=2):
    Input:  doppler_i/q — shape (NUM_RANGE_BINS, NUM_DOPPLER_BINS), 16-bit signed
    Output: notched_i/q — shape (NUM_RANGE_BINS, NUM_DOPPLER_BINS), 16-bit signed

-    Zeros Doppler bins within ±width of DC (bin 0).
-    In a 32-point FFT, DC is bin 0; negative Doppler wraps to bins 31,30,...
+    Zeros Doppler bins within ±width of DC for BOTH sub-frames.
+    doppler_bin[4:0] = {sub_frame, bin[3:0]}:
+      Sub-frame 0: bins 0-15,  DC = bin 0,  wrap = bin 15
+      Sub-frame 1: bins 16-31, DC = bin 16, wrap = bin 31
      width=0: pass-through
-      width=1: zero bins {0}
-      width=2: zero bins {0, 1, 31}
-      width=3: zero bins {0, 1, 2, 30, 31}  etc.
+      width=1: zero bins {0, 16}
+      width=2: zero bins {0, 1, 15, 16, 17, 31}  etc.

-    RTL logic (from radar_system_top.v lines 517-524):
+    RTL logic (from radar_system_top.v):
+      bin_within_sf = dop_bin[3:0]
      dc_notch_active = (width != 0) &&
-                        (dop_bin < width || dop_bin > (31 - width + 1))
-      notched_data = dc_notch_active ? 0 : doppler_data
+                        (bin_within_sf < width || bin_within_sf > (15 - width + 1))
    """
    n_range, n_doppler = doppler_i.shape
    notched_i = doppler_i.copy()
    notched_q = doppler_q.copy()

-    print(f"[DC NOTCH] width={width}, {n_range} range bins x {n_doppler} Doppler bins")
+    print(f"[DC NOTCH] width={width}, {n_range} range bins x {n_doppler} Doppler bins (dual sub-frame)")

    if width == 0:
        print(f"  Pass-through (width=0)")
@@ -845,9 +844,8 @@ def run_dc_notch(doppler_i, doppler_q, width=2):

    zeroed_count = 0
    for dbin in range(n_doppler):
-        # Replicate RTL comparison (unsigned 5-bit):
-        #   dop_bin < width  OR  dop_bin > (31 - width + 1)
-        active = (dbin < width) or (dbin > (31 - width + 1))
+        bin_within_sf = dbin & 0xF
+        active = (bin_within_sf < width) or (bin_within_sf > (15 - width + 1))
        if active:
            notched_i[:, dbin] = 0
            notched_q[:, dbin] = 0
@@ -1049,11 +1047,15 @@ def run_float_reference(iq_i, iq_q):
    n_range = min(DOPPLER_RANGE_BINS, n_samples)
    hamming_float = np.array(HAMMING_Q15, dtype=np.float64) / 32768.0
    
-    doppler_map = np.zeros((n_range, DOPPLER_FFT_SIZE), dtype=np.complex128)
+    doppler_map = np.zeros((n_range, DOPPLER_TOTAL_BINS), dtype=np.complex128)
    for rbin in range(n_range):
        chirp_stack = range_fft[:DOPPLER_CHIRPS, rbin]
-        windowed = chirp_stack * hamming_float
-        doppler_map[rbin, :] = np.fft.fft(windowed)
+        for sf in range(2):
+            sf_start = sf * CHIRPS_PER_SUBFRAME
+            sf_end = sf_start + CHIRPS_PER_SUBFRAME
+            bin_offset = sf * DOPPLER_FFT_SIZE
+            windowed = chirp_stack[sf_start:sf_end] * hamming_float
+            doppler_map[rbin, bin_offset:bin_offset + DOPPLER_FFT_SIZE] = np.fft.fft(windowed)
    
    return range_fft, doppler_map

@@ -1235,10 +1237,10 @@ def main():
    # Run Doppler FFT (bit-accurate) — "direct" path (first 64 bins)
    # -----------------------------------------------------------------------
    print(f"\n{'=' * 72}")
-    print("Stage 3: Doppler FFT (32-point with Hamming window)")
+    print("Stage 3: Doppler FFT (dual 16-point with Hamming window)")
    print("  [direct path: first 64 range bins, no decimation]")
-    twiddle_32 = os.path.join(fpga_dir, "fft_twiddle_32.mem")
-    doppler_i, doppler_q = run_doppler_fft(all_range_i, all_range_q, twiddle_file_32=twiddle_32)
+    twiddle_16 = os.path.join(fpga_dir, "fft_twiddle_16.mem")
+    doppler_i, doppler_q = run_doppler_fft(all_range_i, all_range_q, twiddle_file_16=twiddle_16)
    write_hex_files(output_dir, doppler_i, doppler_q, "doppler_map")
    
    # -----------------------------------------------------------------------
@@ -1276,7 +1278,7 @@ def main():
    print(f"\n{'=' * 72}")
    print("Stage 3b: Doppler FFT on decimated data (full-chain path)")
    fc_doppler_i, fc_doppler_q = run_doppler_fft(
-        decim_i, decim_q, twiddle_file_32=twiddle_32
+        decim_i, decim_q, twiddle_file_16=twiddle_16
    )
    write_hex_files(output_dir, fc_doppler_i, fc_doppler_q, "fullchain_doppler_ref")
    
@@ -1284,12 +1286,12 @@ def main():
    fc_doppler_packed_file = os.path.join(output_dir, "fullchain_doppler_ref_packed.hex")
    with open(fc_doppler_packed_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                i_val = int(fc_doppler_i[rbin, dbin]) & 0xFFFF
                q_val = int(fc_doppler_q[rbin, dbin]) & 0xFFFF
                packed = (q_val << 16) | i_val
                f.write(f"{packed:08X}\n")
-    print(f"  Wrote {fc_doppler_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} packed IQ words)")
+    print(f"  Wrote {fc_doppler_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} packed IQ words)")
    
    # Save numpy arrays for the full-chain path
    np.save(os.path.join(output_dir, "decimated_range_i.npy"), decim_i)
@@ -1313,7 +1315,7 @@ def main():
    print(f"\n{'=' * 72}")
    print("Stage 3b+c: Doppler FFT on MTI-filtered decimated data")
    mti_doppler_i, mti_doppler_q = run_doppler_fft(
-        mti_i, mti_q, twiddle_file_32=twiddle_32
+        mti_i, mti_q, twiddle_file_16=twiddle_16
    )
    write_hex_files(output_dir, mti_doppler_i, mti_doppler_q, "fullchain_mti_doppler_ref")
    np.save(os.path.join(output_dir, "fullchain_mti_doppler_i.npy"), mti_doppler_i)
@@ -1330,12 +1332,12 @@ def main():
    fc_notched_packed_file = os.path.join(output_dir, "fullchain_notched_ref_packed.hex")
    with open(fc_notched_packed_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                i_val = int(notched_i[rbin, dbin]) & 0xFFFF
                q_val = int(notched_q[rbin, dbin]) & 0xFFFF
                packed = (q_val << 16) | i_val
                f.write(f"{packed:08X}\n")
-    print(f"  Wrote {fc_notched_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} packed IQ words)")
+    print(f"  Wrote {fc_notched_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} packed IQ words)")
    
    # CFAR on DC-notched data
    CFAR_GUARD = 2
@@ -1355,28 +1357,28 @@ def main():
    cfar_mag_file = os.path.join(output_dir, "fullchain_cfar_mag.hex")
    with open(cfar_mag_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                m = int(cfar_mag[rbin, dbin]) & 0x1FFFF
                f.write(f"{m:05X}\n")
-    print(f"  Wrote {cfar_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} mag values)")
+    print(f"  Wrote {cfar_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} mag values)")
    
    # 2. Threshold map (17-bit unsigned)
    cfar_thr_file = os.path.join(output_dir, "fullchain_cfar_thr.hex")
    with open(cfar_thr_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                t = int(cfar_thr[rbin, dbin]) & 0x1FFFF
                f.write(f"{t:05X}\n")
-    print(f"  Wrote {cfar_thr_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} threshold values)")
+    print(f"  Wrote {cfar_thr_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} threshold values)")
    
    # 3. Detection flags (1-bit per cell)
    cfar_det_file = os.path.join(output_dir, "fullchain_cfar_det.hex")
    with open(cfar_det_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                d = 1 if cfar_flags[rbin, dbin] else 0
                f.write(f"{d:01X}\n")
-    print(f"  Wrote {cfar_det_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} detection flags)")
+    print(f"  Wrote {cfar_det_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} detection flags)")
    
    # 4. Detection list (text)
    cfar_detections = np.argwhere(cfar_flags)
@@ -1416,10 +1418,10 @@ def main():
    fc_det_mag_file = os.path.join(output_dir, "fullchain_detection_mag.hex")
    with open(fc_det_mag_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                m = int(fc_mag[rbin, dbin]) & 0x1FFFF  # 17-bit unsigned
                f.write(f"{m:05X}\n")
-    print(f"  Wrote {fc_det_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} magnitude values)")
+    print(f"  Wrote {fc_det_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} magnitude values)")
    
    # -----------------------------------------------------------------------
    # Run detection on direct-path Doppler map (for backward compatibility)