fix(rtl,gui,cosim,formal): adapt surrounding files for dual 16-pt FFT (follow-up to PR #33)

- radar_system_top.v: DC notch now masks to dop_bin[3:0] per sub-frame so both sub-frames get their DC zeroed correctly; rename DOPPLER_FFT_SIZE → DOPPLER_FRAME_CHIRPS to avoid confusion with the per-FFT size (now 16) - radar_dashboard.py: remove fftshift (crosses sub-frame boundary), display raw Doppler bins, remove dead velocity constants - golden_reference.py: model dual 16-pt FFT with per-sub-frame Hamming window, update DC notch and CFAR to match RTL - fv_doppler_processor.sby: reference xfft_16.v / fft_twiddle_16.mem, raise BMC depth to 512 and cover to 1024 - fv_radar_mode_controller.sby: raise cover depth to 600 - fv_radar_mode_controller.v: pin cfg_* to reduced constants (documented as single-config proof), fix Property 5 mode guard, strengthen Cover 1 - STALE_NOTICE.md: document that real-data hex files are stale and need regeneration with external dataset Closes #39
2026-04-06 23:15:50 +03:00
parent 22758fa370
commit ffc89f0bbd
8 changed files with 218 additions and 215 deletions
@@ -76,23 +76,20 @@ FFT_DATA_W = 16
 FFT_INTERNAL_W = 32
 FFT_TWIDDLE_W = 16

-# Doppler
-DOPPLER_FFT_SIZE = 32
+# Doppler — dual 16-pt FFT architecture
+DOPPLER_FFT_SIZE = 16            # per sub-frame
+DOPPLER_TOTAL_BINS = 32          # total output (2 sub-frames x 16)
 DOPPLER_RANGE_BINS = 64
 DOPPLER_CHIRPS = 32
+CHIRPS_PER_SUBFRAME = 16
 DOPPLER_WINDOW_TYPE = 0          # Hamming

-# Hamming window coefficients from doppler_processor.v (Q15)
+# 16-point Hamming window coefficients from doppler_processor.v (Q15)
 HAMMING_Q15 = [
-    0x0800, 0x0862, 0x09CB, 0x0C3B,
-    0x0FB2, 0x142F, 0x19B2, 0x2039,
-    0x27C4, 0x3050, 0x39DB, 0x4462,
-    0x4FE3, 0x5C5A, 0x69C4, 0x781D,
-    0x7FFF,  # Peak
-    0x781D, 0x69C4, 0x5C5A, 0x4FE3,
-    0x4462, 0x39DB, 0x3050, 0x27C4,
-    0x2039, 0x19B2, 0x142F, 0x0FB2,
-    0x0C3B, 0x09CB, 0x0862,
+    0x0A3D, 0x0E5C, 0x1B6D, 0x3088,
+    0x4B33, 0x6573, 0x7642, 0x7F62,
+    0x7F62, 0x7642, 0x6573, 0x4B33,
+    0x3088, 0x1B6D, 0x0E5C, 0x0A3D,
 ]

 # ADI dataset parameters
@@ -652,110 +649,111 @@ def run_range_bin_decimator(range_fft_i, range_fft_q,


 # ===========================================================================
-# Stage 3: Doppler FFT (32-point with Hamming window, bit-accurate)
+# Stage 3: Doppler FFT (dual 16-point with Hamming window, bit-accurate)
 # ===========================================================================
-def run_doppler_fft(range_data_i, range_data_q, twiddle_file_32=None):
+def run_doppler_fft(range_data_i, range_data_q, twiddle_file_16=None):
    """
-    Bit-accurate Doppler processor matching doppler_processor.v.
-    
+    Bit-accurate Doppler processor matching doppler_processor.v (dual 16-pt FFT).
+
    Input: range_data_i/q shape (DOPPLER_CHIRPS, FFT_SIZE) — 16-bit signed
           Only first DOPPLER_RANGE_BINS columns are processed.
-    Output: doppler_map_i/q shape (DOPPLER_RANGE_BINS, DOPPLER_FFT_SIZE) — 16-bit signed
-    
-    Pipeline per range bin:
-    1. Read 32 chirps for this range bin
-    2. Apply Hamming window (Q15 multiply + round >>> 15)
-    3. 32-point FFT
+    Output: doppler_map_i/q shape (DOPPLER_RANGE_BINS, DOPPLER_TOTAL_BINS) — 16-bit signed
+
+    Architecture per range bin:
+      Sub-frame 0 (long PRI):  chirps 0..15  → 16-pt Hamming → 16-pt FFT → bins 0-15
+      Sub-frame 1 (short PRI): chirps 16..31 → 16-pt Hamming → 16-pt FFT → bins 16-31
    """
    n_chirps = DOPPLER_CHIRPS
    n_range = DOPPLER_RANGE_BINS
    n_fft = DOPPLER_FFT_SIZE
-    
-    print(f"[DOPPLER] Processing {n_range} range bins x {n_chirps} chirps → {n_fft}-point FFT")
-    
-    # Build Hamming window as signed 16-bit
+    n_total = DOPPLER_TOTAL_BINS
+    n_sf = CHIRPS_PER_SUBFRAME
+
+    print(f"[DOPPLER] Processing {n_range} range bins x {n_chirps} chirps → dual {n_fft}-point FFT")
+
+    # Build 16-point Hamming window as signed 16-bit
    hamming = np.array([int(v) for v in HAMMING_Q15], dtype=np.int64)
    assert len(hamming) == n_fft, f"Hamming length {len(hamming)} != {n_fft}"
-    
-    # Build 32-point twiddle factors
-    if twiddle_file_32 and os.path.exists(twiddle_file_32):
-        cos_rom_32 = load_twiddle_rom(twiddle_file_32)
+
+    # Build 16-point twiddle factors
+    if twiddle_file_16 and os.path.exists(twiddle_file_16):
+        cos_rom_16 = load_twiddle_rom(twiddle_file_16)
    else:
-        cos_rom_32 = np.round(32767 * np.cos(2 * np.pi * np.arange(n_fft // 4) / n_fft)).astype(np.int64)
-    
-    doppler_map_i = np.zeros((n_range, n_fft), dtype=np.int64)
-    doppler_map_q = np.zeros((n_range, n_fft), dtype=np.int64)
-    
+        cos_rom_16 = np.round(32767 * np.cos(2 * np.pi * np.arange(n_fft // 4) / n_fft)).astype(np.int64)
+
+    LOG2N_16 = 4
+    doppler_map_i = np.zeros((n_range, n_total), dtype=np.int64)
+    doppler_map_q = np.zeros((n_range, n_total), dtype=np.int64)
+
    for rbin in range(n_range):
-        # Extract chirp stack for this range bin
        chirp_i = np.zeros(n_chirps, dtype=np.int64)
        chirp_q = np.zeros(n_chirps, dtype=np.int64)
        for c in range(n_chirps):
            chirp_i[c] = int(range_data_i[c, rbin])
            chirp_q[c] = int(range_data_q[c, rbin])
-        
-        # Apply Hamming window (Q15 multiply with rounding)
-        windowed_i = np.zeros(n_fft, dtype=np.int64)
-        windowed_q = np.zeros(n_fft, dtype=np.int64)
-        for k in range(n_fft):
-            # 16-bit x 16-bit = 32-bit, then round and shift >>> 15
-            mult_i = chirp_i[k] * hamming[k]
-            mult_q = chirp_q[k] * hamming[k]
-            windowed_i[k] = saturate((mult_i + (1 << 14)) >> 15, 16)
-            windowed_q[k] = saturate((mult_q + (1 << 14)) >> 15, 16)
-        
-        # 32-point FFT (same algorithm as range FFT, different N)
-        LOG2N_32 = 5
-        mem_re = np.zeros(n_fft, dtype=np.int64)
-        mem_im = np.zeros(n_fft, dtype=np.int64)
-        
-        # Bit-reversed loading, sign-extend to 32-bit
-        for n in range(n_fft):
-            br = 0
-            for b in range(LOG2N_32):
-                if n & (1 << b):
-                    br |= (1 << (LOG2N_32 - 1 - b))
-            mem_re[br] = windowed_i[n]
-            mem_im[br] = windowed_q[n]
-        
-        # Butterfly stages
-        half = 1
-        for stg in range(LOG2N_32):
-            for bfly in range(n_fft // 2):
-                idx = bfly & (half - 1)
-                grp = bfly - idx
-                addr_even = (grp << 1) | idx
-                addr_odd = addr_even + half
-                
-                tw_idx = (idx << (LOG2N_32 - 1 - stg)) % (n_fft // 2)
-                tw_cos, tw_sin = fft_twiddle_lookup(tw_idx, n_fft, cos_rom_32)
-                
-                a_re = mem_re[addr_even]
-                a_im = mem_im[addr_even]
-                b_re = mem_re[addr_odd]
-                b_im = mem_im[addr_odd]
-                
-                prod_re = b_re * tw_cos + b_im * tw_sin
-                prod_im = b_im * tw_cos - b_re * tw_sin
-                
-                prod_re_shifted = prod_re >> 15
-                prod_im_shifted = prod_im >> 15
-                
-                mem_re[addr_even] = a_re + prod_re_shifted
-                mem_im[addr_even] = a_im + prod_im_shifted
-                mem_re[addr_odd] = a_re - prod_re_shifted
-                mem_im[addr_odd] = a_im - prod_im_shifted
-            
-            half <<= 1
-        
-        # Saturate 32-bit → 16-bit
-        for n in range(n_fft):
-            doppler_map_i[rbin, n] = saturate(mem_re[n], 16)
-            doppler_map_q[rbin, n] = saturate(mem_im[n], 16)
-    
-    print(f"  Doppler map: shape ({n_range}, {n_fft}), "
+
+        # Process each sub-frame independently
+        for sf in range(2):
+            chirp_start = sf * n_sf
+            bin_offset = sf * n_fft
+
+            windowed_i = np.zeros(n_fft, dtype=np.int64)
+            windowed_q = np.zeros(n_fft, dtype=np.int64)
+            for k in range(n_fft):
+                ci = chirp_i[chirp_start + k]
+                cq = chirp_q[chirp_start + k]
+                mult_i = ci * hamming[k]
+                mult_q = cq * hamming[k]
+                windowed_i[k] = saturate((mult_i + (1 << 14)) >> 15, 16)
+                windowed_q[k] = saturate((mult_q + (1 << 14)) >> 15, 16)
+
+            mem_re = np.zeros(n_fft, dtype=np.int64)
+            mem_im = np.zeros(n_fft, dtype=np.int64)
+
+            for n in range(n_fft):
+                br = 0
+                for b in range(LOG2N_16):
+                    if n & (1 << b):
+                        br |= (1 << (LOG2N_16 - 1 - b))
+                mem_re[br] = windowed_i[n]
+                mem_im[br] = windowed_q[n]
+
+            half = 1
+            for stg in range(LOG2N_16):
+                for bfly in range(n_fft // 2):
+                    idx = bfly & (half - 1)
+                    grp = bfly - idx
+                    addr_even = (grp << 1) | idx
+                    addr_odd = addr_even + half
+
+                    tw_idx = (idx << (LOG2N_16 - 1 - stg)) % (n_fft // 2)
+                    tw_cos, tw_sin = fft_twiddle_lookup(tw_idx, n_fft, cos_rom_16)
+
+                    a_re = mem_re[addr_even]
+                    a_im = mem_im[addr_even]
+                    b_re = mem_re[addr_odd]
+                    b_im = mem_im[addr_odd]
+
+                    prod_re = b_re * tw_cos + b_im * tw_sin
+                    prod_im = b_im * tw_cos - b_re * tw_sin
+
+                    prod_re_shifted = prod_re >> 15
+                    prod_im_shifted = prod_im >> 15
+
+                    mem_re[addr_even] = a_re + prod_re_shifted
+                    mem_im[addr_even] = a_im + prod_im_shifted
+                    mem_re[addr_odd] = a_re - prod_re_shifted
+                    mem_im[addr_odd] = a_im - prod_im_shifted
+
+                half <<= 1
+
+            for n in range(n_fft):
+                doppler_map_i[rbin, bin_offset + n] = saturate(mem_re[n], 16)
+                doppler_map_q[rbin, bin_offset + n] = saturate(mem_im[n], 16)
+
+    print(f"  Doppler map: shape ({n_range}, {n_total}), "
          f"I range [{doppler_map_i.min()}, {doppler_map_i.max()}]")
-    
+
    return doppler_map_i, doppler_map_q


@@ -821,23 +819,24 @@ def run_dc_notch(doppler_i, doppler_q, width=2):
    Input:  doppler_i/q — shape (NUM_RANGE_BINS, NUM_DOPPLER_BINS), 16-bit signed
    Output: notched_i/q — shape (NUM_RANGE_BINS, NUM_DOPPLER_BINS), 16-bit signed

-    Zeros Doppler bins within ±width of DC (bin 0).
-    In a 32-point FFT, DC is bin 0; negative Doppler wraps to bins 31,30,...
+    Zeros Doppler bins within ±width of DC for BOTH sub-frames.
+    doppler_bin[4:0] = {sub_frame, bin[3:0]}:
+      Sub-frame 0: bins 0-15,  DC = bin 0,  wrap = bin 15
+      Sub-frame 1: bins 16-31, DC = bin 16, wrap = bin 31
      width=0: pass-through
-      width=1: zero bins {0}
-      width=2: zero bins {0, 1, 31}
-      width=3: zero bins {0, 1, 2, 30, 31}  etc.
+      width=1: zero bins {0, 16}
+      width=2: zero bins {0, 1, 15, 16, 17, 31}  etc.

-    RTL logic (from radar_system_top.v lines 517-524):
+    RTL logic (from radar_system_top.v):
+      bin_within_sf = dop_bin[3:0]
      dc_notch_active = (width != 0) &&
-                        (dop_bin < width || dop_bin > (31 - width + 1))
-      notched_data = dc_notch_active ? 0 : doppler_data
+                        (bin_within_sf < width || bin_within_sf > (15 - width + 1))
    """
    n_range, n_doppler = doppler_i.shape
    notched_i = doppler_i.copy()
    notched_q = doppler_q.copy()

-    print(f"[DC NOTCH] width={width}, {n_range} range bins x {n_doppler} Doppler bins")
+    print(f"[DC NOTCH] width={width}, {n_range} range bins x {n_doppler} Doppler bins (dual sub-frame)")

    if width == 0:
        print(f"  Pass-through (width=0)")
@@ -845,9 +844,8 @@ def run_dc_notch(doppler_i, doppler_q, width=2):

    zeroed_count = 0
    for dbin in range(n_doppler):
-        # Replicate RTL comparison (unsigned 5-bit):
-        #   dop_bin < width  OR  dop_bin > (31 - width + 1)
-        active = (dbin < width) or (dbin > (31 - width + 1))
+        bin_within_sf = dbin & 0xF
+        active = (bin_within_sf < width) or (bin_within_sf > (15 - width + 1))
        if active:
            notched_i[:, dbin] = 0
            notched_q[:, dbin] = 0
@@ -1049,11 +1047,15 @@ def run_float_reference(iq_i, iq_q):
    n_range = min(DOPPLER_RANGE_BINS, n_samples)
    hamming_float = np.array(HAMMING_Q15, dtype=np.float64) / 32768.0
    
-    doppler_map = np.zeros((n_range, DOPPLER_FFT_SIZE), dtype=np.complex128)
+    doppler_map = np.zeros((n_range, DOPPLER_TOTAL_BINS), dtype=np.complex128)
    for rbin in range(n_range):
        chirp_stack = range_fft[:DOPPLER_CHIRPS, rbin]
-        windowed = chirp_stack * hamming_float
-        doppler_map[rbin, :] = np.fft.fft(windowed)
+        for sf in range(2):
+            sf_start = sf * CHIRPS_PER_SUBFRAME
+            sf_end = sf_start + CHIRPS_PER_SUBFRAME
+            bin_offset = sf * DOPPLER_FFT_SIZE
+            windowed = chirp_stack[sf_start:sf_end] * hamming_float
+            doppler_map[rbin, bin_offset:bin_offset + DOPPLER_FFT_SIZE] = np.fft.fft(windowed)
    
    return range_fft, doppler_map

@@ -1235,10 +1237,10 @@ def main():
    # Run Doppler FFT (bit-accurate) — "direct" path (first 64 bins)
    # -----------------------------------------------------------------------
    print(f"\n{'=' * 72}")
-    print("Stage 3: Doppler FFT (32-point with Hamming window)")
+    print("Stage 3: Doppler FFT (dual 16-point with Hamming window)")
    print("  [direct path: first 64 range bins, no decimation]")
-    twiddle_32 = os.path.join(fpga_dir, "fft_twiddle_32.mem")
-    doppler_i, doppler_q = run_doppler_fft(all_range_i, all_range_q, twiddle_file_32=twiddle_32)
+    twiddle_16 = os.path.join(fpga_dir, "fft_twiddle_16.mem")
+    doppler_i, doppler_q = run_doppler_fft(all_range_i, all_range_q, twiddle_file_16=twiddle_16)
    write_hex_files(output_dir, doppler_i, doppler_q, "doppler_map")
    
    # -----------------------------------------------------------------------
@@ -1276,7 +1278,7 @@ def main():
    print(f"\n{'=' * 72}")
    print("Stage 3b: Doppler FFT on decimated data (full-chain path)")
    fc_doppler_i, fc_doppler_q = run_doppler_fft(
-        decim_i, decim_q, twiddle_file_32=twiddle_32
+        decim_i, decim_q, twiddle_file_16=twiddle_16
    )
    write_hex_files(output_dir, fc_doppler_i, fc_doppler_q, "fullchain_doppler_ref")
    
@@ -1284,12 +1286,12 @@ def main():
    fc_doppler_packed_file = os.path.join(output_dir, "fullchain_doppler_ref_packed.hex")
    with open(fc_doppler_packed_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                i_val = int(fc_doppler_i[rbin, dbin]) & 0xFFFF
                q_val = int(fc_doppler_q[rbin, dbin]) & 0xFFFF
                packed = (q_val << 16) | i_val
                f.write(f"{packed:08X}\n")
-    print(f"  Wrote {fc_doppler_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} packed IQ words)")
+    print(f"  Wrote {fc_doppler_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} packed IQ words)")
    
    # Save numpy arrays for the full-chain path
    np.save(os.path.join(output_dir, "decimated_range_i.npy"), decim_i)
@@ -1313,7 +1315,7 @@ def main():
    print(f"\n{'=' * 72}")
    print("Stage 3b+c: Doppler FFT on MTI-filtered decimated data")
    mti_doppler_i, mti_doppler_q = run_doppler_fft(
-        mti_i, mti_q, twiddle_file_32=twiddle_32
+        mti_i, mti_q, twiddle_file_16=twiddle_16
    )
    write_hex_files(output_dir, mti_doppler_i, mti_doppler_q, "fullchain_mti_doppler_ref")
    np.save(os.path.join(output_dir, "fullchain_mti_doppler_i.npy"), mti_doppler_i)
@@ -1330,12 +1332,12 @@ def main():
    fc_notched_packed_file = os.path.join(output_dir, "fullchain_notched_ref_packed.hex")
    with open(fc_notched_packed_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                i_val = int(notched_i[rbin, dbin]) & 0xFFFF
                q_val = int(notched_q[rbin, dbin]) & 0xFFFF
                packed = (q_val << 16) | i_val
                f.write(f"{packed:08X}\n")
-    print(f"  Wrote {fc_notched_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} packed IQ words)")
+    print(f"  Wrote {fc_notched_packed_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} packed IQ words)")
    
    # CFAR on DC-notched data
    CFAR_GUARD = 2
@@ -1355,28 +1357,28 @@ def main():
    cfar_mag_file = os.path.join(output_dir, "fullchain_cfar_mag.hex")
    with open(cfar_mag_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                m = int(cfar_mag[rbin, dbin]) & 0x1FFFF
                f.write(f"{m:05X}\n")
-    print(f"  Wrote {cfar_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} mag values)")
+    print(f"  Wrote {cfar_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} mag values)")
    
    # 2. Threshold map (17-bit unsigned)
    cfar_thr_file = os.path.join(output_dir, "fullchain_cfar_thr.hex")
    with open(cfar_thr_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                t = int(cfar_thr[rbin, dbin]) & 0x1FFFF
                f.write(f"{t:05X}\n")
-    print(f"  Wrote {cfar_thr_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} threshold values)")
+    print(f"  Wrote {cfar_thr_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} threshold values)")
    
    # 3. Detection flags (1-bit per cell)
    cfar_det_file = os.path.join(output_dir, "fullchain_cfar_det.hex")
    with open(cfar_det_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                d = 1 if cfar_flags[rbin, dbin] else 0
                f.write(f"{d:01X}\n")
-    print(f"  Wrote {cfar_det_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} detection flags)")
+    print(f"  Wrote {cfar_det_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} detection flags)")
    
    # 4. Detection list (text)
    cfar_detections = np.argwhere(cfar_flags)
@@ -1416,10 +1418,10 @@ def main():
    fc_det_mag_file = os.path.join(output_dir, "fullchain_detection_mag.hex")
    with open(fc_det_mag_file, 'w') as f:
        for rbin in range(DOPPLER_RANGE_BINS):
-            for dbin in range(DOPPLER_FFT_SIZE):
+            for dbin in range(DOPPLER_TOTAL_BINS):
                m = int(fc_mag[rbin, dbin]) & 0x1FFFF  # 17-bit unsigned
                f.write(f"{m:05X}\n")
-    print(f"  Wrote {fc_det_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_FFT_SIZE} magnitude values)")
+    print(f"  Wrote {fc_det_mag_file} ({DOPPLER_RANGE_BINS * DOPPLER_TOTAL_BINS} magnitude values)")
    
    # -----------------------------------------------------------------------
    # Run detection on direct-path Doppler map (for backward compatibility)