feat: 2048-pt FFT upgrade with decimation=4, 512 output bins, 6m spacing

Complete cross-layer upgrade from 1024-pt/64-bin to 2048-pt/512-bin FFT:

FPGA RTL (14+ modules):
- radar_params.vh: FFT_SIZE=2048, RANGE_BINS=512, 9-bit range, 6-bit stream
- fft_engine.v: 2048-pt FFT with XPM BRAM
- chirp_memory_loader_param.v: 2 segments x 2048 (was 4 x 1024)
- matched_filter_multi_segment.v: BRAM inference for overlap_cache, explicit ov_waddr
- mti_canceller.v: BRAM inference for prev_i/q arrays (was fabric FFs)
- doppler_processor.v: 16384-deep memory, 14-bit addressing
- cfar_ca.v: 512 rows, indentation fix
- radar_receiver_final.v: rising-edge detector for frame_complete, 11-bit sample_addr
- range_bin_decimator.v: 512 output bins
- usb_data_interface_ft2232h.v: bulk per-frame with Manhattan magnitude
- radar_mode_controller.v: XOR edge detector for toggle signals
- rx_gain_control.v: updated for new bin count

Python GUI + Protocol (8 files):
- radar_protocol.py: 512-bin bulk frame parser, LSB-first bitmap
- GUI_V65_Tk.py, v7/*.py: updated for 512 bins, 6m range resolution

Golden data + tests:
- All .hex/.csv/.npy golden references regenerated for 2048/512
- fft_twiddle_2048.mem added
- Deleted stale seg2/seg3 chirp mem files
- 9 new bulk frame cross-layer tests, deleted 6 stale per-sample tests
- Deleted stale tb_cross_layer_ft2232h.v and dead contract_parser functions
- Updated validate_mem_files.py for 2048/2-segment config

MCU: RadarSettings.cpp max_distance/map_size 1536->3072

All 4 CI jobs pass: 285 tests, 0 failures, 0 skips
This commit is contained in:
Jason
2026-04-16 17:27:55 +05:45
parent affa40a9d3
commit e9705e40b7
178 changed files with 687738 additions and 122880 deletions
+22 -20
View File
@@ -16,9 +16,9 @@
*
* Phase 2 (CFAR): After frame_complete pulse from Doppler processor,
* process each Doppler column independently:
* a) Read 64 magnitudes from BRAM for one Doppler bin (ST_COL_LOAD)
* a) Read 512 magnitudes from BRAM for one Doppler bin (ST_COL_LOAD)
* b) Compute initial sliding window sums (ST_CFAR_INIT)
* c) Slide CUT through all 64 range bins:
* c) Slide CUT through all 512 range bins:
* - 3 sub-cycles per CUT:
* ST_CFAR_THR: register noise_sum (mode select + cross-multiply)
* ST_CFAR_MUL: compute alpha * noise_sum_reg in DSP
@@ -47,21 +47,23 @@
* typically clutter).
*
* Timing:
* Phase 2 takes ~(66 + T + 3*64) * 32 8500 cycles per frame @ 100 MHz
* = 85 µs. Frame period @ PRF=1932 Hz, 32 chirps = 16.6 ms. Fits easily.
* Phase 2 takes ~(514 + T + 3*512) * 32 55000 cycles per frame @ 100 MHz
* = 0.55 ms. Frame period @ PRF=1932 Hz, 32 chirps = 16.6 ms. Fits easily.
* (3 cycles per CUT due to pipeline: THR MUL CMP)
*
* Resources:
* - 1 BRAM18K for magnitude buffer (2048 x 17 bits)
* - 1 BRAM36K for magnitude buffer (16384 x 17 bits)
* - 1 DSP48 for alpha multiply
* - ~300 LUTs for FSM + sliding window + comparators
*
* Clock domain: clk (100 MHz, same as Doppler processor)
*/
`include "radar_params.vh"
module cfar_ca #(
parameter NUM_RANGE_BINS = 64,
parameter NUM_DOPPLER_BINS = 32,
parameter NUM_RANGE_BINS = `RP_NUM_RANGE_BINS, // 512
parameter NUM_DOPPLER_BINS = `RP_NUM_DOPPLER_BINS, // 32
parameter MAG_WIDTH = 17,
parameter ALPHA_WIDTH = 8,
parameter MAX_GUARD = 8,
@@ -74,7 +76,7 @@ module cfar_ca #(
input wire [31:0] doppler_data,
input wire doppler_valid,
input wire [4:0] doppler_bin_in,
input wire [5:0] range_bin_in,
input wire [`RP_RANGE_BIN_BITS-1:0] range_bin_in, // 9-bit
input wire frame_complete,
// ========== CONFIGURATION ==========
@@ -88,7 +90,7 @@ module cfar_ca #(
// ========== DETECTION OUTPUTS ==========
output reg detect_flag,
output reg detect_valid,
output reg [5:0] detect_range,
output reg [`RP_RANGE_BIN_BITS-1:0] detect_range, // 9-bit
output reg [4:0] detect_doppler,
output reg [MAG_WIDTH-1:0] detect_magnitude,
output reg [MAG_WIDTH-1:0] detect_threshold,
@@ -103,11 +105,11 @@ module cfar_ca #(
// INTERNAL PARAMETERS
// ============================================================================
localparam TOTAL_CELLS = NUM_RANGE_BINS * NUM_DOPPLER_BINS;
localparam ADDR_WIDTH = 11;
localparam ADDR_WIDTH = `RP_CFAR_MAG_ADDR_W; // 14
localparam COL_BITS = 5;
localparam ROW_BITS = 6;
localparam SUM_WIDTH = MAG_WIDTH + 6; // 23 bits: sum of up to 64 magnitudes
localparam PROD_WIDTH = SUM_WIDTH + ALPHA_WIDTH; // 31 bits
localparam ROW_BITS = `RP_RANGE_BIN_BITS; // 9
localparam SUM_WIDTH = MAG_WIDTH + ROW_BITS; // 26 bits: sum of up to 512 magnitudes
localparam PROD_WIDTH = SUM_WIDTH + ALPHA_WIDTH; // 34 bits
localparam ALPHA_FRAC_BITS = 4; // Q4.4
// ============================================================================
@@ -136,7 +138,7 @@ wire [15:0] abs_q = dop_q[15] ? (~dop_q + 16'd1) : dop_q;
wire [MAG_WIDTH-1:0] cur_mag = {1'b0, abs_i} + {1'b0, abs_q};
// ============================================================================
// MAGNITUDE BRAM (2048 x 17 bits)
// MAGNITUDE BRAM (16384 x 17 bits)
// ============================================================================
reg mag_we;
reg [ADDR_WIDTH-1:0] mag_waddr;
@@ -153,7 +155,7 @@ always @(posedge clk) begin
end
// ============================================================================
// COLUMN LINE BUFFER (64 x 17 bits distributed RAM)
// COLUMN LINE BUFFER (512 x 17 bits BRAM)
// ============================================================================
reg [MAG_WIDTH-1:0] col_buf [0:NUM_RANGE_BINS-1];
reg [ROW_BITS:0] col_load_idx;
@@ -267,7 +269,7 @@ always @(posedge clk or negedge reset_n) begin
state <= ST_IDLE;
detect_flag <= 1'b0;
detect_valid <= 1'b0;
detect_range <= 6'd0;
detect_range <= {ROW_BITS{1'b0}};
detect_doppler <= 5'd0;
detect_magnitude <= {MAG_WIDTH{1'b0}};
detect_threshold <= {MAG_WIDTH{1'b0}};
@@ -364,7 +366,7 @@ always @(posedge clk or negedge reset_n) begin
if (r_enable) begin
col_idx <= 0;
col_load_idx <= 0;
mag_raddr <= {6'd0, 5'd0};
mag_raddr <= {{ROW_BITS{1'b0}}, 5'd0};
state <= ST_COL_LOAD;
end else begin
state <= ST_DONE;
@@ -382,14 +384,14 @@ always @(posedge clk or negedge reset_n) begin
if (col_load_idx == 0) begin
// First address already presented, advance to range=1
mag_raddr <= {6'd1, col_idx};
mag_raddr <= {{{(ROW_BITS-1){1'b0}}, 1'b1}, col_idx};
col_load_idx <= 1;
end else if (col_load_idx <= NUM_RANGE_BINS) begin
// Capture previous read
col_buf[col_load_idx - 1] <= mag_rdata;
if (col_load_idx < NUM_RANGE_BINS) begin
mag_raddr <= {col_load_idx[ROW_BITS-1:0] + 6'd1, col_idx};
mag_raddr <= {col_load_idx[ROW_BITS-1:0] + {{(ROW_BITS-1){1'b0}}, 1'b1}, col_idx};
end
col_load_idx <= col_load_idx + 1;
@@ -513,7 +515,7 @@ always @(posedge clk or negedge reset_n) begin
if (col_idx < NUM_DOPPLER_BINS - 1) begin
col_idx <= col_idx + 1;
col_load_idx <= 0;
mag_raddr <= {6'd0, col_idx + 5'd1};
mag_raddr <= {{ROW_BITS{1'b0}}, col_idx + 5'd1};
state <= ST_COL_LOAD;
end else begin
state <= ST_DONE;