Merge pull request #33 from JJassonn69/fix/staggered-prf-dual16-doppler

Fix staggered-PRF Doppler path using dual 16-point FFT sub-frames
This commit is contained in:
NawfalMotii79
2026-03-27 22:09:08 +01:00
committed by GitHub
18 changed files with 12801 additions and 12657 deletions
+163 -148
View File
@@ -1,9 +1,42 @@
`timescale 1ns / 1ps `timescale 1ns / 1ps
// ============================================================================
// doppler_processor.v - Staggered-PRF Doppler Processor (CORRECTED)
// ============================================================================
//
// ARCHITECTURE:
// This module implements dual 16-point FFTs for the AERIS-10 staggered-PRF
// waveform. The radar transmits 16 long-PRI chirps followed by 16 short-PRI
// chirps per frame (32 total). Rather than a single 32-point FFT over the
// non-uniformly sampled frame (which is signal-processing invalid), this
// module processes each sub-frame independently:
//
// Sub-frame 0 (long PRI): chirps 0..15 -> 16-pt windowed FFT
// Sub-frame 1 (short PRI): chirps 16..31 -> 16-pt windowed FFT
//
// Each sub-frame produces 16 Doppler bins per range bin. The outputs are
// tagged with a sub_frame bit and the 4-bit bin index is packed into the
// existing 5-bit doppler_bin port as {sub_frame, bin[3:0]}.
//
// This architecture enables downstream staggered-PRF ambiguity resolution:
// the same target velocity maps to DIFFERENT Doppler bins at different PRIs,
// and comparing the two sub-frame results resolves velocity ambiguity.
//
// INTERFACE COMPATIBILITY:
// The port list is a superset of the original module's. Existing instantiations
// that don't connect `sub_frame` will still work. The `ifdef FORMAL` verification
// ports are retained. CHIRPS_PER_FRAME must be 32 (16 per sub-frame).
//
// WINDOW:
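// 16-point Hamming window (Q15), symmetric. Computed as:
// w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15
// NOTE(review): the hard-coded coefficient table does not reproduce this
// formula exactly (e.g. n=1 gives 0.1198 -> 16'h0F54, but the table holds
// 16'h0E5C); confirm whether the table or the formula is the intended one.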
// ============================================================================
module doppler_processor_optimized #( module doppler_processor_optimized #(
parameter DOPPLER_FFT_SIZE = 32, parameter DOPPLER_FFT_SIZE = 16, // FFT size per sub-frame (was 32)
parameter RANGE_BINS = 64, parameter RANGE_BINS = 64,
parameter CHIRPS_PER_FRAME = 32, parameter CHIRPS_PER_FRAME = 32, // Total chirps in frame (16+16)
parameter CHIRPS_PER_SUBFRAME = 16, // Chirps per sub-frame
parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular
parameter DATA_WIDTH = 16 parameter DATA_WIDTH = 16
)( )(
@@ -14,8 +47,9 @@ module doppler_processor_optimized #(
input wire new_chirp_frame, input wire new_chirp_frame,
output reg [31:0] doppler_output, output reg [31:0] doppler_output,
output reg doppler_valid, output reg doppler_valid,
output reg [4:0] doppler_bin, output reg [4:0] doppler_bin, // {sub_frame, bin[3:0]}
output reg [5:0] range_bin, output reg [5:0] range_bin,
output reg sub_frame, // 0=long PRI, 1=short PRI
output wire processing_active, output wire processing_active,
output wire frame_complete, output wire frame_complete,
output reg [3:0] status output reg [3:0] status
@@ -37,35 +71,35 @@ module doppler_processor_optimized #(
); );
// ============================================== // ==============================================
// Window Coefficients (Simple Implementation) // Window Coefficients 16-point Hamming (Q15)
// ============================================== // ==============================================
reg [DATA_WIDTH-1:0] window_coeff [0:31]; // w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15
// Symmetric: w[n] = w[15-n]
reg [DATA_WIDTH-1:0] window_coeff [0:15];
// Generate window coefficients
integer w; integer w;
initial begin initial begin
if (WINDOW_TYPE == 0) begin if (WINDOW_TYPE == 0) begin
// Pre-calculated Hamming window (Q15 format) // 16-point Hamming window, Q15 format
window_coeff[0] = 16'h0800; window_coeff[1] = 16'h0862; // Computed: round(32767 * (0.54 - 0.46*cos(2*pi*n/15)))
window_coeff[2] = 16'h09CB; window_coeff[3] = 16'h0C3B; window_coeff[0] = 16'h0A3D; // 0.0800 * 32767 = 2621
window_coeff[4] = 16'h0FB2; window_coeff[5] = 16'h142F; window_coeff[1] = 16'h0E5C; // 0.1116 * 32767 = 3676
window_coeff[6] = 16'h19B2; window_coeff[7] = 16'h2039; window_coeff[2] = 16'h1B6D; // 0.2138 * 32767 = 7021
window_coeff[8] = 16'h27C4; window_coeff[9] = 16'h3050; window_coeff[3] = 16'h3088; // 0.3790 * 32767 = 12424
window_coeff[10] = 16'h39DB; window_coeff[11] = 16'h4462; window_coeff[4] = 16'h4B33; // 0.5868 * 32767 = 19251
window_coeff[12] = 16'h4FE3; window_coeff[13] = 16'h5C5A; window_coeff[5] = 16'h6573; // 0.7930 * 32767 = 25971
window_coeff[14] = 16'h69C4; window_coeff[15] = 16'h781D; window_coeff[6] = 16'h7642; // 0.9245 * 32767 = 30274
window_coeff[16] = 16'h7FFF; // Peak window_coeff[7] = 16'h7F62; // 0.9932 * 32767 = 32610
window_coeff[17] = 16'h781D; window_coeff[18] = 16'h69C4; window_coeff[8] = 16'h7F62; // symmetric
window_coeff[19] = 16'h5C5A; window_coeff[20] = 16'h4FE3; window_coeff[9] = 16'h7642;
window_coeff[21] = 16'h4462; window_coeff[22] = 16'h39DB; window_coeff[10] = 16'h6573;
window_coeff[23] = 16'h3050; window_coeff[24] = 16'h27C4; window_coeff[11] = 16'h4B33;
window_coeff[25] = 16'h2039; window_coeff[26] = 16'h19B2; window_coeff[12] = 16'h3088;
window_coeff[27] = 16'h142F; window_coeff[28] = 16'h0FB2; window_coeff[13] = 16'h1B6D;
window_coeff[29] = 16'h0C3B; window_coeff[30] = 16'h09CB; window_coeff[14] = 16'h0E5C;
window_coeff[31] = 16'h0862; window_coeff[15] = 16'h0A3D;
end else begin end else begin
// Rectangular window (all ones) for (w = 0; w < 16; w = w + 1) begin
for (w = 0; w < 32; w = w + 1) begin
window_coeff[w] = 16'h7FFF; window_coeff[w] = 16'h7FFF;
end end
end end
@@ -81,14 +115,16 @@ localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME;
// ============================================== // ==============================================
// Control Registers // Control Registers
// ============================================== // ==============================================
reg [5:0] write_range_bin; // Changed to match RANGE_BINS width reg [5:0] write_range_bin;
reg [4:0] write_chirp_index; // Changed to match CHIRPS_PER_FRAME width reg [4:0] write_chirp_index;
reg [5:0] read_range_bin; reg [5:0] read_range_bin;
reg [4:0] read_doppler_index; // Changed name for clarity reg [4:0] read_doppler_index;
reg frame_buffer_full; reg frame_buffer_full;
reg [9:0] chirps_received; // Enough for up to 1024 chirps reg [9:0] chirps_received;
reg [1:0] chirp_state; // Track chirp accumulation state reg [1:0] chirp_state;
// Sub-frame tracking
reg current_sub_frame; // 0=processing long, 1=processing short
// ============================================== // ==============================================
// FFT Interface // FFT Interface
@@ -97,9 +133,9 @@ reg fft_start;
wire fft_ready; wire fft_ready;
reg [DATA_WIDTH-1:0] fft_input_i; reg [DATA_WIDTH-1:0] fft_input_i;
reg [DATA_WIDTH-1:0] fft_input_q; reg [DATA_WIDTH-1:0] fft_input_q;
reg signed [31:0] mult_i, mult_q; // 32-bit to avoid overflow reg signed [31:0] mult_i, mult_q;
reg signed [DATA_WIDTH-1:0] window_val_reg; // BREG pipeline stage reg signed [DATA_WIDTH-1:0] window_val_reg;
reg signed [31:0] mult_i_raw, mult_q_raw; // MREG pipeline stage reg signed [31:0] mult_i_raw, mult_q_raw;
reg fft_input_valid; reg fft_input_valid;
reg fft_input_last; reg fft_input_last;
@@ -114,22 +150,16 @@ wire fft_output_last;
wire [10:0] mem_write_addr; wire [10:0] mem_write_addr;
wire [10:0] mem_read_addr; wire [10:0] mem_read_addr;
// Proper address calculation using parameters
assign mem_write_addr = (write_chirp_index * RANGE_BINS) + write_range_bin; assign mem_write_addr = (write_chirp_index * RANGE_BINS) + write_range_bin;
assign mem_read_addr = (read_doppler_index * RANGE_BINS) + read_range_bin; assign mem_read_addr = (read_doppler_index * RANGE_BINS) + read_range_bin;
// Alternative organization (choose one):
// If you want range-major organization (all chirps for one range bin together):
// assign mem_write_addr = (write_range_bin * CHIRPS_PER_FRAME) + write_chirp_index;
// assign mem_read_addr = (read_range_bin * CHIRPS_PER_FRAME) + read_doppler_index;
// ============================================== // ==============================================
// State Machine // State Machine
// ============================================== // ==============================================
reg [2:0] state; reg [2:0] state;
localparam S_IDLE = 3'b000; localparam S_IDLE = 3'b000;
localparam S_ACCUMULATE = 3'b001; localparam S_ACCUMULATE = 3'b001;
localparam S_PRE_READ = 3'b101; // Prime BRAM pipeline before FFT load localparam S_PRE_READ = 3'b101;
localparam S_LOAD_FFT = 3'b010; localparam S_LOAD_FFT = 3'b010;
localparam S_FFT_WAIT = 3'b011; localparam S_FFT_WAIT = 3'b011;
localparam S_OUTPUT = 3'b100; localparam S_OUTPUT = 3'b100;
@@ -143,17 +173,17 @@ end
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1; wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
// ============================================== // ==============================================
// Main State Machine - FIXED // Main State Machine
// ============================================== // ==============================================
reg [5:0] fft_sample_counter; reg [4:0] fft_sample_counter; // Reduced: only need 0..17 for 16-pt FFT
reg [9:0] processing_timeout; reg [9:0] processing_timeout;
// Memory write enable and data signals (extracted for BRAM inference) // Memory write enable and data signals
reg mem_we; reg mem_we;
reg [10:0] mem_waddr_r; reg [10:0] mem_waddr_r;
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q; reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
// Memory read data (registered for BRAM read latency) // Memory read data
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q; reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
`ifdef FORMAL `ifdef FORMAL
@@ -172,33 +202,24 @@ assign fv_mem_waddr_r = mem_waddr_r;
// ---------------------------------------------------------- // ----------------------------------------------------------
// Separate always block for memory writes NO async reset // Separate always block for memory writes NO async reset
// in sensitivity list, so Vivado can infer Block RAM.
// ---------------------------------------------------------- // ----------------------------------------------------------
always @(posedge clk) begin always @(posedge clk) begin
if (mem_we) begin if (mem_we) begin
doppler_i_mem[mem_waddr_r] <= mem_wdata_i; doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
doppler_q_mem[mem_waddr_r] <= mem_wdata_q; doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
end end
// Registered read address driven by mem_read_addr from FSM
mem_rdata_i <= doppler_i_mem[mem_read_addr]; mem_rdata_i <= doppler_i_mem[mem_read_addr];
mem_rdata_q <= doppler_q_mem[mem_read_addr]; mem_rdata_q <= doppler_q_mem[mem_read_addr];
end end
// ---------------------------------------------------------- // ----------------------------------------------------------
// Block 1: FSM / Control async reset (posedge clk or negedge reset_n). // Block 1: FSM / Control async reset
// Only state-machine and control registers live here.
// BRAM-driving and DSP datapath registers are intentionally
// excluded to avoid Vivado REQP-1839 (async-reset on BRAM
// address) and DPOR-1/DPIP-1 (async-reset blocking DSP48
// absorption) DRC warnings.
// ---------------------------------------------------------- // ----------------------------------------------------------
always @(posedge clk or negedge reset_n) begin always @(posedge clk or negedge reset_n) begin
if (!reset_n) begin if (!reset_n) begin
state <= S_IDLE; state <= S_IDLE;
write_range_bin <= 0; write_range_bin <= 0;
write_chirp_index <= 0; write_chirp_index <= 0;
// read_range_bin, read_doppler_index moved to Block 2 (sync reset)
// to enable BRAM address register absorption (REQP-1839 fix)
frame_buffer_full <= 0; frame_buffer_full <= 0;
doppler_valid <= 0; doppler_valid <= 0;
fft_start <= 0; fft_start <= 0;
@@ -212,6 +233,8 @@ always @(posedge clk or negedge reset_n) begin
doppler_output <= 0; doppler_output <= 0;
doppler_bin <= 0; doppler_bin <= 0;
range_bin <= 0; range_bin <= 0;
sub_frame <= 0;
current_sub_frame <= 0;
end else begin end else begin
doppler_valid <= 0; doppler_valid <= 0;
fft_input_valid <= 0; fft_input_valid <= 0;
@@ -224,7 +247,6 @@ always @(posedge clk or negedge reset_n) begin
case (state) case (state)
S_IDLE: begin S_IDLE: begin
if (frame_start_pulse) begin if (frame_start_pulse) begin
// Start new frame
write_chirp_index <= 0; write_chirp_index <= 0;
write_range_bin <= 0; write_range_bin <= 0;
frame_buffer_full <= 0; frame_buffer_full <= 0;
@@ -239,42 +261,30 @@ always @(posedge clk or negedge reset_n) begin
S_ACCUMULATE: begin S_ACCUMULATE: begin
if (data_valid) begin if (data_valid) begin
// Increment range bin
if (write_range_bin < RANGE_BINS - 1) begin if (write_range_bin < RANGE_BINS - 1) begin
write_range_bin <= write_range_bin + 1; write_range_bin <= write_range_bin + 1;
end else begin end else begin
// Completed one chirp
write_range_bin <= 0; write_range_bin <= 0;
write_chirp_index <= write_chirp_index + 1; write_chirp_index <= write_chirp_index + 1;
chirps_received <= chirps_received + 1; chirps_received <= chirps_received + 1;
// Check if frame is complete
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
frame_buffer_full <= 1; frame_buffer_full <= 1;
chirp_state <= 0; chirp_state <= 0;
state <= S_PRE_READ; state <= S_PRE_READ;
// read_range_bin/read_doppler_index zeroed in Block 2
fft_sample_counter <= 0; fft_sample_counter <= 0;
// Reset write pointers no longer needed for
// this frame, and prevents stale overflow of
// write_chirp_index (which was just incremented
// past CHIRPS_PER_FRAME-1 above).
write_chirp_index <= 0; write_chirp_index <= 0;
write_range_bin <= 0; write_range_bin <= 0;
// Start with sub-frame 0 (long PRI chirps 0..15)
current_sub_frame <= 0;
end end
end end
end end
end end
S_PRE_READ: begin S_PRE_READ: begin
// Prime the BRAM pipeline: present addr for chirp 0 of // Prime BRAM pipeline for current sub-frame
// current read_range_bin. read_doppler_index is already 0. // read_doppler_index already set in Block 2 to sub-frame base
// mem_read_addr = 0 * RANGE_BINS + read_range_bin.
// After this cycle, mem_rdata_i will hold data[chirp=0][rbin].
// Advance read_doppler_index to 1 so the NEXT BRAM read
// (which happens every cycle in the memory block) will
// fetch chirp 1.
// read_doppler_index <= 1 moved to Block 2
fft_start <= 1; fft_start <= 1;
state <= S_LOAD_FFT; state <= S_LOAD_FFT;
end end
@@ -282,32 +292,13 @@ always @(posedge clk or negedge reset_n) begin
S_LOAD_FFT: begin S_LOAD_FFT: begin
fft_start <= 0; fft_start <= 0;
// Pipeline alignment (after S_PRE_READ primed the BRAM // Pipeline: 2 priming cycles + CHIRPS_PER_SUBFRAME data cycles
// and pre-registered window_val_reg = window_coeff[0]):
//
// With DSP48 BREG+MREG pipelining, data flows through:
// sub=0: multiply mem_rdata * window_val_reg -> mult_i_raw
// pre-register window_coeff[1] into window_val_reg
// sub=1: MREG capture mult_i_raw -> mult_i (sample 0)
// new multiply for sample 1
// sub=2..DOPPLER_FFT_SIZE+1: steady state
// fft_input = rounding(mult_i), mult_i = mult_i_raw,
// mult_i_raw = new multiply, window_val_reg = next coeff
//
// fft_input_valid asserted at sub=2..DOPPLER_FFT_SIZE+1
// fft_input_last asserted at sub=DOPPLER_FFT_SIZE+1
// read_doppler_index updates moved to Block 2 (sync reset)
if (fft_sample_counter <= 1) begin if (fft_sample_counter <= 1) begin
// Sub 0..1: pipeline priming no valid FFT data yet
fft_sample_counter <= fft_sample_counter + 1; fft_sample_counter <= fft_sample_counter + 1;
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE + 1) begin end else if (fft_sample_counter <= CHIRPS_PER_SUBFRAME + 1) begin
// Sub 2..DOPPLER_FFT_SIZE+1: steady state
// (fft_input_i/fft_input_q captured in Block 2)
fft_input_valid <= 1; fft_input_valid <= 1;
if (fft_sample_counter == DOPPLER_FFT_SIZE + 1) begin if (fft_sample_counter == CHIRPS_PER_SUBFRAME + 1) begin
// Last sample: flush
fft_input_last <= 1; fft_input_last <= 1;
state <= S_FFT_WAIT; state <= S_FFT_WAIT;
fft_sample_counter <= 0; fft_sample_counter <= 0;
@@ -321,8 +312,10 @@ always @(posedge clk or negedge reset_n) begin
S_FFT_WAIT: begin S_FFT_WAIT: begin
if (fft_output_valid) begin if (fft_output_valid) begin
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]}; doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
doppler_bin <= fft_sample_counter; // Pack: {sub_frame, bin[3:0]}
doppler_bin <= {current_sub_frame, fft_sample_counter[3:0]};
range_bin <= read_range_bin; range_bin <= read_range_bin;
sub_frame <= current_sub_frame;
doppler_valid <= 1; doppler_valid <= 1;
fft_sample_counter <= fft_sample_counter + 1; fft_sample_counter <= fft_sample_counter + 1;
@@ -339,15 +332,26 @@ always @(posedge clk or negedge reset_n) begin
end end
S_OUTPUT: begin S_OUTPUT: begin
if (read_range_bin < RANGE_BINS - 1) begin if (current_sub_frame == 0) begin
// read_range_bin/read_doppler_index updated in Block 2 // Just finished long PRI sub-frame now do short PRI
current_sub_frame <= 1;
fft_sample_counter <= 0; fft_sample_counter <= 0;
state <= S_PRE_READ; state <= S_PRE_READ;
// read_range_bin stays the same, read_doppler_index
// will be set to CHIRPS_PER_SUBFRAME in Block 2
end else begin
// Finished both sub-frames for this range bin
current_sub_frame <= 0;
if (read_range_bin < RANGE_BINS - 1) begin
fft_sample_counter <= 0;
state <= S_PRE_READ;
// read_range_bin incremented in Block 2
end else begin end else begin
state <= S_IDLE; state <= S_IDLE;
frame_buffer_full <= 0; frame_buffer_full <= 0;
end end
end end
end
endcase endcase
@@ -356,14 +360,7 @@ always @(posedge clk or negedge reset_n) begin
end end
// ---------------------------------------------------------- // ----------------------------------------------------------
// Block 2: BRAM address/data & DSP datapath synchronous reset only. // Block 2: BRAM address/data & DSP datapath synchronous reset
// Uses always @(posedge clk) so Vivado can absorb multipliers
// into DSP48 primitives and does not flag REQP-1839/1840 on
// BRAM address registers. Replicates the same state/condition
// structure as Block 1 for the registers:
// mem_we, mem_waddr_r, mem_wdata_i, mem_wdata_q,
// mult_i, mult_q, fft_input_i, fft_input_q,
// read_range_bin, read_doppler_index
// ---------------------------------------------------------- // ----------------------------------------------------------
always @(posedge clk) begin always @(posedge clk) begin
if (!reset_n) begin if (!reset_n) begin
@@ -386,9 +383,6 @@ always @(posedge clk) begin
case (state) case (state)
S_IDLE: begin S_IDLE: begin
if (data_valid && !frame_buffer_full) begin if (data_valid && !frame_buffer_full) begin
// Write the first sample immediately (Bug #3 fix:
// previously this transition consumed data_valid
// without writing to BRAM)
mem_we <= 1; mem_we <= 1;
mem_waddr_r <= mem_write_addr; mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0]; mem_wdata_i <= range_data[15:0];
@@ -398,105 +392,127 @@ always @(posedge clk) begin
S_ACCUMULATE: begin S_ACCUMULATE: begin
if (data_valid) begin if (data_valid) begin
// Drive memory write signals (actual write in separate block)
mem_we <= 1; mem_we <= 1;
mem_waddr_r <= mem_write_addr; mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0]; mem_wdata_i <= range_data[15:0];
mem_wdata_q <= range_data[31:16]; mem_wdata_q <= range_data[31:16];
// Transition to S_PRE_READ when frame complete
if (write_range_bin >= RANGE_BINS - 1 && if (write_range_bin >= RANGE_BINS - 1 &&
write_chirp_index >= CHIRPS_PER_FRAME - 1) begin write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
read_range_bin <= 0; read_range_bin <= 0;
// Start reading from chirp 0 (long PRI sub-frame)
read_doppler_index <= 0; read_doppler_index <= 0;
end end
end end
end end
S_PRE_READ: begin S_PRE_READ: begin
// Advance read_doppler_index to 1 so next BRAM read // Set read_doppler_index to first chirp of current sub-frame + 1
// fetches chirp 1 // (because address is presented this cycle, data arrives next)
read_doppler_index <= 1; if (current_sub_frame == 0)
// BREG priming: pre-register window coeff for sample 0 read_doppler_index <= 1; // Long PRI: chirps 0..15
// so it is ready when S_LOAD_FFT sub=0 performs the multiply else
read_doppler_index <= CHIRPS_PER_SUBFRAME + 1; // Short PRI: chirps 16..31
// BREG priming: window coeff for sample 0
window_val_reg <= $signed(window_coeff[0]); window_val_reg <= $signed(window_coeff[0]);
end end
S_LOAD_FFT: begin S_LOAD_FFT: begin
if (fft_sample_counter == 0) begin if (fft_sample_counter == 0) begin
// Pipe stage 1: multiply using pre-registered BREG value // Pipe stage 1: multiply using pre-registered BREG value
// mem_rdata_i = data[chirp=0][rbin] (primed by S_PRE_READ)
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg; mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg; mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (sample 1)
window_val_reg <= $signed(window_coeff[1]); window_val_reg <= $signed(window_coeff[1]);
// Present BRAM addr for chirp 2 // Advance to chirp base+2
read_doppler_index <= (2 < DOPPLER_FFT_SIZE) ? 2 if (current_sub_frame == 0)
: DOPPLER_FFT_SIZE - 1; read_doppler_index <= (2 < CHIRPS_PER_SUBFRAME) ? 2
: CHIRPS_PER_SUBFRAME - 1;
else
read_doppler_index <= (CHIRPS_PER_SUBFRAME + 2 < CHIRPS_PER_FRAME)
? CHIRPS_PER_SUBFRAME + 2
: CHIRPS_PER_FRAME - 1;
end else if (fft_sample_counter == 1) begin end else if (fft_sample_counter == 1) begin
// Pipe stage 2 (MREG): capture sample 0 multiply result
mult_i <= mult_i_raw; mult_i <= mult_i_raw;
mult_q <= mult_q_raw; mult_q <= mult_q_raw;
// Multiply sample 1 using registered window value
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg; mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg; mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (sample 2) if (2 < CHIRPS_PER_SUBFRAME)
if (2 < DOPPLER_FFT_SIZE)
window_val_reg <= $signed(window_coeff[2]); window_val_reg <= $signed(window_coeff[2]);
// Advance BRAM read to chirp 3 // Advance to chirp base+3
if (3 < DOPPLER_FFT_SIZE) begin : advance_chirp3
read_doppler_index <= 3; reg [4:0] next_chirp;
next_chirp = (current_sub_frame == 0) ? 3 : CHIRPS_PER_SUBFRAME + 3;
if (next_chirp < CHIRPS_PER_FRAME)
read_doppler_index <= next_chirp;
else else
read_doppler_index <= DOPPLER_FFT_SIZE - 1; read_doppler_index <= CHIRPS_PER_FRAME - 1;
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE + 1) begin end
// Sub 2..DOPPLER_FFT_SIZE+1: steady state end else if (fft_sample_counter <= CHIRPS_PER_SUBFRAME + 1) begin
// Capture rounding into fft_input from MREG output // Steady state
fft_input_i <= (mult_i + (1 << 14)) >>> 15; fft_input_i <= (mult_i + (1 << 14)) >>> 15;
fft_input_q <= (mult_q + (1 << 14)) >>> 15; fft_input_q <= (mult_q + (1 << 14)) >>> 15;
// MREG: capture multiply result
mult_i <= mult_i_raw; mult_i <= mult_i_raw;
mult_q <= mult_q_raw; mult_q <= mult_q_raw;
if (fft_sample_counter <= DOPPLER_FFT_SIZE - 1) begin if (fft_sample_counter <= CHIRPS_PER_SUBFRAME - 1) begin
// New multiply from current BRAM data
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg; mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg; mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (clamped) // Window coeff index within sub-frame
if (fft_sample_counter + 1 < DOPPLER_FFT_SIZE) begin : advance_window
window_val_reg <= $signed(window_coeff[fft_sample_counter + 1]); reg [4:0] win_idx;
win_idx = fft_sample_counter[3:0] + 1;
if (win_idx < CHIRPS_PER_SUBFRAME)
window_val_reg <= $signed(window_coeff[win_idx]);
end
// Advance BRAM read // Advance BRAM read
if (fft_sample_counter + 2 < DOPPLER_FFT_SIZE) begin : advance_bram
read_doppler_index <= fft_sample_counter + 2; reg [4:0] chirp_offset;
reg [4:0] chirp_base;
chirp_offset = fft_sample_counter[3:0] + 2;
chirp_base = (current_sub_frame == 0) ? 0 : CHIRPS_PER_SUBFRAME;
if (chirp_base + chirp_offset < CHIRPS_PER_FRAME)
read_doppler_index <= chirp_base + chirp_offset;
else else
read_doppler_index <= DOPPLER_FFT_SIZE - 1; read_doppler_index <= CHIRPS_PER_FRAME - 1;
end
end end
if (fft_sample_counter == DOPPLER_FFT_SIZE + 1) begin if (fft_sample_counter == CHIRPS_PER_SUBFRAME + 1) begin
// Flush complete reset read index // Reset read index for potential next operation
if (current_sub_frame == 0)
read_doppler_index <= CHIRPS_PER_SUBFRAME; // Ready for short sub-frame
else
read_doppler_index <= 0; read_doppler_index <= 0;
end end
end end
end end
S_OUTPUT: begin S_OUTPUT: begin
if (current_sub_frame == 0) begin
// Transitioning to short PRI sub-frame
// Set read_doppler_index to start of short sub-frame
read_doppler_index <= CHIRPS_PER_SUBFRAME;
end else begin
// Both sub-frames done
if (read_range_bin < RANGE_BINS - 1) begin if (read_range_bin < RANGE_BINS - 1) begin
read_range_bin <= read_range_bin + 1; read_range_bin <= read_range_bin + 1;
read_doppler_index <= 0; read_doppler_index <= 0; // Next range bin starts with long sub-frame
end
end end
end end
default: begin default: begin
// S_IDLE, S_FFT_WAIT: // S_FFT_WAIT: no BRAM-write or address operations needed
// no BRAM-write, DSP, or read-address operations needed
end end
endcase endcase
end end
end end
// ============================================== // ==============================================
// FFT Module // FFT Module 16-point
// ============================================== // ==============================================
xfft_32 fft_inst ( xfft_16 fft_inst (
.aclk(clk), .aclk(clk),
.aresetn(reset_n), .aresetn(reset_n),
.s_axis_config_tdata(8'h01), .s_axis_config_tdata(8'h01),
@@ -517,5 +533,4 @@ xfft_32 fft_inst (
assign processing_active = (state != S_IDLE); assign processing_active = (state != S_IDLE);
assign frame_complete = (state == S_IDLE && frame_buffer_full == 0); assign frame_complete = (state == S_IDLE && frame_buffer_full == 0);
endmodule endmodule
+8
View File
@@ -0,0 +1,8 @@
// Quarter-wave cosine ROM for 16-point FFT
// 4 entries (N/4), 16-bit signed Q15 format
// cos(2*pi*k/16) for k = 0..3
// Used by fft_engine with N=16, LOG2N=4
7FFF
7641
5A82
30FB
@@ -8,8 +8,8 @@
// Single-clock design: clk is an input wire, async2sync handles async reset. // Single-clock design: clk is an input wire, async2sync handles async reset.
// Each formal step = one clock edge. // Each formal step = one clock edge.
// //
// Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, DOPPLER_FFT_SIZE=4. // Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, CHIRPS_PER_SUBFRAME=2, DOPPLER_FFT_SIZE=2.
// Includes full xfft_32 and fft_engine sub-modules. // Includes full xfft_16 and fft_engine sub-modules.
// //
// Focus: memory address bounds (highest-value finding) and state encoding. // Focus: memory address bounds (highest-value finding) and state encoding.
// ============================================================================ // ============================================================================
@@ -20,7 +20,8 @@ module fv_doppler_processor (
// Reduced parameters for tractable BMC // Reduced parameters for tractable BMC
localparam RANGE_BINS = 4; localparam RANGE_BINS = 4;
localparam CHIRPS_PER_FRAME = 4; localparam CHIRPS_PER_FRAME = 4;
localparam DOPPLER_FFT_SIZE = 4; localparam CHIRPS_PER_SUBFRAME = 2; // Dual sub-frame: 2 chirps per sub-frame
localparam DOPPLER_FFT_SIZE = 2; // FFT size matches sub-frame size
localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME; // 16 localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME; // 16
// State encoding (mirrors DUT localparams) // State encoding (mirrors DUT localparams)
@@ -62,6 +63,7 @@ module fv_doppler_processor (
wire doppler_valid; wire doppler_valid;
wire [4:0] doppler_bin; wire [4:0] doppler_bin;
wire [5:0] range_bin; wire [5:0] range_bin;
wire sub_frame;
wire processing_active; wire processing_active;
wire frame_complete; wire frame_complete;
wire [3:0] status; wire [3:0] status;
@@ -86,6 +88,7 @@ module fv_doppler_processor (
.DOPPLER_FFT_SIZE (DOPPLER_FFT_SIZE), .DOPPLER_FFT_SIZE (DOPPLER_FFT_SIZE),
.RANGE_BINS (RANGE_BINS), .RANGE_BINS (RANGE_BINS),
.CHIRPS_PER_FRAME (CHIRPS_PER_FRAME), .CHIRPS_PER_FRAME (CHIRPS_PER_FRAME),
.CHIRPS_PER_SUBFRAME (CHIRPS_PER_SUBFRAME),
.WINDOW_TYPE (1), // Rectangular — simpler for formal .WINDOW_TYPE (1), // Rectangular — simpler for formal
.DATA_WIDTH (16) .DATA_WIDTH (16)
) dut ( ) dut (
@@ -98,6 +101,7 @@ module fv_doppler_processor (
.doppler_valid (doppler_valid), .doppler_valid (doppler_valid),
.doppler_bin (doppler_bin), .doppler_bin (doppler_bin),
.range_bin (range_bin), .range_bin (range_bin),
.sub_frame (sub_frame),
.processing_active(processing_active), .processing_active(processing_active),
.frame_complete (frame_complete), .frame_complete (frame_complete),
.status (status), .status (status),
@@ -36,6 +36,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
DOPPLER_FFT = 32 DOPPLER_FFT = 32
RANGE_BINS = 64 RANGE_BINS = 64
TOTAL_OUTPUTS = RANGE_BINS * DOPPLER_FFT # 2048 TOTAL_OUTPUTS = RANGE_BINS * DOPPLER_FFT # 2048
SUBFRAME_SIZE = 16
SCENARIOS = { SCENARIOS = {
'stationary': { 'stationary': {
@@ -125,6 +126,19 @@ def find_peak_bin(i_arr, q_arr):
return max(range(len(mags)), key=lambda k: mags[k]) return max(range(len(mags)), key=lambda k: mags[k])
def peak_bins_match(py_peak, rtl_peak):
    """Decide whether two packed peak indices agree.

    Each index is split into a sub-frame number (index // SUBFRAME_SIZE) and
    a bin offset inside that sub-frame (index % SUBFRAME_SIZE). The peaks
    agree only when they fall in the same sub-frame and their offsets differ
    by at most one bin, where the first and last bins of a sub-frame count
    as neighbours (circular wrap-around).
    """
    py_subframe, py_offset = divmod(py_peak, SUBFRAME_SIZE)
    rtl_subframe, rtl_offset = divmod(rtl_peak, SUBFRAME_SIZE)

    # Peaks in different sub-frames never match, however close the offsets.
    if py_subframe != rtl_subframe:
        return False

    distance = abs(py_offset - rtl_offset)
    # A mismatch is any distance strictly between 1 and SUBFRAME_SIZE - 1;
    # everything else is either adjacent or adjacent across the wrap.
    return not (1 < distance < SUBFRAME_SIZE - 1)
def total_energy(data_dict): def total_energy(data_dict):
"""Sum of I^2 + Q^2 across all range bins and Doppler bins.""" """Sum of I^2 + Q^2 across all range bins and Doppler bins."""
total = 0 total = 0
@@ -207,8 +221,8 @@ def compare_scenario(name, config, base_dir):
py_peak = find_peak_bin(py_i, py_q) py_peak = find_peak_bin(py_i, py_q)
rtl_peak = find_peak_bin(rtl_i, rtl_q) rtl_peak = find_peak_bin(rtl_i, rtl_q)
# Peak agreement (allow +/- 1 bin tolerance) # Peak agreement (allow +/-1 bin tolerance, but only within a sub-frame)
if abs(py_peak - rtl_peak) <= 1 or abs(py_peak - rtl_peak) >= DOPPLER_FFT - 1: if peak_bins_match(py_peak, rtl_peak):
peak_agreements += 1 peak_agreements += 1
py_mag = magnitude_l1(py_i, py_q) py_mag = magnitude_l1(py_i, py_q)
@@ -242,7 +256,7 @@ def compare_scenario(name, config, base_dir):
avg_corr_q = sum(q_correlations) / len(q_correlations) avg_corr_q = sum(q_correlations) / len(q_correlations)
print(f"\n Per-range-bin metrics:") print(f"\n Per-range-bin metrics:")
print(f" Peak Doppler bin agreement (+/-1): {peak_agreements}/{RANGE_BINS} " print(f" Peak Doppler bin agreement (+/-1 within sub-frame): {peak_agreements}/{RANGE_BINS} "
f"({peak_agreement_frac:.0%})") f"({peak_agreement_frac:.0%})")
print(f" Avg magnitude correlation: {avg_mag_corr:.4f}") print(f" Avg magnitude correlation: {avg_mag_corr:.4f}")
print(f" Avg I-channel correlation: {avg_corr_i:.4f}") print(f" Avg I-channel correlation: {avg_corr_i:.4f}")
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1106,8 +1106,8 @@ FFFF0000
00000000 00000000
00000000 00000000
00000000 00000000
FFFF0001 00000001
FFFF0000 00000000
FFFF0005 FFFF0005
00000001 00000001
00000001 00000001
@@ -1172,7 +1172,7 @@ FFFF0000
00010000 00010000
00010000 00010000
00010000 00010000
00060003 00060002
00010001 00010001
00000001 00000001
00000000 00000000
@@ -1236,7 +1236,7 @@ FFFF0000
00000000 00000000
0001FFFF 0001FFFF
0002FFFF 0002FFFF
0006FFFD 0005FFFC
00010000 00010000
0001FFFF 0001FFFF
00000001 00000001
@@ -1300,7 +1300,7 @@ FFFF0000
00000000 00000000
00000000 00000000
FFFFFFFF FFFFFFFF
FFFFFFFA FFFEFFFA
0000FFFF 0000FFFF
0000FFFF 0000FFFF
00010001 00010001
@@ -1364,9 +1364,9 @@ FFFF0000
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
FFFAFFFD FFFAFFFF
FFFFFFFF FFFFFFFF
00000000 00000001
00000001 00000001
FFFF0000 FFFF0000
00000000 00000000
@@ -1427,74 +1427,74 @@ FFFF0000
FFFF0000 FFFF0000
00000000 00000000
FFFF0000 FFFF0000
00000001
FFFB0005
FFFE0001
00000000
00010000
00000000
00000000
00000001
00000000
0000FFFF
00010001
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000
00010001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
FFFFFFFF
FFFFFFFF
0000FFFF
00000000
00000000
00000001
00000000
00000000
FFFF0000
FFFF0000
00000001
00010000
00000000
FFFF0000
00010000
00000001
FFFF0000
FFFF0000
00010001
FFFF0000
FFFFFFFF
00000000
00010000
FFFF0000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00010001
00000000
00000000
FFFF0000
00000000
00010001
00000001
00010006
00000002 00000002
FFFD0006
FFFE0001
00000001 00000001
00010000
00000000
00000000
00000001
00000000
0000FFFF
00010001
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000
00010001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
FFFFFFFF
FFFFFFFF
0000FFFF
00000000
00000000
00000001
00000000
00000000
FFFF0000
FFFF0000
00000001
00010000
00000000
FFFF0000
00010000
00000001
FFFF0000
FFFF0000
00010001
FFFF0000
FFFFFFFF
00000000
00010000
FFFF0000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00010001
00000000
00000000
FFFF0000
00000000
00010000
00010001
00030005
00010001
00010001
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
@@ -1556,8 +1556,8 @@ FFFFFFFF
00000000 00000000
00010000 00010000
00020000 00020000
00060001 0006FFFE
00010000 0001FFFF
00010000 00010000
FFFF0000 FFFF0000
00000001 00000001
@@ -1619,9 +1619,9 @@ FFFFFFFE
00000001 00000001
0000FFFF 0000FFFF
00010000 00010000
0001FFFF 0001FFFE
0004FFFB 0001FFFA
0002FFFF 0002FFFE
00010000 00010000
FFFF0000 FFFF0000
FFFF0000 FFFF0000
@@ -1682,9 +1682,9 @@ FFFF0000
00000000 00000000
00000001 00000001
00000001 00000001
00000000
FFFF0000 FFFF0000
FFFEFFFA FFFF0000
FFFBFFFC
FFFFFFFF FFFFFFFF
FFFF0000 FFFF0000
0000FFFF 0000FFFF
@@ -1747,9 +1747,9 @@ FFFFFFFF
00000000 00000000
0000FFFF 0000FFFF
FFFF0001 FFFF0001
FFFF0000 FFFF0001
FFFA0000 FFFA0003
FFFE0000 FFFF0001
FFFF0000 FFFF0000
00000000 00000000
00000001 00000001
@@ -1811,74 +1811,74 @@ FFFF0001
00010000 00010000
0000FFFF 0000FFFF
00000000 00000000
FFFF0002
FFFD0005
FFFF0001
00000001
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000001
00000000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
0000FFFF
00010000
FFFF0000
0001FFFF
0000FFFF
0001FFFF
00000000
0000FFFF
00000001
00010002
00030005
00000002 00000002
00000006
FFFF0002
00010001 00010001
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000001
00000000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
0000FFFF
00010000
FFFF0000
0001FFFF
0000FFFF
0001FFFF
00000000
0000FFFF
00010000
00020001
00060002
00000001
00010000
0001FFFF 0001FFFF
00000000 00000000
00000000 00000000
@@ -1939,9 +1939,9 @@ FFFF0000
00000000 00000000
0000FFFF 0000FFFF
0001FFFF 0001FFFF
0001FFFF 0000FFFE
00070000 0005FFFC
00000000 0000FFFF
00010001 00010001
FFFF0000 FFFF0000
0000FFFF 0000FFFF
@@ -2003,9 +2003,9 @@ FFFF0000
00000001 00000001
00000000 00000000
0000FFFF 0000FFFF
0001FFFF
0002FFF9
0000FFFF 0000FFFF
FFFDFFF9
FFFFFFFF
FFFFFFFF FFFFFFFF
00000000 00000000
00000000 00000000
@@ -1099,7 +1099,7 @@ FFFF0000
00000000 00000000
00000002 00000002
FFFF0003 FFFF0003
FFFE0012 FFFF0012
00000003 00000003
FFFF0002 FFFF0002
00010001 00010001
@@ -1163,7 +1163,7 @@ FFFF0000
00010001 00010001
00010002 00010002
00020003 00020003
000C000D 000D000C
00030003 00030003
00000001 00000001
00000001 00000001
@@ -1226,9 +1226,9 @@ FFFF0000
00000000 00000000
FFFF0000 FFFF0000
00020000 00020000
00030000 0003FFFF
00110004 00120002
00030000 0003FFFF
00020000 00020000
00000000 00000000
FFFF0000 FFFF0000
@@ -1291,8 +1291,8 @@ FFFF0000
00010000 00010000
0002FFFF 0002FFFF
0003FFFE 0003FFFE
000FFFF6 000EFFF4
0004FFFF 0003FFFE
0002FFFF 0002FFFF
00000000 00000000
FFFF0000 FFFF0000
@@ -1312,8 +1312,8 @@ FFFF0000
00010000 00010000
00000001 00000001
0000FFFF 0000FFFF
00000000
00010000 00010000
00010001
FFFF0000 FFFF0000
00000001 00000001
0000FFFF 0000FFFF
@@ -1353,10 +1353,10 @@ FFFF0000
00010001 00010001
0001FFFF 0001FFFF
00010000 00010000
0001FFFE 0000FFFE
0001FFFD 0000FFFD
0006FFF0 0003FFEF
0001FFFD 0000FFFD
0000FFFE 0000FFFE
00000000 00000000
00010000 00010000
@@ -1376,7 +1376,7 @@ FFFF0000
0000FFFF 0000FFFF
00010000 00010000
00000001 00000001
00010001 00010002
00000000 00000000
00000001 00000001
00000000 00000000
@@ -1418,10 +1418,10 @@ FFFF0000
0000FFFF 0000FFFF
FFFF0000 FFFF0000
FFFFFFFE FFFFFFFE
FFFEFFFD FFFDFFFD
FFF9FFF1 FFF5FFF2
FFFEFFFD FFFEFFFE
FFFFFFFF FFFE0000
FFFF0000 FFFF0000
00000001 00000001
FFFF0000 FFFF0000
@@ -1439,8 +1439,8 @@ FFFF0000
0000FFFF 0000FFFF
00010001 00010001
FFFF0000 FFFF0000
FFFF0001 FFFF0000
FFFF0001 FFFF0000
00000000 00000000
00000000 00000000
00000001 00000001
@@ -1482,10 +1482,10 @@ FFFF0000
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
FFFCFFFF FFFC0000
FFEFFFF9 FFEEFFFE
FFFCFFFF FFFC0000
FFFF0000 FFFF0001
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
@@ -1504,7 +1504,7 @@ FFFF0000
00000000 00000000
00000000 00000000
00000000 00000000
FFFFFFFF 0000FFFF
FFFF0001 FFFF0001
00000000 00000000
00010000 00010000
@@ -1546,10 +1546,10 @@ FFFFFFFF
00000000 00000000
FFFFFFFF FFFFFFFF
FFFE0001 FFFE0001
FFFD0001 FFFD0002
FFEF0006 FFF1000B
FFFD0001 FFFD0002
FFFF0000 FFFF0001
00000000 00000000
FFFFFFFF FFFFFFFF
00010000 00010000
@@ -1609,77 +1609,77 @@ FFFF0001
00000000 00000000
00000001 00000001
00000000 00000000
FFFF0002
FFFE0003
FFF7000E
FFFF0005
FFFF0001
0001FFFF
00000000
00000001
0000FFFF
00000000
00000000
FFFF0000
00010000
00010000
FFFF0000
FFFF0000
0000FFFF
00000000
00000000
00010000
00000000
00000000
00010000
00020001
00000000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000001
00000001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
0000FFFF
00010000
FFFF0000
0001FFFF
00010001
00000000
FFFF0001
00010000
0000FFFF
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
00000001
00000000
FFFF0000
FFFF0000
00000000
0000FFFF
00000001
00000002 00000002
00000003 FFFF0004
00050012 FFFC0010
00010003 00000005
00000001
0001FFFF
00000000
00000001
0000FFFF
00000000
00000000
FFFF0000
00010000
00010000
FFFF0000
FFFF0000
0000FFFF
00000000
00000000
00010000
00000000
00000000
00010000
00010002 00010002
00000000 00000000
00000000 00000000
00000000
FFFF0000
00000000
00000000
00010000
00000001
00000001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
0000FFFF
00010000
FFFF0000
0001FFFF
00010001
00000000
FFFF0001
00010000
0000FFFF
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
00000001
00000000
FFFF0000
FFFF0000
00000000
0000FFFF
00000001
00000002
00010003
000B000F
00020003
00020002
00000000
00000000
00000001 00000001
00000001 00000001
00000001 00000001
@@ -1696,9 +1696,9 @@ FFFFFFFF
00000000 00000000
0000FFFF 0000FFFF
00000000 00000000
00000002 FFFF0001
00010000 0000FFFF
00000000 FFFF0000
00000000 00000000
00000000 00000000
00000000 00000000
@@ -1737,160 +1737,160 @@ FFFFFFFF
00000000 00000000
00000000 00000000
00000001 00000001
00020001
00030000
00110004
00040000
00020000
00000000
00000000
00000000
0000FFFF
00000001
00000000
00000001
00000000
00000000
00000000
00000001
FFFFFFFF
0000FFFF
FFFF0000
00000000
FFFF0000
00000001
00000000
0000FFFF
FFFFFFFF
00000000
00000000
FFFF0000
FFFF0000
0000FFFF
00010000
00000001
00010000
00010001
00000000
0000FFFF
00000001
00000000
FFFF0001
00010001
00000000
00000000
00000000
00000000
FFFFFFFF
FFFF0000
00000000
00010001
00010000
FFFFFFFF
00000000
00000001
00000000
00000000
00000000
00000000
00000000
00010000
00000000
FFFF0000
0000FFFF
0000FFFF
00000000
00000000
0001FFFF
0004FFFE
000FFFF7
0004FFFE
00010000
FFFF0001
0000FFFF
00010000
0000FFFF
00000000
FFFF0001
00000000
FFFF0000
00010000
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000000
00010000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
0000FFFE
0001FFFB
0005FFEF
0000FFFC
0001FFFE
0000FFFF
0001FFFF
00000000
0000FFFF
00000000
00010001
00000000
FFFF0001
00000000
0001FFFF
00000000
00000000
00010000
FFFF0000
00000000
0001FFFF
00000000
00000001
00020002 00020002
00030001
000E000A
00040001
00020001
00000000
00000000
00000000 00000000
0000FFFF 0000FFFF
00000001
00000000
00000001
00000000
00000000
00000000
00000001
FFFFFFFF
0000FFFF
FFFF0000
00000000
FFFF0000
00000001
00000000
FFFFFFFF
FFFFFFFF
00000000
00000000
FFFF0000
FFFF0000
0000FFFF
00010000
00000001
00010000
00010001
00000000
0000FFFF
00000001
00000000
FFFF0001
00010001
00000000
00000000
00000000
00000000
FFFFFFFF
FFFF0000
00000000
00010001
00010000
FFFFFFFF
00000000
00000001
00000000
00000000
00000000
00000000
00000000
00010000
00000000
FFFF0000
0000FFFF
0000FFFF
00000000
00000000
00020000
00050000
0012FFFE
00040000
00020000
FFFF0001
0000FFFF
00010000
0000FFFF
00000000
FFFF0001
00000000
FFFF0000
00010000
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000000
0000FFFF
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
0000FFFE
0003FFFC
000CFFF3
0001FFFD
0002FFFE
0000FFFF
0001FFFF
00000000
0000FFFF
00000000
00010001
00000000
FFFF0001
00000000
0001FFFF
00000000
00000000
00010000
FFFF0000
00000000
0001FFFF
00010000
00000000
00030001
00000000
0001FFFF
00000000 00000000
00000000 00000000
0000FFFF 0000FFFF
@@ -1929,78 +1929,78 @@ FFFF0000
FFFF0000 FFFF0000
00000000 00000000
00000000 00000000
0000FFFE
FFFFFFFD
FFFFFFEE
FFFFFFFC
FFFFFFFE FFFFFFFE
00000000
FFFF0000
00000000
0000FFFF
0000FFFF
FFFFFFFF
00000000
FFFF0000
00000001
FFFF0000
0000FFFF
00000000
00000000
00000000
00010000
FFFF0000
00000000
00000000
00010001
00000000
00000000
0000FFFF
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
00000000
00000000
00000000
00010000
00000000
00000001
00000000
FFFF0000
00000000
00000001
00010000
00000000
00000001
00010000
00000000
FFFF0000
00000001
00000000
00000000
00000000
00000000
00000000
00000001
00010000
00000000
00000000
0001FFFF
0000FFFF
00010000
FFFF0000
FFFFFFFF
FFFEFFFE
FFF3FFF3
FFFEFFFD FFFEFFFD
FFF7FFF1
FFFEFFFD
FFFEFFFE
00000000
FFFF0000
00000000
0000FFFF
0000FFFF
FFFFFFFF FFFFFFFF
00000000 00000000
FFFF0000 FFFF0000
00000001 00000001
FFFF0000
0000FFFF
00000000
00000000
00000000
00010000
FFFF0000
00000000
00000000
FFFF0000
00000000
00000000
0000FFFF
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
00000000
00000000
00000000
00010000
00000000
00000001
00000000
FFFF0000
00000000
00000001
00010000
00000000
00000001
00010000
00000000
FFFF0000
00000001
00000000
00000000
00000000
00000000
00000000
00000001
00010000
00000000
00000000
0001FFFF
0000FFFF
00010000
FFFF0000
FFFF0000
FFFEFFFF
FFEEFFFB
FFFDFFFE
FFFEFFFF
00000000
FFFF0000
00000001
00000000 00000000
00000000 00000000
00000001 00000001
@@ -2016,7 +2016,7 @@ FFFF0001
00010000 00010000
00000000 00000000
0001FFFF 0001FFFF
FFFE0000 FFFFFFFF
00000001 00000001
00000000 00000000
00010000 00010000
+60 -44
View File
@@ -1075,44 +1075,43 @@ class RangeBinDecimator:
# ============================================================================= # =============================================================================
# Doppler Processor (Hamming window + 32-point FFT) # Doppler Processor (Hamming window + dual 16-point FFT)
# ============================================================================= # =============================================================================
# Hamming window LUT (32 entries, 16-bit unsigned Q15) # Hamming window LUT (16 entries, 16-bit unsigned Q15)
# Matches doppler_processor.v window_coeff[0:15]
# w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15, symmetric
HAMMING_WINDOW = [ HAMMING_WINDOW = [
0x0800, 0x0862, 0x09CB, 0x0C3B, 0x0FB2, 0x142F, 0x19B2, 0x2039, 0x0A3D, 0x0E5C, 0x1B6D, 0x3088, 0x4B33, 0x6573, 0x7642, 0x7F62,
0x27C4, 0x3050, 0x39DB, 0x4462, 0x4FE3, 0x5C5A, 0x69C4, 0x781D, 0x7F62, 0x7642, 0x6573, 0x4B33, 0x3088, 0x1B6D, 0x0E5C, 0x0A3D,
0x7FFF, 0x781D, 0x69C4, 0x5C5A, 0x4FE3, 0x4462, 0x39DB, 0x3050,
0x27C4, 0x2039, 0x19B2, 0x142F, 0x0FB2, 0x0C3B, 0x09CB, 0x0862,
] ]
class DopplerProcessor: class DopplerProcessor:
""" """
Bit-accurate model of doppler_processor_optimized.v Bit-accurate model of doppler_processor_optimized.v (dual 16-pt FFT architecture).
For each range bin (0-63): The staggered-PRF frame has 32 chirps total:
1. Read 32 chirps of data from accumulation buffer - Sub-frame 0 (long PRI): chirps 0-15 -> 16-pt Hamming -> 16-pt FFT -> bins 0-15
2. Apply Hamming window (Q15 multiply, round, >>>15) - Sub-frame 1 (short PRI): chirps 16-31 -> 16-pt Hamming -> 16-pt FFT -> bins 16-31
3. 32-point FFT
The 32-point FFT uses xfft_32.v (Xilinx IP wrapper around fft_engine). Output: doppler_bin[4:0] = {sub_frame_id, bin_in_subframe[3:0]}
For the Python model, we use FFTEngine with N=32. Total output per range bin: 32 bins (16 + 16), same interface as before.
""" """
DOPPLER_FFT_SIZE = 32 DOPPLER_FFT_SIZE = 16 # Per sub-frame
RANGE_BINS = 64 RANGE_BINS = 64
CHIRPS_PER_FRAME = 32 CHIRPS_PER_FRAME = 32
CHIRPS_PER_SUBFRAME = 16
def __init__(self, twiddle_file_32=None): def __init__(self, twiddle_file_16=None):
""" """
For 32-point FFT, we need the 32-point twiddle file. For 16-point FFT, we need the 16-point twiddle file.
If not provided, we generate twiddle factors mathematically If not provided, we generate twiddle factors mathematically
(since the 32-pt twiddle ROM is cos(2*pi*k/32) for k=0..7). (cos(2*pi*k/16) for k=0..3, quarter-wave ROM with 4 entries).
""" """
self.fft32 = None self.fft16 = None
self._twiddle_file_32 = twiddle_file_32 self._twiddle_file_16 = twiddle_file_16
# We'll use a simple 32-pt FFT with computed twiddles
@staticmethod @staticmethod
def window_multiply(data_16, window_16): def window_multiply(data_16, window_16):
@@ -1134,7 +1133,7 @@ class DopplerProcessor:
def process_frame(self, chirp_data_i, chirp_data_q): def process_frame(self, chirp_data_i, chirp_data_q):
""" """
Process one complete Doppler frame. Process one complete Doppler frame using dual 16-pt FFTs.
Args: Args:
chirp_data_i: 2D array [32 chirps][64 range bins] of signed 16-bit I chirp_data_i: 2D array [32 chirps][64 range bins] of signed 16-bit I
@@ -1143,46 +1142,63 @@ class DopplerProcessor:
Returns: Returns:
(doppler_map_i, doppler_map_q): 2D arrays [64 range bins][32 doppler bins] (doppler_map_i, doppler_map_q): 2D arrays [64 range bins][32 doppler bins]
of signed 16-bit of signed 16-bit
Bins 0-15 = sub-frame 0 (long PRI)
Bins 16-31 = sub-frame 1 (short PRI)
""" """
doppler_map_i = [] doppler_map_i = []
doppler_map_q = [] doppler_map_q = []
# Generate 32-pt twiddle factors (quarter-wave cos, 8 entries) # Generate 16-pt twiddle factors (quarter-wave cos, 4 entries)
# cos(2*pi*k/32) for k=0..7 # cos(2*pi*k/16) for k=0..3
# Matches fft_twiddle_16.mem: 7FFF, 7641, 5A82, 30FB
import math import math
cos_rom_32 = [] cos_rom_16 = []
for k in range(8): for k in range(4):
val = round(32767.0 * math.cos(2.0 * math.pi * k / 32.0)) val = round(32767.0 * math.cos(2.0 * math.pi * k / 16.0))
cos_rom_32.append(sign_extend(val & 0xFFFF, 16)) cos_rom_16.append(sign_extend(val & 0xFFFF, 16))
fft32 = FFTEngine.__new__(FFTEngine) fft16 = FFTEngine.__new__(FFTEngine)
fft32.N = 32 fft16.N = 16
fft32.LOG2N = 5 fft16.LOG2N = 4
fft32.cos_rom = cos_rom_32 fft16.cos_rom = cos_rom_16
fft32.mem_re = [0] * 32 fft16.mem_re = [0] * 16
fft32.mem_im = [0] * 32 fft16.mem_im = [0] * 16
for rbin in range(self.RANGE_BINS): for rbin in range(self.RANGE_BINS):
# Gather 32 chirps for this range bin # Output bins for this range bin: 32 total (16 from each sub-frame)
out_re = [0] * 32
out_im = [0] * 32
# Process each sub-frame independently
for sf in range(2):
chirp_start = sf * self.CHIRPS_PER_SUBFRAME
bin_offset = sf * self.DOPPLER_FFT_SIZE
fft_in_re = [] fft_in_re = []
fft_in_im = [] fft_in_im = []
for chirp in range(self.CHIRPS_PER_FRAME): for c in range(self.CHIRPS_PER_SUBFRAME):
chirp = chirp_start + c
re_val = sign_extend(chirp_data_i[chirp][rbin] & 0xFFFF, 16) re_val = sign_extend(chirp_data_i[chirp][rbin] & 0xFFFF, 16)
im_val = sign_extend(chirp_data_q[chirp][rbin] & 0xFFFF, 16) im_val = sign_extend(chirp_data_q[chirp][rbin] & 0xFFFF, 16)
# Apply Hamming window # Apply 16-pt Hamming window (index = c within sub-frame)
win_re = self.window_multiply(re_val, HAMMING_WINDOW[chirp]) win_re = self.window_multiply(re_val, HAMMING_WINDOW[c])
win_im = self.window_multiply(im_val, HAMMING_WINDOW[chirp]) win_im = self.window_multiply(im_val, HAMMING_WINDOW[c])
fft_in_re.append(win_re) fft_in_re.append(win_re)
fft_in_im.append(win_im) fft_in_im.append(win_im)
# 32-point forward FFT # 16-point forward FFT
fft_out_re, fft_out_im = fft32.compute(fft_in_re, fft_in_im, inverse=False) fft_out_re, fft_out_im = fft16.compute(fft_in_re, fft_in_im, inverse=False)
doppler_map_i.append(fft_out_re) # Pack into output: sub-frame 0 -> bins 0-15, sub-frame 1 -> bins 16-31
doppler_map_q.append(fft_out_im) for b in range(self.DOPPLER_FFT_SIZE):
out_re[bin_offset + b] = fft_out_re[b]
out_im[bin_offset + b] = fft_out_im[b]
doppler_map_i.append(out_re)
doppler_map_q.append(out_im)
return doppler_map_i, doppler_map_q return doppler_map_i, doppler_map_q
@@ -1207,7 +1223,7 @@ class SignalChain:
IF_FREQ = 120_000_000 # IF frequency IF_FREQ = 120_000_000 # IF frequency
FTW_120MHZ = 0x4CCCCCCD # Phase increment for 120 MHz at 400 MSPS FTW_120MHZ = 0x4CCCCCCD # Phase increment for 120 MHz at 400 MSPS
def __init__(self, twiddle_file_1024=None, twiddle_file_32=None): def __init__(self, twiddle_file_1024=None, twiddle_file_16=None):
self.nco = NCO() self.nco = NCO()
self.mixer = Mixer() self.mixer = Mixer()
self.cic_i = CICDecimator() self.cic_i = CICDecimator()
@@ -1217,7 +1233,7 @@ class SignalChain:
self.ddc_interface = DDCInputInterface() self.ddc_interface = DDCInputInterface()
self.matched_filter = MatchedFilterChain(fft_size=1024, twiddle_file=twiddle_file_1024) self.matched_filter = MatchedFilterChain(fft_size=1024, twiddle_file=twiddle_file_1024)
self.range_decimator = RangeBinDecimator() self.range_decimator = RangeBinDecimator()
self.doppler = DopplerProcessor(twiddle_file_32=twiddle_file_32) self.doppler = DopplerProcessor(twiddle_file_16=twiddle_file_16)
def ddc_step(self, adc_data_8bit, ftw=None): def ddc_step(self, adc_data_8bit, ftw=None):
""" """
@@ -3,23 +3,17 @@
Generate Doppler processor co-simulation golden reference data. Generate Doppler processor co-simulation golden reference data.
Uses the bit-accurate Python model (fpga_model.py) to compute the expected Uses the bit-accurate Python model (fpga_model.py) to compute the expected
Doppler FFT output. Also generates the input hex files consumed by the Doppler FFT output for the dual 16-pt FFT architecture. Also generates the
Verilog testbench (tb_doppler_cosim.v). input hex files consumed by the Verilog testbench (tb_doppler_cosim.v).
Two output modes: Architecture:
1. "clean" — straight Python model (correct windowing alignment) Sub-frame 0 (long PRI): chirps 0-15 -> 16-pt Hamming -> 16-pt FFT -> bins 0-15
2. "buggy" — replicates the RTL's windowing pipeline misalignment: Sub-frame 1 (short PRI): chirps 16-31 -> 16-pt Hamming -> 16-pt FFT -> bins 16-31
* Sample 0: fft_input = 0 (from reset mult value)
* Sample 1: fft_input = window_multiply(data[wrong_rbin_or_0], window[0])
* Sample k (k>=2): fft_input = window_multiply(data[k-2], window[k-1])
Default mode is "clean". The comparison script uses correlation-based
metrics that are tolerant of the pipeline shift.
Usage: Usage:
cd ~/PLFM_RADAR/9_Firmware/9_2_FPGA/tb/cosim cd ~/PLFM_RADAR/9_Firmware/9_2_FPGA/tb/cosim
python3 gen_doppler_golden.py # clean model python3 gen_doppler_golden.py
python3 gen_doppler_golden.py --buggy # replicate RTL pipeline bug python3 gen_doppler_golden.py stationary # single scenario
Author: Phase 0.5 Doppler co-simulation suite for PLFM_RADAR Author: Phase 0.5 Doppler co-simulation suite for PLFM_RADAR
""" """
@@ -31,7 +25,7 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from fpga_model import ( from fpga_model import (
DopplerProcessor, FFTEngine, sign_extend, HAMMING_WINDOW DopplerProcessor, sign_extend, HAMMING_WINDOW
) )
from radar_scene import Target, generate_doppler_frame from radar_scene import Target, generate_doppler_frame
@@ -40,7 +34,8 @@ from radar_scene import Target, generate_doppler_frame
# Constants # Constants
# ============================================================================= # =============================================================================
DOPPLER_FFT_SIZE = 32 DOPPLER_FFT_SIZE = 16 # Per sub-frame
DOPPLER_TOTAL_BINS = 32 # Total output (2 sub-frames x 16)
RANGE_BINS = 64 RANGE_BINS = 64
CHIRPS_PER_FRAME = 32 CHIRPS_PER_FRAME = 32
TOTAL_SAMPLES = CHIRPS_PER_FRAME * RANGE_BINS # 2048 TOTAL_SAMPLES = CHIRPS_PER_FRAME * RANGE_BINS # 2048
@@ -82,154 +77,6 @@ def write_hex_16bit(filepath, data):
# Buggy-model helpers (match RTL pipeline misalignment) # Buggy-model helpers (match RTL pipeline misalignment)
# ============================================================================= # =============================================================================
def window_multiply(data_16, window_16):
"""Hamming window multiply matching RTL."""
d = sign_extend(data_16 & 0xFFFF, 16)
w = sign_extend(window_16 & 0xFFFF, 16)
product = d * w
rounded = product + (1 << 14)
result = rounded >> 15
return sign_extend(result & 0xFFFF, 16)
def buggy_process_frame(chirp_data_i, chirp_data_q):
"""
Replicate the RTL's exact windowing pipeline for all 64 range bins.
For each range bin we model the three-stage pipeline:
Stage A (BRAM registered read):
mem_rdata captures doppler_i_mem[mem_read_addr] one cycle AFTER
mem_read_addr is presented.
Stage B (multiply):
mult_i <= mem_rdata_i * window_coeff[read_doppler_index]
-- read_doppler_index is the CURRENT cycle's value, but mem_rdata_i
-- is from the PREVIOUS cycle's address.
Stage C (round+shift):
fft_input_i <= (mult_i + (1<<14)) >>> 15
-- uses the PREVIOUS cycle's mult_i.
Additionally, at the S_ACCUMULATE->S_LOAD_FFT transition (rbin=0) or
S_OUTPUT->S_LOAD_FFT transition (rbin>0), the BRAM address during the
transition cycle depends on the stale read_doppler_index and read_range_bin
values.
This function models every detail to produce bit-exact FFT inputs.
"""
# Build the 32-pt FFT engine (matching fpga_model.py)
import math as _math
cos_rom_32 = []
for k in range(8):
val = round(32767.0 * _math.cos(2.0 * _math.pi * k / 32.0))
cos_rom_32.append(sign_extend(val & 0xFFFF, 16))
fft32 = FFTEngine.__new__(FFTEngine)
fft32.N = 32
fft32.LOG2N = 5
fft32.cos_rom = cos_rom_32
fft32.mem_re = [0] * 32
fft32.mem_im = [0] * 32
# Build flat BRAM contents: addr = chirp_index * 64 + range_bin
bram_i = [0] * TOTAL_SAMPLES
bram_q = [0] * TOTAL_SAMPLES
for chirp in range(CHIRPS_PER_FRAME):
for rb in range(RANGE_BINS):
addr = chirp * RANGE_BINS + rb
bram_i[addr] = sign_extend(chirp_data_i[chirp][rb] & 0xFFFF, 16)
bram_q[addr] = sign_extend(chirp_data_q[chirp][rb] & 0xFFFF, 16)
doppler_map_i = []
doppler_map_q = []
# State carried across range bins (simulates the RTL registers)
# After reset: read_doppler_index=0, read_range_bin=0, mult_i=0, mult_q=0,
# fft_input_i=0, fft_input_q=0
# The BRAM read is always active: mem_rdata <= doppler_i_mem[mem_read_addr]
# mem_read_addr = read_doppler_index * 64 + read_range_bin
# We need to track what read_doppler_index and read_range_bin are at each
# transition, since the BRAM captures data one cycle before S_LOAD_FFT runs.
# Before processing starts (just entered S_LOAD_FFT from S_ACCUMULATE):
# At the S_ACCUMULATE clock that transitions:
# read_doppler_index <= 0 (NBA)
# read_range_bin <= 0 (NBA)
# These take effect NEXT cycle. At the transition clock itself,
# read_doppler_index and read_range_bin still had their old values.
# From reset, both were 0. So BRAM captures addr=0*64+0=0.
#
# For rbin>0 transitions from S_OUTPUT:
# At S_OUTPUT clock:
# read_doppler_index <= 0 (was 0, since it wrapped from 32->0 in 5 bits)
# read_range_bin <= prev_rbin + 1 (NBA, takes effect next cycle)
# At S_OUTPUT clock, the current read_range_bin = prev_rbin,
# read_doppler_index = 0 (wrapped). So BRAM captures addr=0*64+prev_rbin.
for rbin in range(RANGE_BINS):
# Determine what BRAM data was captured during the transition clock
# (one cycle before S_LOAD_FFT's first execution cycle).
if rbin == 0:
# From S_ACCUMULATE: both indices were 0 (from reset or previous NBA)
# BRAM captures addr = 0*64+0 = 0 -> data[chirp=0][rbin=0]
transition_bram_addr = 0 * RANGE_BINS + 0
else:
# From S_OUTPUT: read_doppler_index=0 (wrapped), read_range_bin=rbin-1
# BRAM captures addr = 0*64+(rbin-1) -> data[chirp=0][rbin-1]
transition_bram_addr = 0 * RANGE_BINS + (rbin - 1)
transition_data_i = bram_i[transition_bram_addr]
transition_data_q = bram_q[transition_bram_addr]
# Now simulate the 32 cycles of S_LOAD_FFT for this range bin.
# Register pipeline state at entry:
mult_i_reg = 0 # From reset (rbin=0) or from end of previous S_FFT_WAIT
mult_q_reg = 0
fft_in_i_list = []
fft_in_q_list = []
for k in range(DOPPLER_FFT_SIZE):
# read_doppler_index = k at this cycle's start
# mem_read_addr = k * 64 + rbin
# What mem_rdata holds THIS cycle:
if k == 0:
# BRAM captured transition_bram_addr last cycle
rd_i = transition_data_i
rd_q = transition_data_q
else:
# BRAM captured addr from PREVIOUS cycle: (k-1)*64 + rbin
prev_addr = (k - 1) * RANGE_BINS + rbin
rd_i = bram_i[prev_addr]
rd_q = bram_q[prev_addr]
# Stage B: multiply (uses current read_doppler_index = k)
new_mult_i = sign_extend(rd_i & 0xFFFF, 16) * \
sign_extend(HAMMING_WINDOW[k] & 0xFFFF, 16)
new_mult_q = sign_extend(rd_q & 0xFFFF, 16) * \
sign_extend(HAMMING_WINDOW[k] & 0xFFFF, 16)
# Stage C: round+shift (uses PREVIOUS cycle's mult)
fft_i = (mult_i_reg + (1 << 14)) >> 15
fft_q = (mult_q_reg + (1 << 14)) >> 15
fft_in_i_list.append(sign_extend(fft_i & 0xFFFF, 16))
fft_in_q_list.append(sign_extend(fft_q & 0xFFFF, 16))
# Update pipeline registers for next cycle
mult_i_reg = new_mult_i
mult_q_reg = new_mult_q
# 32-point FFT
fft_out_re, fft_out_im = fft32.compute(
fft_in_i_list, fft_in_q_list, inverse=False
)
doppler_map_i.append(fft_out_re)
doppler_map_q.append(fft_out_im)
return doppler_map_i, doppler_map_q
# ============================================================================= # =============================================================================
# Test scenario definitions # Test scenario definitions
@@ -244,9 +91,10 @@ def make_scenario_stationary():
def make_scenario_moving(): def make_scenario_moving():
"""Single target with moderate Doppler shift.""" """Single target with moderate Doppler shift."""
# v = 15 m/s → fd = 2*v*fc/c ≈ 1050 Hz # v = 15 m/s → fd = 2*v*fc/c ≈ 1050 Hz
# PRI = 167 us → Doppler bin = fd * N_chirps * PRI = 1050 * 32 * 167e-6 ≈ 5.6 # Long PRI = 167 us → sub-frame 0 bin = fd * 16 * 167e-6 ≈ 2.8 → bin ~3
# Short PRI = 175 us → sub-frame 1 bin = fd * 16 * 175e-6 ≈ 2.9 → bin 16+3 = 19
targets = [Target(range_m=500, velocity_mps=15.0, rcs_dbsm=20.0)] targets = [Target(range_m=500, velocity_mps=15.0, rcs_dbsm=20.0)]
return targets, "Single moving target v=15m/s (~1050Hz Doppler, bin~5-6)" return targets, "Single moving target v=15m/s (~1050Hz Doppler, sf0 bin~3, sf1 bin~19)"
def make_scenario_two_targets(): def make_scenario_two_targets():
@@ -269,12 +117,11 @@ SCENARIOS = {
# Main generator # Main generator
# ============================================================================= # =============================================================================
def generate_scenario(name, targets, description, base_dir, use_buggy_model=False): def generate_scenario(name, targets, description, base_dir):
"""Generate input hex + golden output for one scenario.""" """Generate input hex + golden output for one scenario."""
print(f"\n{'='*60}") print(f"\n{'='*60}")
print(f"Scenario: {name}{description}") print(f"Scenario: {name}{description}")
model_label = "BUGGY (RTL pipeline)" if use_buggy_model else "CLEAN" print(f"Model: CLEAN (dual 16-pt FFT)")
print(f"Model: {model_label}")
print(f"{'='*60}") print(f"{'='*60}")
# Generate Doppler frame (32 chirps x 64 range bins) # Generate Doppler frame (32 chirps x 64 range bins)
@@ -292,26 +139,24 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
input_hex = os.path.join(base_dir, f"doppler_input_{name}.hex") input_hex = os.path.join(base_dir, f"doppler_input_{name}.hex")
write_hex_32bit(input_hex, packed_samples) write_hex_32bit(input_hex, packed_samples)
# ---- Run through Python model ---- # ---- Run through Python model (dual 16-pt FFT) ----
if use_buggy_model:
doppler_i, doppler_q = buggy_process_frame(frame_i, frame_q)
else:
dp = DopplerProcessor() dp = DopplerProcessor()
doppler_i, doppler_q = dp.process_frame(frame_i, frame_q) doppler_i, doppler_q = dp.process_frame(frame_i, frame_q)
print(f" Doppler output: {len(doppler_i)} range bins x " print(f" Doppler output: {len(doppler_i)} range bins x "
f"{len(doppler_i[0])} doppler bins") f"{len(doppler_i[0])} doppler bins (2 sub-frames x {DOPPLER_FFT_SIZE})")
# ---- Write golden output CSV ---- # ---- Write golden output CSV ----
# Format: range_bin, doppler_bin, out_i, out_q # Format: range_bin, doppler_bin, out_i, out_q
# Ordered same as RTL output: all doppler bins for rbin 0, then rbin 1, ... # Ordered same as RTL output: all doppler bins for rbin 0, then rbin 1, ...
# Bins 0-15 = sub-frame 0 (long PRI), bins 16-31 = sub-frame 1 (short PRI)
flat_rbin = [] flat_rbin = []
flat_dbin = [] flat_dbin = []
flat_i = [] flat_i = []
flat_q = [] flat_q = []
for rbin in range(RANGE_BINS): for rbin in range(RANGE_BINS):
for dbin in range(DOPPLER_FFT_SIZE): for dbin in range(DOPPLER_TOTAL_BINS):
flat_rbin.append(rbin) flat_rbin.append(rbin)
flat_dbin.append(dbin) flat_dbin.append(dbin)
flat_i.append(doppler_i[rbin][dbin]) flat_i.append(doppler_i[rbin][dbin])
@@ -331,8 +176,8 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
peak_info = [] peak_info = []
for rbin in range(RANGE_BINS): for rbin in range(RANGE_BINS):
mags = [abs(doppler_i[rbin][d]) + abs(doppler_q[rbin][d]) mags = [abs(doppler_i[rbin][d]) + abs(doppler_q[rbin][d])
for d in range(DOPPLER_FFT_SIZE)] for d in range(DOPPLER_TOTAL_BINS)]
peak_dbin = max(range(DOPPLER_FFT_SIZE), key=lambda d: mags[d]) peak_dbin = max(range(DOPPLER_TOTAL_BINS), key=lambda d: mags[d])
peak_mag = mags[peak_dbin] peak_mag = mags[peak_dbin]
peak_info.append((rbin, peak_dbin, peak_mag)) peak_info.append((rbin, peak_dbin, peak_mag))
@@ -341,33 +186,14 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
for rbin, dbin, mag in peak_info[:5]: for rbin, dbin, mag in peak_info[:5]:
i_val = doppler_i[rbin][dbin] i_val = doppler_i[rbin][dbin]
q_val = doppler_q[rbin][dbin] q_val = doppler_q[rbin][dbin]
print(f" rbin={rbin:2d}, dbin={dbin:2d}, mag={mag:6d}, " sf = dbin // DOPPLER_FFT_SIZE
bin_in_sf = dbin % DOPPLER_FFT_SIZE
print(f" rbin={rbin:2d}, dbin={dbin:2d} (sf{sf}:{bin_in_sf:2d}), mag={mag:6d}, "
f"I={i_val:6d}, Q={q_val:6d}") f"I={i_val:6d}, Q={q_val:6d}")
# ---- Write frame data for debugging ----
# Also write per-range-bin FFT input (for debugging pipeline alignment)
if use_buggy_model:
# Write the buggy FFT inputs for debugging
debug_csv = os.path.join(base_dir, f"doppler_fft_inputs_{name}.csv")
# Regenerate to capture FFT inputs
dp_debug = DopplerProcessor()
clean_i, clean_q = dp_debug.process_frame(frame_i, frame_q)
# Show the difference between clean and buggy
print(f"\n Comparing clean vs buggy model outputs:")
mismatches = 0
for rbin in range(RANGE_BINS):
for dbin in range(DOPPLER_FFT_SIZE):
if (doppler_i[rbin][dbin] != clean_i[rbin][dbin] or
doppler_q[rbin][dbin] != clean_q[rbin][dbin]):
mismatches += 1
total = RANGE_BINS * DOPPLER_FFT_SIZE
print(f" {mismatches}/{total} output samples differ "
f"({100*mismatches/total:.1f}%)")
return { return {
'name': name, 'name': name,
'description': description, 'description': description,
'model': 'buggy' if use_buggy_model else 'clean',
'peak_info': peak_info[:5], 'peak_info': peak_info[:5],
} }
@@ -375,11 +201,9 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
def main(): def main():
base_dir = os.path.dirname(os.path.abspath(__file__)) base_dir = os.path.dirname(os.path.abspath(__file__))
use_buggy = '--buggy' in sys.argv
print("=" * 60) print("=" * 60)
print("Doppler Processor Co-Sim Golden Reference Generator") print("Doppler Processor Co-Sim Golden Reference Generator")
print(f"Model: {'BUGGY (RTL pipeline replication)' if use_buggy else 'CLEAN'}") print(f"Architecture: dual {DOPPLER_FFT_SIZE}-pt FFT ({DOPPLER_TOTAL_BINS} total bins)")
print("=" * 60) print("=" * 60)
scenarios_to_run = list(SCENARIOS.keys()) scenarios_to_run = list(SCENARIOS.keys())
@@ -395,15 +219,14 @@ def main():
results = [] results = []
for name in scenarios_to_run: for name in scenarios_to_run:
targets, description = SCENARIOS[name]() targets, description = SCENARIOS[name]()
r = generate_scenario(name, targets, description, base_dir, r = generate_scenario(name, targets, description, base_dir)
use_buggy_model=use_buggy)
results.append(r) results.append(r)
print(f"\n{'='*60}") print(f"\n{'='*60}")
print("Summary:") print("Summary:")
print(f"{'='*60}") print(f"{'='*60}")
for r in results: for r in results:
print(f" {r['name']:<15s} [{r['model']}] top peak: " print(f" {r['name']:<15s} top peak: "
f"rbin={r['peak_info'][0][0]}, dbin={r['peak_info'][0][1]}, " f"rbin={r['peak_info'][0][0]}, dbin={r['peak_info'][0][1]}, "
f"mag={r['peak_info'][0][2]}") f"mag={r['peak_info'][0][2]}")
+17 -7
View File
@@ -48,19 +48,24 @@ ADC_BITS = 8 # ADC resolution
T_LONG_CHIRP = 30e-6 # 30 us long chirp duration T_LONG_CHIRP = 30e-6 # 30 us long chirp duration
T_SHORT_CHIRP = 0.5e-6 # 0.5 us short chirp T_SHORT_CHIRP = 0.5e-6 # 0.5 us short chirp
T_LISTEN_LONG = 137e-6 # 137 us listening window T_LISTEN_LONG = 137e-6 # 137 us listening window
T_PRI_LONG = 167e-6 # 30 us chirp + 137 us listen
T_PRI_SHORT = 175e-6 # staggered short-PRI sub-frame
N_SAMPLES_LISTEN = int(T_LISTEN_LONG * FS_ADC) # 54800 samples N_SAMPLES_LISTEN = int(T_LISTEN_LONG * FS_ADC) # 54800 samples
# Processing chain # Processing chain
CIC_DECIMATION = 4 CIC_DECIMATION = 4
FFT_SIZE = 1024 FFT_SIZE = 1024
RANGE_BINS = 64 RANGE_BINS = 64
DOPPLER_FFT_SIZE = 32 DOPPLER_FFT_SIZE = 16 # Per sub-frame
DOPPLER_TOTAL_BINS = 32 # Total output bins (2 sub-frames x 16)
CHIRPS_PER_SUBFRAME = 16
CHIRPS_PER_FRAME = 32 CHIRPS_PER_FRAME = 32
# Derived # Derived
RANGE_RESOLUTION = C_LIGHT / (2 * CHIRP_BW) # 7.5 m RANGE_RESOLUTION = C_LIGHT / (2 * CHIRP_BW) # 7.5 m
MAX_UNAMBIGUOUS_RANGE = C_LIGHT * T_LISTEN_LONG / 2 # ~20.55 km MAX_UNAMBIGUOUS_RANGE = C_LIGHT * T_LISTEN_LONG / 2 # ~20.55 km
VELOCITY_RESOLUTION = WAVELENGTH / (2 * CHIRPS_PER_FRAME * T_LONG_CHIRP) VELOCITY_RESOLUTION_LONG = WAVELENGTH / (2 * CHIRPS_PER_SUBFRAME * T_PRI_LONG)
VELOCITY_RESOLUTION_SHORT = WAVELENGTH / (2 * CHIRPS_PER_SUBFRAME * T_PRI_SHORT)
# Short chirp LUT (60 entries, 8-bit unsigned) # Short chirp LUT (60 entries, 8-bit unsigned)
SHORT_CHIRP_LUT = [ SHORT_CHIRP_LUT = [
@@ -384,9 +389,6 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
break break
return math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2) return math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2)
# Chirp repetition interval (PRI)
t_pri = T_LONG_CHIRP + T_LISTEN_LONG # ~167 us
frame_i = [] frame_i = []
frame_q = [] frame_q = []
@@ -408,8 +410,16 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
# Amplitude (simplified) # Amplitude (simplified)
amp = target.amplitude / 4.0 amp = target.amplitude / 4.0
# Doppler phase for this chirp # Doppler phase for this chirp.
doppler_phase = 2 * math.pi * target.doppler_hz * chirp_idx * t_pri # The frame uses staggered PRF: chirps 0-15 use the long PRI,
# chirps 16-31 use the short PRI.
if chirp_idx < CHIRPS_PER_SUBFRAME:
slow_time_s = chirp_idx * T_PRI_LONG
else:
slow_time_s = (CHIRPS_PER_SUBFRAME * T_PRI_LONG) + \
((chirp_idx - CHIRPS_PER_SUBFRAME) * T_PRI_SHORT)
doppler_phase = 2 * math.pi * target.doppler_hz * slow_time_s
total_phase = doppler_phase + target.phase_deg * math.pi / 180.0 total_phase = doppler_phase + target.phase_deg * math.pi / 180.0
# Spread across a few bins (sinc-like response from matched filter) # Spread across a few bins (sinc-like response from matched filter)
@@ -91,6 +91,7 @@ doppler_processor_optimized dut (
.doppler_valid(doppler_valid), .doppler_valid(doppler_valid),
.doppler_bin(doppler_bin), .doppler_bin(doppler_bin),
.range_bin(range_bin), .range_bin(range_bin),
.sub_frame(), // Not used in this testbench
.processing_active(processing_active), .processing_active(processing_active),
.frame_complete(frame_complete), .frame_complete(frame_complete),
.status(dut_status) .status(dut_status)
@@ -75,6 +75,7 @@ doppler_processor_optimized dut (
.doppler_valid(doppler_valid), .doppler_valid(doppler_valid),
.doppler_bin(doppler_bin), .doppler_bin(doppler_bin),
.range_bin(range_bin), .range_bin(range_bin),
.sub_frame(), // Not used in this testbench
.processing_active(processing_active), .processing_active(processing_active),
.frame_complete(frame_complete), .frame_complete(frame_complete),
.status(dut_status) .status(dut_status)
+252
View File
@@ -0,0 +1,252 @@
`timescale 1ns / 1ps
// ============================================================================
// xfft_16.v - 16-point FFT with AXI-Stream interface
// ============================================================================
// Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
// interface expected by the doppler_processor dual-FFT architecture.
//
// Identical interface to xfft_32.v but with N=16.
//
// Data format: {Q[15:0], I[15:0]} packed 32-bit.
// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
// ============================================================================
// ----------------------------------------------------------------------------
// xfft_16
//
// Buffers one 16-sample frame from the data input channel, runs it through
// the fft_engine instance below, buffers the 16 results, and then presents
// them on the data output channel.
//
// Per-frame sequence (see MAIN FSM):
//   S_IDLE   : wait for a configuration beat (direction is latched here)
//   S_FEED   : capture N input samples into in_buf_*
//   S_WAIT   : replay in_buf_* into fft_engine, capture results in out_buf_*
//   S_OUTPUT : present out_buf_* on m_axis_data_*, one beat per tready cycle
//
// Interface notes:
//   * There is no s_axis_data_tready port. Input samples are only captured
//     while the FSM is in S_FEED; s_axis_data_tvalid is not examined in any
//     other state.
//   * s_axis_data_tlast is not referenced anywhere in this module; the frame
//     length is fixed by the local sample counter.
//   * A configuration beat is required before every frame, because the FSM
//     returns to S_IDLE after the last output beat.
// ----------------------------------------------------------------------------
module xfft_16 (
input wire aclk,
// Active-low reset; used asynchronously by the main FSM block.
input wire aresetn,
// Configuration channel (AXI-Stream slave)
// Only bit 0 of tdata is used: 1 = forward FFT, 0 = inverse FFT.
input wire [7:0] s_axis_config_tdata,
input wire s_axis_config_tvalid,
output wire s_axis_config_tready,
// Data input channel (AXI-Stream slave)
// tdata[15:0] is stored as the real part, tdata[31:16] as the imaginary part.
input wire [31:0] s_axis_data_tdata,
input wire s_axis_data_tvalid,
// NOTE(review): tlast is accepted for interface compatibility but unused.
input wire s_axis_data_tlast,
// Data output channel (AXI-Stream master)
// tdata is packed as {imaginary[15:0], real[15:0]}.
output wire [31:0] m_axis_data_tdata,
output wire m_axis_data_tvalid,
output wire m_axis_data_tlast,
input wire m_axis_data_tready
);
// ============================================================================
// PARAMETERS
// ============================================================================
// Transform length and its base-2 logarithm, passed through to fft_engine.
localparam N = 16;
localparam LOG2N = 4;
// ============================================================================
// INTERNAL SIGNALS
// ============================================================================
// FSM states
// NOTE(review): S_CONFIG is declared but never assigned to `state`; if it
// were ever reached, the case default would send the FSM back to S_IDLE.
localparam [2:0] S_IDLE = 3'd0,
S_CONFIG = 3'd1,
S_FEED = 3'd2,
S_WAIT = 3'd3,
S_OUTPUT = 3'd4;
reg [2:0] state;
// Configuration
// Latched from the config beat as the complement of tdata[0].
reg inverse_reg;
// Input buffering
// Counters are 5 bits wide so they can hold the terminal value N (16);
// only the low 4 bits are used as buffer addresses.
reg signed [15:0] in_buf_re [0:N-1];
reg signed [15:0] in_buf_im [0:N-1];
reg [4:0] in_count;
// Output buffering
// out_count indexes the beat currently presented on the output channel;
// out_total counts results captured from fft_engine.
reg signed [15:0] out_buf_re [0:N-1];
reg signed [15:0] out_buf_im [0:N-1];
reg [4:0] out_count;
reg [4:0] out_total;
// FFT engine interface
reg fft_start;
reg fft_inverse;
reg signed [15:0] fft_din_re, fft_din_im;
reg fft_din_valid;
wire signed [15:0] fft_dout_re, fft_dout_im;
wire fft_dout_valid;
// NOTE(review): fft_busy is connected but not read by this wrapper.
wire fft_busy;
wire fft_done;
// Feed counter
// Number of buffered samples already replayed into fft_engine.
reg [4:0] feed_count;
// ============================================================================
// FFT ENGINE INSTANCE
// ============================================================================
// fft_engine is defined outside this file; its handshake timing is not
// visible here.
fft_engine #(
.N(N),
.LOG2N(LOG2N),
.DATA_W(16),
.INTERNAL_W(32),
.TWIDDLE_W(16),
.TWIDDLE_FILE("fft_twiddle_16.mem")
) fft_core (
.clk(aclk),
.reset_n(aresetn),
.start(fft_start),
.inverse(fft_inverse),
.din_re(fft_din_re),
.din_im(fft_din_im),
.din_valid(fft_din_valid),
.dout_re(fft_dout_re),
.dout_im(fft_dout_im),
.dout_valid(fft_dout_valid),
.busy(fft_busy),
.done(fft_done)
);
// ============================================================================
// AXI-STREAM OUTPUTS
// ============================================================================
// Configuration is only accepted while idle.
assign s_axis_config_tready = (state == S_IDLE);
// Output data is read combinationally from the output buffer at out_count.
assign m_axis_data_tdata = {out_buf_im[out_count[3:0]], out_buf_re[out_count[3:0]]};
// Valid for beats 0..N-1 of S_OUTPUT; tlast marks beat N-1.
assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
assign m_axis_data_tlast = (state == S_OUTPUT) && (out_count == N - 1);
// ============================================================================
// BUFFER WRITE LOGIC - separate always block, NO async reset
// ============================================================================
// The FSM registers a write enable, address and data; the actual array write
// happens here on the following clock edge, i.e. one cycle after the FSM
// decided to store the sample.
// NOTE(review): keeping the arrays out of the reset block is presumably to
// allow memory inference during synthesis - confirm with the tool flow.
reg in_buf_we;
reg [3:0] in_buf_waddr;
reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
reg out_buf_we;
reg [3:0] out_buf_waddr;
reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
always @(posedge aclk) begin
if (in_buf_we) begin
in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
end
if (out_buf_we) begin
out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
end
end
// ============================================================================
// MAIN FSM
// ============================================================================
always @(posedge aclk or negedge aresetn) begin
if (!aresetn) begin
state <= S_IDLE;
inverse_reg <= 1'b0;
in_count <= 0;
out_count <= 0;
out_total <= 0;
feed_count <= 0;
fft_start <= 1'b0;
fft_inverse <= 1'b0;
fft_din_re <= 0;
fft_din_im <= 0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
in_buf_waddr <= 0;
in_buf_wdata_re <= 0;
in_buf_wdata_im <= 0;
out_buf_we <= 1'b0;
out_buf_waddr <= 0;
out_buf_wdata_re <= 0;
out_buf_wdata_im <= 0;
end else begin
// Defaults: these strobes are single-cycle pulses unless a state below
// re-asserts them in the same cycle (later non-blocking assignment wins).
fft_start <= 1'b0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
out_buf_we <= 1'b0;
case (state)
// Wait for a config beat; latch the transform direction and clear the
// per-frame counters before collecting samples.
S_IDLE: begin
in_count <= 0;
if (s_axis_config_tvalid) begin
// tdata[0] = 1 selects forward, so the inverse flag is its complement.
inverse_reg <= ~s_axis_config_tdata[0];
state <= S_FEED;
in_count <= 0;
feed_count <= 0;
end
end
// Capture N input samples, then pulse fft_start for one cycle.
S_FEED: begin
if (in_count < N) begin
if (s_axis_data_tvalid) begin
// Schedule a buffer write for the next clock edge.
in_buf_we <= 1'b1;
in_buf_waddr <= in_count[3:0];
in_buf_wdata_re <= s_axis_data_tdata[15:0];
in_buf_wdata_im <= s_axis_data_tdata[31:16];
in_count <= in_count + 1;
end
// feed_count is always 0 here because S_IDLE cleared it on entry.
end else if (feed_count == 0) begin
fft_start <= 1'b1;
fft_inverse <= inverse_reg;
feed_count <= 0;
state <= S_WAIT;
out_total <= 0;
end
end
// Replay the buffered samples into the engine (one per cycle) while
// capturing its results; leave when the engine reports done.
// fft_start is high during the first S_WAIT cycle and the first
// fft_din_valid is presented on the cycle after that.
// NOTE(review): assumes fft_engine expects exactly this start-to-data
// spacing and emits all N results no later than fft_done - confirm
// against fft_engine.
S_WAIT: begin
if (feed_count < N) begin
fft_din_re <= in_buf_re[feed_count[3:0]];
fft_din_im <= in_buf_im[feed_count[3:0]];
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
// Store results in arrival order; anything beyond N results is ignored.
if (fft_dout_valid && out_total < N) begin
out_buf_we <= 1'b1;
out_buf_waddr <= out_total[3:0];
out_buf_wdata_re <= fft_dout_re;
out_buf_wdata_im <= fft_dout_im;
out_total <= out_total + 1;
end
if (fft_done) begin
state <= S_OUTPUT;
out_count <= 0;
end
end
// Present results; advance one beat per cycle in which tready is high and
// return to S_IDLE once the last beat (index N-1) has been accepted.
// NOTE(review): both inner actions are additionally gated by
// m_axis_data_tready, so the `|| !m_axis_data_tvalid` term of the outer
// condition has no effect on behaviour.
S_OUTPUT: begin
if (m_axis_data_tready || !m_axis_data_tvalid) begin
if (out_count < N) begin
if (m_axis_data_tready) begin
out_count <= out_count + 1;
end
end
if (out_count >= N - 1 && m_axis_data_tready) begin
state <= S_IDLE;
end
end
end
// Unused encodings (including S_CONFIG) recover to S_IDLE.
default: state <= S_IDLE;
endcase
end
end
// ============================================================================
// MEMORY INIT (simulation only)
// ============================================================================
// Zero-fills both buffers when SIMULATION is defined; compiled out otherwise.
`ifdef SIMULATION
integer init_k;
initial begin
for (init_k = 0; init_k < N; init_k = init_k + 1) begin
in_buf_re[init_k] = 0;
in_buf_im[init_k] = 0;
out_buf_re[init_k] = 0;
out_buf_im[init_k] = 0;
end
end
`endif
endmodule