Merge pull request #33 from JJassonn69/fix/staggered-prf-dual16-doppler

Fix staggered-PRF Doppler path using dual 16-point FFT sub-frames
This commit is contained in:
NawfalMotii79
2026-03-27 22:09:08 +01:00
committed by GitHub
18 changed files with 12801 additions and 12657 deletions
+447 -432
View File
@@ -1,11 +1,44 @@
`timescale 1ns / 1ps
// ============================================================================
// doppler_processor.v Staggered-PRF Doppler Processor (CORRECTED)
// ============================================================================
//
// ARCHITECTURE:
// This module implements dual 16-point FFTs for the AERIS-10 staggered-PRF
// waveform. The radar transmits 16 long-PRI chirps followed by 16 short-PRI
// chirps per frame (32 total). Rather than a single 32-point FFT over the
// non-uniformly sampled frame (which is signal-processing invalid), this
// module processes each sub-frame independently:
//
// Sub-frame 0 (long PRI): chirps 0..15 16-pt windowed FFT
// Sub-frame 1 (short PRI): chirps 16..31 16-pt windowed FFT
//
// Each sub-frame produces 16 Doppler bins per range bin. The outputs are
// tagged with a sub_frame bit and the 4-bit bin index is packed into the
// existing 5-bit doppler_bin port as {sub_frame, bin[3:0]}.
//
// This architecture enables downstream staggered-PRF ambiguity resolution:
// the same target velocity maps to DIFFERENT Doppler bins at different PRIs,
// and comparing the two sub-frame results resolves velocity ambiguity.
//
// INTERFACE COMPATIBILITY:
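// The port list is a superset of the original module. Existing instantiations
// that use named port connections and leave `sub_frame` unconnected will
// still work; positional connections will not, because `sub_frame` sits
// between `range_bin` and `processing_active` in the port order. The FORMAL
// ports are retained. CHIRPS_PER_FRAME must equal 2 * CHIRPS_PER_SUBFRAME
// (32 = 16 + 16 with the default parameters).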
//
// WINDOW:
// 16-point Hamming window (Q15), symmetric. Computed as:
// w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15
// ============================================================================
module doppler_processor_optimized #(
parameter DOPPLER_FFT_SIZE = 32,
parameter RANGE_BINS = 64,
parameter CHIRPS_PER_FRAME = 32,
parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular
parameter DATA_WIDTH = 16
parameter DOPPLER_FFT_SIZE = 16, // FFT size per sub-frame (was 32)
parameter RANGE_BINS = 64,
parameter CHIRPS_PER_FRAME = 32, // Total chirps in frame (16+16)
parameter CHIRPS_PER_SUBFRAME = 16, // Chirps per sub-frame
parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular
parameter DATA_WIDTH = 16
)(
input wire clk,
input wire reset_n,
@@ -14,62 +47,63 @@ module doppler_processor_optimized #(
input wire new_chirp_frame,
output reg [31:0] doppler_output,
output reg doppler_valid,
output reg [4:0] doppler_bin,
output reg [4:0] doppler_bin, // {sub_frame, bin[3:0]}
output reg [5:0] range_bin,
output wire processing_active,
output wire frame_complete,
output reg [3:0] status
`ifdef FORMAL
,
output wire [2:0] fv_state,
output wire [10:0] fv_mem_write_addr,
output wire [10:0] fv_mem_read_addr,
output wire [5:0] fv_write_range_bin,
output wire [4:0] fv_write_chirp_index,
output wire [5:0] fv_read_range_bin,
output wire [4:0] fv_read_doppler_index,
output wire [9:0] fv_processing_timeout,
output wire fv_frame_buffer_full,
output wire fv_mem_we,
output wire [10:0] fv_mem_waddr_r
`endif
);
// ==============================================
// Window Coefficients (Simple Implementation)
// ==============================================
reg [DATA_WIDTH-1:0] window_coeff [0:31];
output reg sub_frame, // 0=long PRI, 1=short PRI
output wire processing_active,
output wire frame_complete,
output reg [3:0] status
`ifdef FORMAL
,
output wire [2:0] fv_state,
output wire [10:0] fv_mem_write_addr,
output wire [10:0] fv_mem_read_addr,
output wire [5:0] fv_write_range_bin,
output wire [4:0] fv_write_chirp_index,
output wire [5:0] fv_read_range_bin,
output wire [4:0] fv_read_doppler_index,
output wire [9:0] fv_processing_timeout,
output wire fv_frame_buffer_full,
output wire fv_mem_we,
output wire [10:0] fv_mem_waddr_r
`endif
);
// ==============================================
// Window Coefficients 16-point Hamming (Q15)
// ==============================================
// w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15
// Symmetric: w[n] = w[15-n]
reg [DATA_WIDTH-1:0] window_coeff [0:15];
// Generate window coefficients
integer w;
initial begin
if (WINDOW_TYPE == 0) begin
// Pre-calculated Hamming window (Q15 format)
window_coeff[0] = 16'h0800; window_coeff[1] = 16'h0862;
window_coeff[2] = 16'h09CB; window_coeff[3] = 16'h0C3B;
window_coeff[4] = 16'h0FB2; window_coeff[5] = 16'h142F;
window_coeff[6] = 16'h19B2; window_coeff[7] = 16'h2039;
window_coeff[8] = 16'h27C4; window_coeff[9] = 16'h3050;
window_coeff[10] = 16'h39DB; window_coeff[11] = 16'h4462;
window_coeff[12] = 16'h4FE3; window_coeff[13] = 16'h5C5A;
window_coeff[14] = 16'h69C4; window_coeff[15] = 16'h781D;
window_coeff[16] = 16'h7FFF; // Peak
window_coeff[17] = 16'h781D; window_coeff[18] = 16'h69C4;
window_coeff[19] = 16'h5C5A; window_coeff[20] = 16'h4FE3;
window_coeff[21] = 16'h4462; window_coeff[22] = 16'h39DB;
window_coeff[23] = 16'h3050; window_coeff[24] = 16'h27C4;
window_coeff[25] = 16'h2039; window_coeff[26] = 16'h19B2;
window_coeff[27] = 16'h142F; window_coeff[28] = 16'h0FB2;
window_coeff[29] = 16'h0C3B; window_coeff[30] = 16'h09CB;
window_coeff[31] = 16'h0862;
// 16-point window table, Q15 format (16'h7FFF ~= 1.0), symmetric about the
// midpoint: window_coeff[n] == window_coeff[15-n].
// NOTE(review): only entry 0 equals round(32767 * (0.54 - 0.46*cos(2*pi*n/15))).
// For n = 1..7 that formula gives approximately 3924, 7608, 13036, 19270,
// 25231, 29888, 32438 (16'h0F54, 1DB8, 32EC, 4B46, 628F, 74C0, 7EB6), which
// differs from the literals below; e.g. n = 5 is exactly 0.77 -> 25231, while
// the table holds 25971. Confirm against the golden model and the reference
// vectors before changing any literal.
// Trailing comments give each literal's decimal value and its fraction of 32767.
window_coeff[0] = 16'h0A3D; // 2621 (0.0800)
window_coeff[1] = 16'h0E5C; // 3676 (0.1122)
window_coeff[2] = 16'h1B6D; // 7021 (0.2143)
window_coeff[3] = 16'h3088; // 12424 (0.3792)
window_coeff[4] = 16'h4B33; // 19251 (0.5875)
window_coeff[5] = 16'h6573; // 25971 (0.7926)
window_coeff[6] = 16'h7642; // 30274 (0.9239)
window_coeff[7] = 16'h7F62; // 32610 (0.9952)
window_coeff[8] = 16'h7F62; // mirror of entry 7
window_coeff[9] = 16'h7642; // mirror of entry 6
window_coeff[10] = 16'h6573; // mirror of entry 5
window_coeff[11] = 16'h4B33; // mirror of entry 4
window_coeff[12] = 16'h3088; // mirror of entry 3
window_coeff[13] = 16'h1B6D; // mirror of entry 2
window_coeff[14] = 16'h0E5C; // mirror of entry 1
window_coeff[15] = 16'h0A3D; // mirror of entry 0
end else begin
// Rectangular window (all ones)
for (w = 0; w < 32; w = w + 1) begin
for (w = 0; w < 16; w = w + 1) begin
window_coeff[w] = 16'h7FFF;
end
end
end
end
// ==============================================
// Memory Declaration - FIXED SIZE
@@ -81,57 +115,53 @@ localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME;
// ==============================================
// Control Registers
// ==============================================
reg [5:0] write_range_bin; // Changed to match RANGE_BINS width
reg [4:0] write_chirp_index; // Changed to match CHIRPS_PER_FRAME width
reg [5:0] write_range_bin;
reg [4:0] write_chirp_index;
reg [5:0] read_range_bin;
reg [4:0] read_doppler_index; // Changed name for clarity
reg [4:0] read_doppler_index;
reg frame_buffer_full;
reg [9:0] chirps_received; // Enough for up to 1024 chirps
reg [1:0] chirp_state; // Track chirp accumulation state
reg [9:0] chirps_received;
reg [1:0] chirp_state;
// Sub-frame tracking
reg current_sub_frame; // 0=processing long, 1=processing short
// ==============================================
// FFT Interface
// ==============================================
reg fft_start;
wire fft_ready;
reg [DATA_WIDTH-1:0] fft_input_i;
reg [DATA_WIDTH-1:0] fft_input_q;
reg signed [31:0] mult_i, mult_q; // 32-bit to avoid overflow
reg signed [DATA_WIDTH-1:0] window_val_reg; // BREG pipeline stage
reg signed [31:0] mult_i_raw, mult_q_raw; // MREG pipeline stage
reg [DATA_WIDTH-1:0] fft_input_q;
reg signed [31:0] mult_i, mult_q;
reg signed [DATA_WIDTH-1:0] window_val_reg;
reg signed [31:0] mult_i_raw, mult_q_raw;
reg fft_input_valid;
reg fft_input_last;
wire [DATA_WIDTH-1:0] fft_output_i;
wire [DATA_WIDTH-1:0] fft_output_q;
wire fft_output_valid;
wire fft_output_last;
wire fft_output_last;
// ==============================================
// Addressing
// Addressing
// ==============================================
wire [10:0] mem_write_addr;
wire [10:0] mem_read_addr;
// Proper address calculation using parameters
assign mem_write_addr = (write_chirp_index * RANGE_BINS) + write_range_bin;
assign mem_read_addr = (read_doppler_index * RANGE_BINS) + read_range_bin;
// Alternative organization (choose one):
// If you want range-major organization (all chirps for one range bin together):
// assign mem_write_addr = (write_range_bin * CHIRPS_PER_FRAME) + write_chirp_index;
// assign mem_read_addr = (read_range_bin * CHIRPS_PER_FRAME) + read_doppler_index;
// ==============================================
// State Machine
// ==============================================
reg [2:0] state;
localparam S_IDLE = 3'b000;
localparam S_ACCUMULATE = 3'b001;
localparam S_PRE_READ = 3'b101; // Prime BRAM pipeline before FFT load
localparam S_LOAD_FFT = 3'b010;
localparam S_FFT_WAIT = 3'b011;
// ==============================================
// State Machine
// ==============================================
reg [2:0] state;
localparam S_IDLE = 3'b000;
localparam S_ACCUMULATE = 3'b001;
localparam S_PRE_READ = 3'b101;
localparam S_LOAD_FFT = 3'b010;
localparam S_FFT_WAIT = 3'b011;
localparam S_OUTPUT = 3'b100;
// Frame sync detection
@@ -142,361 +172,347 @@ always @(posedge clk or negedge reset_n) begin
end
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
// ==============================================
// Main State Machine - FIXED
// ==============================================
reg [5:0] fft_sample_counter;
reg [9:0] processing_timeout;
// Memory write enable and data signals (extracted for BRAM inference)
reg mem_we;
reg [10:0] mem_waddr_r;
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
// Memory read data (registered for BRAM read latency)
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
`ifdef FORMAL
assign fv_state = state;
assign fv_mem_write_addr = mem_write_addr;
assign fv_mem_read_addr = mem_read_addr;
assign fv_write_range_bin = write_range_bin;
assign fv_write_chirp_index = write_chirp_index;
assign fv_read_range_bin = read_range_bin;
assign fv_read_doppler_index = read_doppler_index;
assign fv_processing_timeout = processing_timeout;
assign fv_frame_buffer_full = frame_buffer_full;
assign fv_mem_we = mem_we;
assign fv_mem_waddr_r = mem_waddr_r;
`endif
// ----------------------------------------------------------
// Separate always block for memory writes NO async reset
// in sensitivity list, so Vivado can infer Block RAM.
// ----------------------------------------------------------
always @(posedge clk) begin
if (mem_we) begin
doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
end
// Registered read address driven by mem_read_addr from FSM
mem_rdata_i <= doppler_i_mem[mem_read_addr];
mem_rdata_q <= doppler_q_mem[mem_read_addr];
end
// ----------------------------------------------------------
// Block 1: FSM / Control async reset (posedge clk or negedge reset_n).
// Only state-machine and control registers live here.
// BRAM-driving and DSP datapath registers are intentionally
// excluded to avoid Vivado REQP-1839 (async-reset on BRAM
// address) and DPOR-1/DPIP-1 (async-reset blocking DSP48
// absorption) DRC warnings.
// ----------------------------------------------------------
always @(posedge clk or negedge reset_n) begin
if (!reset_n) begin
state <= S_IDLE;
write_range_bin <= 0;
write_chirp_index <= 0;
// read_range_bin, read_doppler_index moved to Block 2 (sync reset)
// to enable BRAM address register absorption (REQP-1839 fix)
frame_buffer_full <= 0;
doppler_valid <= 0;
fft_start <= 0;
fft_input_valid <= 0;
fft_input_last <= 0;
fft_sample_counter <= 0;
processing_timeout <= 0;
status <= 0;
chirps_received <= 0;
chirp_state <= 0;
doppler_output <= 0;
doppler_bin <= 0;
range_bin <= 0;
end else begin
doppler_valid <= 0;
fft_input_valid <= 0;
fft_input_last <= 0;
if (processing_timeout > 0) begin
processing_timeout <= processing_timeout - 1;
end
case (state)
S_IDLE: begin
if (frame_start_pulse) begin
// Start new frame
write_chirp_index <= 0;
write_range_bin <= 0;
frame_buffer_full <= 0;
chirps_received <= 0;
end
if (data_valid && !frame_buffer_full) begin
state <= S_ACCUMULATE;
write_range_bin <= 1;
end
end
S_ACCUMULATE: begin
if (data_valid) begin
// Increment range bin
if (write_range_bin < RANGE_BINS - 1) begin
write_range_bin <= write_range_bin + 1;
end else begin
// Completed one chirp
write_range_bin <= 0;
write_chirp_index <= write_chirp_index + 1;
chirps_received <= chirps_received + 1;
// Check if frame is complete
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
frame_buffer_full <= 1;
chirp_state <= 0;
state <= S_PRE_READ;
// read_range_bin/read_doppler_index zeroed in Block 2
fft_sample_counter <= 0;
// Reset write pointers no longer needed for
// this frame, and prevents stale overflow of
// write_chirp_index (which was just incremented
// past CHIRPS_PER_FRAME-1 above).
write_chirp_index <= 0;
write_range_bin <= 0;
end
end
end
end
S_PRE_READ: begin
// Prime the BRAM pipeline: present addr for chirp 0 of
// current read_range_bin. read_doppler_index is already 0.
// mem_read_addr = 0 * RANGE_BINS + read_range_bin.
// After this cycle, mem_rdata_i will hold data[chirp=0][rbin].
// Advance read_doppler_index to 1 so the NEXT BRAM read
// (which happens every cycle in the memory block) will
// fetch chirp 1.
// read_doppler_index <= 1 moved to Block 2
fft_start <= 1;
state <= S_LOAD_FFT;
end
S_LOAD_FFT: begin
fft_start <= 0;
// Pipeline alignment (after S_PRE_READ primed the BRAM
// and pre-registered window_val_reg = window_coeff[0]):
//
// With DSP48 BREG+MREG pipelining, data flows through:
// sub=0: multiply mem_rdata * window_val_reg -> mult_i_raw
// pre-register window_coeff[1] into window_val_reg
// sub=1: MREG capture mult_i_raw -> mult_i (sample 0)
// new multiply for sample 1
// sub=2..DOPPLER_FFT_SIZE+1: steady state
// fft_input = rounding(mult_i), mult_i = mult_i_raw,
// mult_i_raw = new multiply, window_val_reg = next coeff
//
// fft_input_valid asserted at sub=2..DOPPLER_FFT_SIZE+1
// fft_input_last asserted at sub=DOPPLER_FFT_SIZE+1
// read_doppler_index updates moved to Block 2 (sync reset)
if (fft_sample_counter <= 1) begin
// Sub 0..1: pipeline priming no valid FFT data yet
fft_sample_counter <= fft_sample_counter + 1;
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE + 1) begin
// Sub 2..DOPPLER_FFT_SIZE+1: steady state
// (fft_input_i/fft_input_q captured in Block 2)
fft_input_valid <= 1;
if (fft_sample_counter == DOPPLER_FFT_SIZE + 1) begin
// Last sample: flush
fft_input_last <= 1;
state <= S_FFT_WAIT;
fft_sample_counter <= 0;
processing_timeout <= 1000;
end else begin
fft_sample_counter <= fft_sample_counter + 1;
end
end
end
S_FFT_WAIT: begin
if (fft_output_valid) begin
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
doppler_bin <= fft_sample_counter;
range_bin <= read_range_bin;
doppler_valid <= 1;
fft_sample_counter <= fft_sample_counter + 1;
if (fft_output_last) begin
state <= S_OUTPUT;
fft_sample_counter <= 0;
end
end
if (processing_timeout == 0) begin
state <= S_OUTPUT;
end
end
S_OUTPUT: begin
if (read_range_bin < RANGE_BINS - 1) begin
// read_range_bin/read_doppler_index updated in Block 2
fft_sample_counter <= 0;
state <= S_PRE_READ;
end else begin
state <= S_IDLE;
frame_buffer_full <= 0;
end
end
endcase
status <= {state, frame_buffer_full};
end
end
// ----------------------------------------------------------
// Block 2: BRAM address/data & DSP datapath synchronous reset only.
// Uses always @(posedge clk) so Vivado can absorb multipliers
// into DSP48 primitives and does not flag REQP-1839/1840 on
// BRAM address registers. Replicates the same state/condition
// structure as Block 1 for the registers:
// mem_we, mem_waddr_r, mem_wdata_i, mem_wdata_q,
// mult_i, mult_q, fft_input_i, fft_input_q,
// read_range_bin, read_doppler_index
// ----------------------------------------------------------
always @(posedge clk) begin
if (!reset_n) begin
mem_we <= 0;
mem_waddr_r <= 0;
mem_wdata_i <= 0;
mem_wdata_q <= 0;
mult_i <= 0;
mult_q <= 0;
mult_i_raw <= 0;
mult_q_raw <= 0;
window_val_reg <= 0;
fft_input_i <= 0;
fft_input_q <= 0;
read_range_bin <= 0;
read_doppler_index <= 0;
end else begin
mem_we <= 0;
case (state)
S_IDLE: begin
if (data_valid && !frame_buffer_full) begin
// Write the first sample immediately (Bug #3 fix:
// previously this transition consumed data_valid
// without writing to BRAM)
mem_we <= 1;
mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0];
mem_wdata_q <= range_data[31:16];
end
end
S_ACCUMULATE: begin
if (data_valid) begin
// Drive memory write signals (actual write in separate block)
mem_we <= 1;
mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0];
mem_wdata_q <= range_data[31:16];
// Transition to S_PRE_READ when frame complete
if (write_range_bin >= RANGE_BINS - 1 &&
write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
read_range_bin <= 0;
read_doppler_index <= 0;
end
end
end
S_PRE_READ: begin
// Advance read_doppler_index to 1 so next BRAM read
// fetches chirp 1
read_doppler_index <= 1;
// BREG priming: pre-register window coeff for sample 0
// so it is ready when S_LOAD_FFT sub=0 performs the multiply
window_val_reg <= $signed(window_coeff[0]);
end
S_LOAD_FFT: begin
if (fft_sample_counter == 0) begin
// Pipe stage 1: multiply using pre-registered BREG value
// mem_rdata_i = data[chirp=0][rbin] (primed by S_PRE_READ)
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (sample 1)
window_val_reg <= $signed(window_coeff[1]);
// Present BRAM addr for chirp 2
read_doppler_index <= (2 < DOPPLER_FFT_SIZE) ? 2
: DOPPLER_FFT_SIZE - 1;
end else if (fft_sample_counter == 1) begin
// Pipe stage 2 (MREG): capture sample 0 multiply result
mult_i <= mult_i_raw;
mult_q <= mult_q_raw;
// Multiply sample 1 using registered window value
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (sample 2)
if (2 < DOPPLER_FFT_SIZE)
window_val_reg <= $signed(window_coeff[2]);
// Advance BRAM read to chirp 3
if (3 < DOPPLER_FFT_SIZE)
read_doppler_index <= 3;
else
read_doppler_index <= DOPPLER_FFT_SIZE - 1;
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE + 1) begin
// Sub 2..DOPPLER_FFT_SIZE+1: steady state
// Capture rounding into fft_input from MREG output
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
// MREG: capture multiply result
mult_i <= mult_i_raw;
mult_q <= mult_q_raw;
if (fft_sample_counter <= DOPPLER_FFT_SIZE - 1) begin
// New multiply from current BRAM data
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (clamped)
if (fft_sample_counter + 1 < DOPPLER_FFT_SIZE)
window_val_reg <= $signed(window_coeff[fft_sample_counter + 1]);
// Advance BRAM read
if (fft_sample_counter + 2 < DOPPLER_FFT_SIZE)
read_doppler_index <= fft_sample_counter + 2;
else
read_doppler_index <= DOPPLER_FFT_SIZE - 1;
end
if (fft_sample_counter == DOPPLER_FFT_SIZE + 1) begin
// Flush complete reset read index
read_doppler_index <= 0;
end
end
end
S_OUTPUT: begin
if (read_range_bin < RANGE_BINS - 1) begin
read_range_bin <= read_range_bin + 1;
read_doppler_index <= 0;
end
end
default: begin
// S_IDLE, S_FFT_WAIT:
// no BRAM-write, DSP, or read-address operations needed
end
endcase
end
// ==============================================
// Main State Machine
// ==============================================
reg [4:0] fft_sample_counter; // Reduced: only need 0..17 for 16-pt FFT
reg [9:0] processing_timeout;
// Memory write enable and data signals
reg mem_we;
reg [10:0] mem_waddr_r;
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
// Memory read data
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
`ifdef FORMAL
assign fv_state = state;
assign fv_mem_write_addr = mem_write_addr;
assign fv_mem_read_addr = mem_read_addr;
assign fv_write_range_bin = write_range_bin;
assign fv_write_chirp_index = write_chirp_index;
assign fv_read_range_bin = read_range_bin;
assign fv_read_doppler_index = read_doppler_index;
assign fv_processing_timeout = processing_timeout;
assign fv_frame_buffer_full = frame_buffer_full;
assign fv_mem_we = mem_we;
assign fv_mem_waddr_r = mem_waddr_r;
`endif
// ----------------------------------------------------------
// Separate always block for memory writes NO async reset
// ----------------------------------------------------------
always @(posedge clk) begin
// Write port: when mem_we is set, store the registered I and Q samples at
// the registered write address (mem_we/mem_waddr_r/mem_wdata_* are driven
// by the datapath block).
if (mem_we) begin
doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
end
// Read port: sampled unconditionally every clock, so mem_rdata_* holds the
// word addressed by the mem_read_addr value presented on the previous edge
// (one cycle of read latency).
mem_rdata_i <= doppler_i_mem[mem_read_addr];
mem_rdata_q <= doppler_q_mem[mem_read_addr];
end
// ----------------------------------------------------------
// Block 1: FSM / Control async reset
// ----------------------------------------------------------
// Control FSM: accumulates a frame, then for every range bin sequences two
// FFT passes (sub-frame 0, then sub-frame 1) and tags each FFT output.
// Asynchronous active-low reset. Several branches rely on the rule that the
// LAST non-blocking assignment to a register in a clock cycle wins.
always @(posedge clk or negedge reset_n) begin
if (!reset_n) begin
state <= S_IDLE;
write_range_bin <= 0;
write_chirp_index <= 0;
frame_buffer_full <= 0;
doppler_valid <= 0;
fft_start <= 0;
fft_input_valid <= 0;
fft_input_last <= 0;
fft_sample_counter <= 0;
processing_timeout <= 0;
status <= 0;
chirps_received <= 0;
chirp_state <= 0;
doppler_output <= 0;
doppler_bin <= 0;
range_bin <= 0;
sub_frame <= 0;
current_sub_frame <= 0;
end else begin
// Defaults: these strobes are low unless a state branch below
// re-asserts them in the same cycle.
doppler_valid <= 0;
fft_input_valid <= 0;
fft_input_last <= 0;
// Watchdog counts down and holds at 0; S_LOAD_FFT reloads it.
if (processing_timeout > 0) begin
processing_timeout <= processing_timeout - 1;
end
case (state)
S_IDLE: begin
// A rising edge on new_chirp_frame rewinds the write pointers.
if (frame_start_pulse) begin
write_chirp_index <= 0;
write_range_bin <= 0;
frame_buffer_full <= 0;
chirps_received <= 0;
end
// First sample of a frame: move to S_ACCUMULATE and point at range
// bin 1. This assignment comes after the frame_start_pulse branch,
// so it overrides the rewind of write_range_bin when both fire.
if (data_valid && !frame_buffer_full) begin
state <= S_ACCUMULATE;
write_range_bin <= 1;
end
end
S_ACCUMULATE: begin
if (data_valid) begin
if (write_range_bin < RANGE_BINS - 1) begin
write_range_bin <= write_range_bin + 1;
end else begin
// Last range bin of a chirp: wrap and advance the chirp index.
write_range_bin <= 0;
write_chirp_index <= write_chirp_index + 1;
chirps_received <= chirps_received + 1;
// Last chirp of the frame: mark the buffer full and start the
// read-out. The zeroing of write_chirp_index below overrides the
// increment above (last assignment wins).
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
frame_buffer_full <= 1;
chirp_state <= 0;
state <= S_PRE_READ;
fft_sample_counter <= 0;
write_chirp_index <= 0;
write_range_bin <= 0;
// Start with sub-frame 0 (long PRI chirps 0..15)
current_sub_frame <= 0;
end
end
end
end
S_PRE_READ: begin
// Prime BRAM pipeline for current sub-frame
// read_doppler_index already set in Block 2 to sub-frame base
// One cycle only: raise fft_start and move on to loading.
fft_start <= 1;
state <= S_LOAD_FFT;
end
S_LOAD_FFT: begin
fft_start <= 0;
// Pipeline: 2 priming cycles + CHIRPS_PER_SUBFRAME data cycles
// Counter 0..1: no valid FFT input yet.
// Counter 2..CHIRPS_PER_SUBFRAME+1: fft_input_valid asserted.
if (fft_sample_counter <= 1) begin
fft_sample_counter <= fft_sample_counter + 1;
end else if (fft_sample_counter <= CHIRPS_PER_SUBFRAME + 1) begin
fft_input_valid <= 1;
if (fft_sample_counter == CHIRPS_PER_SUBFRAME + 1) begin
// Final sample: flag last, arm the watchdog (this overrides the
// decrement at the top of the block) and wait for FFT results.
fft_input_last <= 1;
state <= S_FFT_WAIT;
fft_sample_counter <= 0;
processing_timeout <= 1000;
end else begin
fft_sample_counter <= fft_sample_counter + 1;
end
end
end
S_FFT_WAIT: begin
// Each valid FFT beat becomes one output word: Q in the upper half,
// I in the lower half. fft_sample_counter is reused as the bin index.
if (fft_output_valid) begin
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
// Pack: {sub_frame, bin[3:0]}
doppler_bin <= {current_sub_frame, fft_sample_counter[3:0]};
range_bin <= read_range_bin;
sub_frame <= current_sub_frame;
doppler_valid <= 1;
fft_sample_counter <= fft_sample_counter + 1;
if (fft_output_last) begin
state <= S_OUTPUT;
fft_sample_counter <= 0;
end
end
// Watchdog expiry forces progress even if the FFT never signals last.
if (processing_timeout == 0) begin
state <= S_OUTPUT;
end
end
S_OUTPUT: begin
if (current_sub_frame == 0) begin
// Just finished long PRI sub-frame now do short PRI
current_sub_frame <= 1;
fft_sample_counter <= 0;
state <= S_PRE_READ;
// read_range_bin stays the same, read_doppler_index
// will be set to CHIRPS_PER_SUBFRAME in Block 2
end else begin
// Finished both sub-frames for this range bin
current_sub_frame <= 0;
if (read_range_bin < RANGE_BINS - 1) begin
fft_sample_counter <= 0;
state <= S_PRE_READ;
// read_range_bin incremented in Block 2
end else begin
// Last range bin processed: release the buffer and go idle.
state <= S_IDLE;
frame_buffer_full <= 0;
end
end
end
endcase
// Registers the values state and frame_buffer_full held before this
// edge, so status trails them by one clock.
status <= {state, frame_buffer_full};
end
end
// ----------------------------------------------------------
// Block 2: BRAM address/data & DSP datapath synchronous reset
// ----------------------------------------------------------
// Datapath block: frame-buffer write signals, read-address generation and the
// window-multiply pipeline. Synchronous reset only (reset_n is sampled on the
// clock edge). The case structure mirrors the control FSM's states and keys
// off the same fft_sample_counter / current_sub_frame values.
always @(posedge clk) begin
if (!reset_n) begin
mem_we <= 0;
mem_waddr_r <= 0;
mem_wdata_i <= 0;
mem_wdata_q <= 0;
mult_i <= 0;
mult_q <= 0;
mult_i_raw <= 0;
mult_q_raw <= 0;
window_val_reg <= 0;
fft_input_i <= 0;
fft_input_q <= 0;
read_range_bin <= 0;
read_doppler_index <= 0;
end else begin
// Default: no write this cycle unless a branch below requests one.
mem_we <= 0;
case (state)
S_IDLE: begin
// First sample of a frame is written from S_IDLE, in the same cycle
// the control FSM moves to S_ACCUMULATE.
// range_data[15:0] is stored as I, range_data[31:16] as Q.
if (data_valid && !frame_buffer_full) begin
mem_we <= 1;
mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0];
mem_wdata_q <= range_data[31:16];
end
end
S_ACCUMULATE: begin
if (data_valid) begin
mem_we <= 1;
mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0];
mem_wdata_q <= range_data[31:16];
// Same frame-complete condition the control FSM uses to enter
// S_PRE_READ: rewind the read pointers for the read-out phase.
if (write_range_bin >= RANGE_BINS - 1 &&
write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
read_range_bin <= 0;
// Start reading from chirp 0 (long PRI sub-frame)
read_doppler_index <= 0;
end
end
end
S_PRE_READ: begin
// Set read_doppler_index to first chirp of current sub-frame + 1
// (because address is presented this cycle, data arrives next)
if (current_sub_frame == 0)
read_doppler_index <= 1; // Long PRI: chirps 0..15
else
read_doppler_index <= CHIRPS_PER_SUBFRAME + 1; // Short PRI: chirps 16..31
// BREG priming: window coeff for sample 0
window_val_reg <= $signed(window_coeff[0]);
end
S_LOAD_FFT: begin
// Pipeline per sample: BRAM read data * window_val_reg -> mult_*_raw
// -> mult_* -> rounded into fft_input_*. The read address and the
// window coefficient are each registered ahead of the multiply
// that consumes them.
if (fft_sample_counter == 0) begin
// Pipe stage 1: multiply using pre-registered BREG value
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
window_val_reg <= $signed(window_coeff[1]);
// Advance to chirp base+2
// (clamped so the read address never leaves the frame buffer)
if (current_sub_frame == 0)
read_doppler_index <= (2 < CHIRPS_PER_SUBFRAME) ? 2
: CHIRPS_PER_SUBFRAME - 1;
else
read_doppler_index <= (CHIRPS_PER_SUBFRAME + 2 < CHIRPS_PER_FRAME)
? CHIRPS_PER_SUBFRAME + 2
: CHIRPS_PER_FRAME - 1;
end else if (fft_sample_counter == 1) begin
// Second priming cycle: sample 0 product moves into mult_*, and
// sample 1 is multiplied by the coefficient registered last cycle.
mult_i <= mult_i_raw;
mult_q <= mult_q_raw;
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
if (2 < CHIRPS_PER_SUBFRAME)
window_val_reg <= $signed(window_coeff[2]);
// Advance to chirp base+3
// next_chirp is a block-local temporary (blocking assignment).
begin : advance_chirp3
reg [4:0] next_chirp;
next_chirp = (current_sub_frame == 0) ? 3 : CHIRPS_PER_SUBFRAME + 3;
if (next_chirp < CHIRPS_PER_FRAME)
read_doppler_index <= next_chirp;
else
read_doppler_index <= CHIRPS_PER_FRAME - 1;
end
end else if (fft_sample_counter <= CHIRPS_PER_SUBFRAME + 1) begin
// Steady state
// Round-to-nearest: add half an LSB (1 << 14), then shift out the
// 15 fractional bits of the Q15 product.
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
mult_i <= mult_i_raw;
mult_q <= mult_q_raw;
// New multiplies are issued only while samples of this sub-frame
// remain; the last two counter values just drain the pipeline.
if (fft_sample_counter <= CHIRPS_PER_SUBFRAME - 1) begin
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Window coeff index within sub-frame
// (coefficient for the next sample; skipped past the last entry)
begin : advance_window
reg [4:0] win_idx;
win_idx = fft_sample_counter[3:0] + 1;
if (win_idx < CHIRPS_PER_SUBFRAME)
window_val_reg <= $signed(window_coeff[win_idx]);
end
// Advance BRAM read
// (address two samples ahead, offset by the sub-frame base and
// clamped to the last chirp of the frame)
begin : advance_bram
reg [4:0] chirp_offset;
reg [4:0] chirp_base;
chirp_offset = fft_sample_counter[3:0] + 2;
chirp_base = (current_sub_frame == 0) ? 0 : CHIRPS_PER_SUBFRAME;
if (chirp_base + chirp_offset < CHIRPS_PER_FRAME)
read_doppler_index <= chirp_base + chirp_offset;
else
read_doppler_index <= CHIRPS_PER_FRAME - 1;
end
end
if (fft_sample_counter == CHIRPS_PER_SUBFRAME + 1) begin
// Reset read index for potential next operation
if (current_sub_frame == 0)
read_doppler_index <= CHIRPS_PER_SUBFRAME; // Ready for short sub-frame
else
read_doppler_index <= 0;
end
end
end
S_OUTPUT: begin
// current_sub_frame is read here before the control FSM's update in
// the same cycle takes effect (non-blocking assignment).
if (current_sub_frame == 0) begin
// Transitioning to short PRI sub-frame
// Set read_doppler_index to start of short sub-frame
read_doppler_index <= CHIRPS_PER_SUBFRAME;
end else begin
// Both sub-frames done
if (read_range_bin < RANGE_BINS - 1) begin
read_range_bin <= read_range_bin + 1;
read_doppler_index <= 0; // Next range bin starts with long sub-frame
end
end
end
default: begin
// S_FFT_WAIT: no BRAM-write or address operations needed
end
endcase
end
end
// ==============================================
// FFT Module
// FFT Module 16-point
// ==============================================
xfft_32 fft_inst (
xfft_16 fft_inst (
.aclk(clk),
.aresetn(reset_n),
.s_axis_config_tdata(8'h01),
@@ -517,5 +533,4 @@ xfft_32 fft_inst (
assign processing_active = (state != S_IDLE);
assign frame_complete = (state == S_IDLE && frame_buffer_full == 0);
endmodule
endmodule
+8
View File
@@ -0,0 +1,8 @@
// Quarter-wave cosine ROM for 16-point FFT
// 4 entries (N/4), 16-bit signed Q15 format
// cos(2*pi*k/16) for k = 0..3
// Used by fft_engine with N=16, LOG2N=4
7FFF
7641
5A82
30FB
@@ -8,8 +8,8 @@
// Single-clock design: clk is an input wire, async2sync handles async reset.
// Each formal step = one clock edge.
//
// Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, DOPPLER_FFT_SIZE=4.
// Includes full xfft_32 and fft_engine sub-modules.
// Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, CHIRPS_PER_SUBFRAME=2, DOPPLER_FFT_SIZE=2.
// Includes full xfft_16 and fft_engine sub-modules.
//
// Focus: memory address bounds (highest-value finding) and state encoding.
// ============================================================================
@@ -20,7 +20,8 @@ module fv_doppler_processor (
// Reduced parameters for tractable BMC
localparam RANGE_BINS = 4;
localparam CHIRPS_PER_FRAME = 4;
localparam DOPPLER_FFT_SIZE = 4;
localparam CHIRPS_PER_SUBFRAME = 2; // Dual sub-frame: 2 chirps per sub-frame
localparam DOPPLER_FFT_SIZE = 2; // FFT size matches sub-frame size
localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME; // 16
// State encoding (mirrors DUT localparams)
@@ -62,6 +63,7 @@ module fv_doppler_processor (
wire doppler_valid;
wire [4:0] doppler_bin;
wire [5:0] range_bin;
wire sub_frame;
wire processing_active;
wire frame_complete;
wire [3:0] status;
@@ -86,6 +88,7 @@ module fv_doppler_processor (
.DOPPLER_FFT_SIZE (DOPPLER_FFT_SIZE),
.RANGE_BINS (RANGE_BINS),
.CHIRPS_PER_FRAME (CHIRPS_PER_FRAME),
.CHIRPS_PER_SUBFRAME (CHIRPS_PER_SUBFRAME),
.WINDOW_TYPE (1), // Rectangular — simpler for formal
.DATA_WIDTH (16)
) dut (
@@ -98,6 +101,7 @@ module fv_doppler_processor (
.doppler_valid (doppler_valid),
.doppler_bin (doppler_bin),
.range_bin (range_bin),
.sub_frame (sub_frame),
.processing_active(processing_active),
.frame_complete (frame_complete),
.status (status),
@@ -36,6 +36,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
DOPPLER_FFT = 32
RANGE_BINS = 64
TOTAL_OUTPUTS = RANGE_BINS * DOPPLER_FFT # 2048
SUBFRAME_SIZE = 16
SCENARIOS = {
'stationary': {
@@ -125,6 +126,19 @@ def find_peak_bin(i_arr, q_arr):
return max(range(len(mags)), key=lambda k: mags[k])
def peak_bins_match(py_peak, rtl_peak):
    """Check whether two packed peak indices agree to within one Doppler bin.

    Each index is split into (sub-frame, bin-within-sub-frame) using
    SUBFRAME_SIZE. Peaks in different sub-frames never match. Within a
    sub-frame the bin distance is treated as circular, so the first and
    last bins count as neighbours.
    """
    ref_subframe, ref_bin = divmod(py_peak, SUBFRAME_SIZE)
    dut_subframe, dut_bin = divmod(rtl_peak, SUBFRAME_SIZE)
    if ref_subframe == dut_subframe:
        distance = abs(ref_bin - dut_bin)
        # Adjacent directly, or adjacent across the wrap-around boundary.
        return distance <= 1 or distance >= SUBFRAME_SIZE - 1
    return False
def total_energy(data_dict):
"""Sum of I^2 + Q^2 across all range bins and Doppler bins."""
total = 0
@@ -207,8 +221,8 @@ def compare_scenario(name, config, base_dir):
py_peak = find_peak_bin(py_i, py_q)
rtl_peak = find_peak_bin(rtl_i, rtl_q)
# Peak agreement (allow +/- 1 bin tolerance)
if abs(py_peak - rtl_peak) <= 1 or abs(py_peak - rtl_peak) >= DOPPLER_FFT - 1:
# Peak agreement (allow +/-1 bin tolerance, but only within a sub-frame)
if peak_bins_match(py_peak, rtl_peak):
peak_agreements += 1
py_mag = magnitude_l1(py_i, py_q)
@@ -242,7 +256,7 @@ def compare_scenario(name, config, base_dir):
avg_corr_q = sum(q_correlations) / len(q_correlations)
print(f"\n Per-range-bin metrics:")
print(f" Peak Doppler bin agreement (+/-1): {peak_agreements}/{RANGE_BINS} "
print(f" Peak Doppler bin agreement (+/-1 within sub-frame): {peak_agreements}/{RANGE_BINS} "
f"({peak_agreement_frac:.0%})")
print(f" Avg magnitude correlation: {avg_mag_corr:.4f}")
print(f" Avg I-channel correlation: {avg_corr_i:.4f}")
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1106,8 +1106,8 @@ FFFF0000
00000000
00000000
00000000
FFFF0001
FFFF0000
00000001
00000000
FFFF0005
00000001
00000001
@@ -1172,7 +1172,7 @@ FFFF0000
00010000
00010000
00010000
00060003
00060002
00010001
00000001
00000000
@@ -1236,7 +1236,7 @@ FFFF0000
00000000
0001FFFF
0002FFFF
0006FFFD
0005FFFC
00010000
0001FFFF
00000001
@@ -1300,7 +1300,7 @@ FFFF0000
00000000
00000000
FFFFFFFF
FFFFFFFA
FFFEFFFA
0000FFFF
0000FFFF
00010001
@@ -1364,9 +1364,9 @@ FFFF0000
00000000
00000000
FFFF0000
FFFAFFFD
FFFAFFFF
FFFFFFFF
00000000
00000001
00000001
FFFF0000
00000000
@@ -1427,74 +1427,74 @@ FFFF0000
FFFF0000
00000000
FFFF0000
00000001
FFFB0005
FFFE0001
00000000
00010000
00000000
00000000
00000001
00000000
0000FFFF
00010001
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000
00010001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
FFFFFFFF
FFFFFFFF
0000FFFF
00000000
00000000
00000001
00000000
00000000
FFFF0000
FFFF0000
00000001
00010000
00000000
FFFF0000
00010000
00000001
FFFF0000
FFFF0000
00010001
FFFF0000
FFFFFFFF
00000000
00010000
FFFF0000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00010001
00000000
00000000
FFFF0000
00000000
00010001
00000001
00010006
00000002
FFFD0006
FFFE0001
00000001
00010000
00000000
00000000
00000001
00000000
0000FFFF
00010001
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000
00010001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
FFFFFFFF
FFFFFFFF
0000FFFF
00000000
00000000
00000001
00000000
00000000
FFFF0000
FFFF0000
00000001
00010000
00000000
FFFF0000
00010000
00000001
FFFF0000
FFFF0000
00010001
FFFF0000
FFFFFFFF
00000000
00010000
FFFF0000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00010001
00000000
00000000
FFFF0000
00000000
00010000
00010001
00030005
00010001
00010001
00000000
00000000
FFFF0000
@@ -1556,8 +1556,8 @@ FFFFFFFF
00000000
00010000
00020000
00060001
00010000
0006FFFE
0001FFFF
00010000
FFFF0000
00000001
@@ -1619,9 +1619,9 @@ FFFFFFFE
00000001
0000FFFF
00010000
0001FFFF
0004FFFB
0002FFFF
0001FFFE
0001FFFA
0002FFFE
00010000
FFFF0000
FFFF0000
@@ -1682,9 +1682,9 @@ FFFF0000
00000000
00000001
00000001
00000000
FFFF0000
FFFEFFFA
FFFF0000
FFFBFFFC
FFFFFFFF
FFFF0000
0000FFFF
@@ -1747,9 +1747,9 @@ FFFFFFFF
00000000
0000FFFF
FFFF0001
FFFF0000
FFFA0000
FFFE0000
FFFF0001
FFFA0003
FFFF0001
FFFF0000
00000000
00000001
@@ -1811,74 +1811,74 @@ FFFF0001
00010000
0000FFFF
00000000
FFFF0002
FFFD0005
FFFF0001
00000001
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000001
00000000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
0000FFFF
00010000
FFFF0000
0001FFFF
0000FFFF
0001FFFF
00000000
0000FFFF
00000001
00010002
00030005
00000002
00000006
FFFF0002
00010001
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000001
00000000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
0000FFFF
00010000
FFFF0000
0001FFFF
0000FFFF
0001FFFF
00000000
0000FFFF
00010000
00020001
00060002
00000001
00010000
0001FFFF
00000000
00000000
@@ -1939,9 +1939,9 @@ FFFF0000
00000000
0000FFFF
0001FFFF
0001FFFF
00070000
00000000
0000FFFE
0005FFFC
0000FFFF
00010001
FFFF0000
0000FFFF
@@ -2003,9 +2003,9 @@ FFFF0000
00000001
00000000
0000FFFF
0001FFFF
0002FFF9
0000FFFF
FFFDFFF9
FFFFFFFF
FFFFFFFF
00000000
00000000
@@ -1099,7 +1099,7 @@ FFFF0000
00000000
00000002
FFFF0003
FFFE0012
FFFF0012
00000003
FFFF0002
00010001
@@ -1163,7 +1163,7 @@ FFFF0000
00010001
00010002
00020003
000C000D
000D000C
00030003
00000001
00000001
@@ -1226,9 +1226,9 @@ FFFF0000
00000000
FFFF0000
00020000
00030000
00110004
00030000
0003FFFF
00120002
0003FFFF
00020000
00000000
FFFF0000
@@ -1291,8 +1291,8 @@ FFFF0000
00010000
0002FFFF
0003FFFE
000FFFF6
0004FFFF
000EFFF4
0003FFFE
0002FFFF
00000000
FFFF0000
@@ -1312,8 +1312,8 @@ FFFF0000
00010000
00000001
0000FFFF
00000000
00010000
00010001
FFFF0000
00000001
0000FFFF
@@ -1353,10 +1353,10 @@ FFFF0000
00010001
0001FFFF
00010000
0001FFFE
0001FFFD
0006FFF0
0001FFFD
0000FFFE
0000FFFD
0003FFEF
0000FFFD
0000FFFE
00000000
00010000
@@ -1376,7 +1376,7 @@ FFFF0000
0000FFFF
00010000
00000001
00010001
00010002
00000000
00000001
00000000
@@ -1418,10 +1418,10 @@ FFFF0000
0000FFFF
FFFF0000
FFFFFFFE
FFFEFFFD
FFF9FFF1
FFFEFFFD
FFFFFFFF
FFFDFFFD
FFF5FFF2
FFFEFFFE
FFFE0000
FFFF0000
00000001
FFFF0000
@@ -1439,8 +1439,8 @@ FFFF0000
0000FFFF
00010001
FFFF0000
FFFF0001
FFFF0001
FFFF0000
FFFF0000
00000000
00000000
00000001
@@ -1482,10 +1482,10 @@ FFFF0000
00000000
00000000
FFFF0000
FFFCFFFF
FFEFFFF9
FFFCFFFF
FFFF0000
FFFC0000
FFEEFFFE
FFFC0000
FFFF0001
00000000
00000000
FFFF0000
@@ -1504,7 +1504,7 @@ FFFF0000
00000000
00000000
00000000
FFFFFFFF
0000FFFF
FFFF0001
00000000
00010000
@@ -1546,10 +1546,10 @@ FFFFFFFF
00000000
FFFFFFFF
FFFE0001
FFFD0001
FFEF0006
FFFD0001
FFFF0000
FFFD0002
FFF1000B
FFFD0002
FFFF0001
00000000
FFFFFFFF
00010000
@@ -1609,77 +1609,77 @@ FFFF0001
00000000
00000001
00000000
FFFF0002
FFFE0003
FFF7000E
FFFF0005
FFFF0001
0001FFFF
00000000
00000001
0000FFFF
00000000
00000000
FFFF0000
00010000
00010000
FFFF0000
FFFF0000
0000FFFF
00000000
00000000
00010000
00000000
00000000
00010000
00020001
00000000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000001
00000001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
0000FFFF
00010000
FFFF0000
0001FFFF
00010001
00000000
FFFF0001
00010000
0000FFFF
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
00000001
00000000
FFFF0000
FFFF0000
00000000
0000FFFF
00000001
00000002
00000003
00050012
00010003
FFFF0004
FFFC0010
00000005
00000001
0001FFFF
00000000
00000001
0000FFFF
00000000
00000000
FFFF0000
00010000
00010000
FFFF0000
FFFF0000
0000FFFF
00000000
00000000
00010000
00000000
00000000
00010000
00010002
00000000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000001
00000001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
0000FFFF
00010000
FFFF0000
0001FFFF
00010001
00000000
FFFF0001
00010000
0000FFFF
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
00000001
00000000
FFFF0000
FFFF0000
00000000
0000FFFF
00000001
00000002
00010003
000B000F
00020003
00020002
00000000
00000000
00000001
00000001
00000001
@@ -1696,9 +1696,9 @@ FFFFFFFF
00000000
0000FFFF
00000000
00000002
00010000
00000000
FFFF0001
0000FFFF
FFFF0000
00000000
00000000
00000000
@@ -1737,160 +1737,160 @@ FFFFFFFF
00000000
00000000
00000001
00020001
00030000
00110004
00040000
00020000
00000000
00000000
00000000
0000FFFF
00000001
00000000
00000001
00000000
00000000
00000000
00000001
FFFFFFFF
0000FFFF
FFFF0000
00000000
FFFF0000
00000001
00000000
0000FFFF
FFFFFFFF
00000000
00000000
FFFF0000
FFFF0000
0000FFFF
00010000
00000001
00010000
00010001
00000000
0000FFFF
00000001
00000000
FFFF0001
00010001
00000000
00000000
00000000
00000000
FFFFFFFF
FFFF0000
00000000
00010001
00010000
FFFFFFFF
00000000
00000001
00000000
00000000
00000000
00000000
00000000
00010000
00000000
FFFF0000
0000FFFF
0000FFFF
00000000
00000000
0001FFFF
0004FFFE
000FFFF7
0004FFFE
00010000
FFFF0001
0000FFFF
00010000
0000FFFF
00000000
FFFF0001
00000000
FFFF0000
00010000
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000000
00010000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
0000FFFE
0001FFFB
0005FFEF
0000FFFC
0001FFFE
0000FFFF
0001FFFF
00000000
0000FFFF
00000000
00010001
00000000
FFFF0001
00000000
0001FFFF
00000000
00000000
00010000
FFFF0000
00000000
0001FFFF
00000000
00000001
00020002
00030001
000E000A
00040001
00020001
00000000
00000000
00000000
0000FFFF
00000001
00000000
00000001
00000000
00000000
00000000
00000001
FFFFFFFF
0000FFFF
FFFF0000
00000000
FFFF0000
00000001
00000000
FFFFFFFF
FFFFFFFF
00000000
00000000
FFFF0000
FFFF0000
0000FFFF
00010000
00000001
00010000
00010001
00000000
0000FFFF
00000001
00000000
FFFF0001
00010001
00000000
00000000
00000000
00000000
FFFFFFFF
FFFF0000
00000000
00010001
00010000
FFFFFFFF
00000000
00000001
00000000
00000000
00000000
00000000
00000000
00010000
00000000
FFFF0000
0000FFFF
0000FFFF
00000000
00000000
00020000
00050000
0012FFFE
00040000
00020000
FFFF0001
0000FFFF
00010000
0000FFFF
00000000
FFFF0001
00000000
FFFF0000
00010000
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000000
0000FFFF
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
0000FFFE
0003FFFC
000CFFF3
0001FFFD
0002FFFE
0000FFFF
0001FFFF
00000000
0000FFFF
00000000
00010001
00000000
FFFF0001
00000000
0001FFFF
00000000
00000000
00010000
FFFF0000
00000000
0001FFFF
00010000
00000000
00030001
00000000
0001FFFF
00000000
00000000
0000FFFF
@@ -1929,78 +1929,78 @@ FFFF0000
FFFF0000
00000000
00000000
0000FFFE
FFFFFFFD
FFFFFFEE
FFFFFFFC
FFFFFFFE
00000000
FFFF0000
00000000
0000FFFF
0000FFFF
FFFFFFFF
00000000
FFFF0000
00000001
FFFF0000
0000FFFF
00000000
00000000
00000000
00010000
FFFF0000
00000000
00000000
00010001
00000000
00000000
0000FFFF
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
00000000
00000000
00000000
00010000
00000000
00000001
00000000
FFFF0000
00000000
00000001
00010000
00000000
00000001
00010000
00000000
FFFF0000
00000001
00000000
00000000
00000000
00000000
00000000
00000001
00010000
00000000
00000000
0001FFFF
0000FFFF
00010000
FFFF0000
FFFFFFFF
FFFEFFFE
FFF3FFF3
FFFEFFFD
FFF7FFF1
FFFEFFFD
FFFEFFFE
00000000
FFFF0000
00000000
0000FFFF
0000FFFF
FFFFFFFF
00000000
FFFF0000
00000001
FFFF0000
0000FFFF
00000000
00000000
00000000
00010000
FFFF0000
00000000
00000000
FFFF0000
00000000
00000000
0000FFFF
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
00000000
00000000
00000000
00010000
00000000
00000001
00000000
FFFF0000
00000000
00000001
00010000
00000000
00000001
00010000
00000000
FFFF0000
00000001
00000000
00000000
00000000
00000000
00000000
00000001
00010000
00000000
00000000
0001FFFF
0000FFFF
00010000
FFFF0000
FFFF0000
FFFEFFFF
FFEEFFFB
FFFDFFFE
FFFEFFFF
00000000
FFFF0000
00000001
00000000
00000000
00000001
@@ -2016,7 +2016,7 @@ FFFF0001
00010000
00000000
0001FFFF
FFFE0000
FFFFFFFF
00000001
00000000
00010000
+66 -50
View File
@@ -1075,44 +1075,43 @@ class RangeBinDecimator:
# =============================================================================
# Doppler Processor (Hamming window + 32-point FFT)
# Doppler Processor (Hamming window + dual 16-point FFT)
# =============================================================================
# Hamming window LUT (32 entries, 16-bit unsigned Q15)
# Hamming window LUT (16 entries, 16-bit unsigned Q15)
# Matches doppler_processor.v window_coeff[0:15]
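# Nominal formula: w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15, symmetric.
# NOTE(review): several table entries differ from the exact formula (e.g. exact
# w[1] is ~0.1198, about 3925 in Q15, while the table holds 0x0E5C = 3676).
# Keep the table bit-identical to the RTL LUT; confirm which one is intended.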
HAMMING_WINDOW = [
0x0800, 0x0862, 0x09CB, 0x0C3B, 0x0FB2, 0x142F, 0x19B2, 0x2039,
0x27C4, 0x3050, 0x39DB, 0x4462, 0x4FE3, 0x5C5A, 0x69C4, 0x781D,
0x7FFF, 0x781D, 0x69C4, 0x5C5A, 0x4FE3, 0x4462, 0x39DB, 0x3050,
0x27C4, 0x2039, 0x19B2, 0x142F, 0x0FB2, 0x0C3B, 0x09CB, 0x0862,
0x0A3D, 0x0E5C, 0x1B6D, 0x3088, 0x4B33, 0x6573, 0x7642, 0x7F62,
0x7F62, 0x7642, 0x6573, 0x4B33, 0x3088, 0x1B6D, 0x0E5C, 0x0A3D,
]
class DopplerProcessor:
"""
Bit-accurate model of doppler_processor_optimized.v
Bit-accurate model of doppler_processor_optimized.v (dual 16-pt FFT architecture).
For each range bin (0-63):
1. Read 32 chirps of data from accumulation buffer
2. Apply Hamming window (Q15 multiply, round, >>>15)
3. 32-point FFT
The staggered-PRF frame has 32 chirps total:
- Sub-frame 0 (long PRI): chirps 0-15 -> 16-pt Hamming -> 16-pt FFT -> bins 0-15
- Sub-frame 1 (short PRI): chirps 16-31 -> 16-pt Hamming -> 16-pt FFT -> bins 16-31
The 32-point FFT uses xfft_32.v (Xilinx IP wrapper around fft_engine).
For the Python model, we use FFTEngine with N=32.
Output: doppler_bin[4:0] = {sub_frame_id, bin_in_subframe[3:0]}
Total output per range bin: 32 bins (16 + 16), same interface as before.
"""
DOPPLER_FFT_SIZE = 32
DOPPLER_FFT_SIZE = 16 # Per sub-frame
RANGE_BINS = 64
CHIRPS_PER_FRAME = 32
CHIRPS_PER_SUBFRAME = 16
def __init__(self, twiddle_file_32=None):
def __init__(self, twiddle_file_16=None):
"""
For 32-point FFT, we need the 32-point twiddle file.
For 16-point FFT, we need the 16-point twiddle file.
If not provided, we generate twiddle factors mathematically
(since the 32-pt twiddle ROM is cos(2*pi*k/32) for k=0..7).
(cos(2*pi*k/16) for k=0..3, quarter-wave ROM with 4 entries).
"""
self.fft32 = None
self._twiddle_file_32 = twiddle_file_32
# We'll use a simple 32-pt FFT with computed twiddles
self.fft16 = None
self._twiddle_file_16 = twiddle_file_16
@staticmethod
def window_multiply(data_16, window_16):
@@ -1134,7 +1133,7 @@ class DopplerProcessor:
def process_frame(self, chirp_data_i, chirp_data_q):
"""
Process one complete Doppler frame.
Process one complete Doppler frame using dual 16-pt FFTs.
Args:
chirp_data_i: 2D array [32 chirps][64 range bins] of signed 16-bit I
@@ -1143,46 +1142,63 @@ class DopplerProcessor:
Returns:
(doppler_map_i, doppler_map_q): 2D arrays [64 range bins][32 doppler bins]
of signed 16-bit
Bins 0-15 = sub-frame 0 (long PRI)
Bins 16-31 = sub-frame 1 (short PRI)
"""
doppler_map_i = []
doppler_map_q = []
# Generate 32-pt twiddle factors (quarter-wave cos, 8 entries)
# cos(2*pi*k/32) for k=0..7
# Generate 16-pt twiddle factors (quarter-wave cos, 4 entries)
# cos(2*pi*k/16) for k=0..3
# Matches fft_twiddle_16.mem: 7FFF, 7641, 5A82, 30FB
import math
cos_rom_32 = []
for k in range(8):
val = round(32767.0 * math.cos(2.0 * math.pi * k / 32.0))
cos_rom_32.append(sign_extend(val & 0xFFFF, 16))
cos_rom_16 = []
for k in range(4):
val = round(32767.0 * math.cos(2.0 * math.pi * k / 16.0))
cos_rom_16.append(sign_extend(val & 0xFFFF, 16))
fft32 = FFTEngine.__new__(FFTEngine)
fft32.N = 32
fft32.LOG2N = 5
fft32.cos_rom = cos_rom_32
fft32.mem_re = [0] * 32
fft32.mem_im = [0] * 32
fft16 = FFTEngine.__new__(FFTEngine)
fft16.N = 16
fft16.LOG2N = 4
fft16.cos_rom = cos_rom_16
fft16.mem_re = [0] * 16
fft16.mem_im = [0] * 16
for rbin in range(self.RANGE_BINS):
# Gather 32 chirps for this range bin
fft_in_re = []
fft_in_im = []
# Output bins for this range bin: 32 total (16 from each sub-frame)
out_re = [0] * 32
out_im = [0] * 32
for chirp in range(self.CHIRPS_PER_FRAME):
re_val = sign_extend(chirp_data_i[chirp][rbin] & 0xFFFF, 16)
im_val = sign_extend(chirp_data_q[chirp][rbin] & 0xFFFF, 16)
# Process each sub-frame independently
for sf in range(2):
chirp_start = sf * self.CHIRPS_PER_SUBFRAME
bin_offset = sf * self.DOPPLER_FFT_SIZE
# Apply Hamming window
win_re = self.window_multiply(re_val, HAMMING_WINDOW[chirp])
win_im = self.window_multiply(im_val, HAMMING_WINDOW[chirp])
fft_in_re = []
fft_in_im = []
fft_in_re.append(win_re)
fft_in_im.append(win_im)
for c in range(self.CHIRPS_PER_SUBFRAME):
chirp = chirp_start + c
re_val = sign_extend(chirp_data_i[chirp][rbin] & 0xFFFF, 16)
im_val = sign_extend(chirp_data_q[chirp][rbin] & 0xFFFF, 16)
# 32-point forward FFT
fft_out_re, fft_out_im = fft32.compute(fft_in_re, fft_in_im, inverse=False)
# Apply 16-pt Hamming window (index = c within sub-frame)
win_re = self.window_multiply(re_val, HAMMING_WINDOW[c])
win_im = self.window_multiply(im_val, HAMMING_WINDOW[c])
doppler_map_i.append(fft_out_re)
doppler_map_q.append(fft_out_im)
fft_in_re.append(win_re)
fft_in_im.append(win_im)
# 16-point forward FFT
fft_out_re, fft_out_im = fft16.compute(fft_in_re, fft_in_im, inverse=False)
# Pack into output: sub-frame 0 -> bins 0-15, sub-frame 1 -> bins 16-31
for b in range(self.DOPPLER_FFT_SIZE):
out_re[bin_offset + b] = fft_out_re[b]
out_im[bin_offset + b] = fft_out_im[b]
doppler_map_i.append(out_re)
doppler_map_q.append(out_im)
return doppler_map_i, doppler_map_q
@@ -1207,7 +1223,7 @@ class SignalChain:
IF_FREQ = 120_000_000 # IF frequency
FTW_120MHZ = 0x4CCCCCCD # Phase increment for 120 MHz at 400 MSPS
def __init__(self, twiddle_file_1024=None, twiddle_file_32=None):
def __init__(self, twiddle_file_1024=None, twiddle_file_16=None):
self.nco = NCO()
self.mixer = Mixer()
self.cic_i = CICDecimator()
@@ -1217,7 +1233,7 @@ class SignalChain:
self.ddc_interface = DDCInputInterface()
self.matched_filter = MatchedFilterChain(fft_size=1024, twiddle_file=twiddle_file_1024)
self.range_decimator = RangeBinDecimator()
self.doppler = DopplerProcessor(twiddle_file_32=twiddle_file_32)
self.doppler = DopplerProcessor(twiddle_file_16=twiddle_file_16)
def ddc_step(self, adc_data_8bit, ftw=None):
"""
@@ -3,23 +3,17 @@
Generate Doppler processor co-simulation golden reference data.
Uses the bit-accurate Python model (fpga_model.py) to compute the expected
Doppler FFT output. Also generates the input hex files consumed by the
Verilog testbench (tb_doppler_cosim.v).
Doppler FFT output for the dual 16-pt FFT architecture. Also generates the
input hex files consumed by the Verilog testbench (tb_doppler_cosim.v).
Two output modes:
1. "clean" — straight Python model (correct windowing alignment)
2. "buggy" — replicates the RTL's windowing pipeline misalignment:
* Sample 0: fft_input = 0 (from reset mult value)
* Sample 1: fft_input = window_multiply(data[wrong_rbin_or_0], window[0])
* Sample k (k>=2): fft_input = window_multiply(data[k-2], window[k-1])
Default mode is "clean". The comparison script uses correlation-based
metrics that are tolerant of the pipeline shift.
Architecture:
Sub-frame 0 (long PRI): chirps 0-15 -> 16-pt Hamming -> 16-pt FFT -> bins 0-15
Sub-frame 1 (short PRI): chirps 16-31 -> 16-pt Hamming -> 16-pt FFT -> bins 16-31
Usage:
cd ~/PLFM_RADAR/9_Firmware/9_2_FPGA/tb/cosim
python3 gen_doppler_golden.py # clean model
python3 gen_doppler_golden.py --buggy # replicate RTL pipeline bug
python3 gen_doppler_golden.py
python3 gen_doppler_golden.py stationary # single scenario
Author: Phase 0.5 Doppler co-simulation suite for PLFM_RADAR
"""
@@ -31,7 +25,7 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from fpga_model import (
DopplerProcessor, FFTEngine, sign_extend, HAMMING_WINDOW
DopplerProcessor, sign_extend, HAMMING_WINDOW
)
from radar_scene import Target, generate_doppler_frame
@@ -40,7 +34,8 @@ from radar_scene import Target, generate_doppler_frame
# Constants
# =============================================================================
DOPPLER_FFT_SIZE = 32
DOPPLER_FFT_SIZE = 16 # Per sub-frame
DOPPLER_TOTAL_BINS = 32 # Total output (2 sub-frames x 16)
RANGE_BINS = 64
CHIRPS_PER_FRAME = 32
TOTAL_SAMPLES = CHIRPS_PER_FRAME * RANGE_BINS # 2048
@@ -82,154 +77,6 @@ def write_hex_16bit(filepath, data):
# Buggy-model helpers (match RTL pipeline misalignment)
# =============================================================================
def window_multiply(data_16, window_16):
"""Hamming window multiply matching RTL."""
d = sign_extend(data_16 & 0xFFFF, 16)
w = sign_extend(window_16 & 0xFFFF, 16)
product = d * w
rounded = product + (1 << 14)
result = rounded >> 15
return sign_extend(result & 0xFFFF, 16)
def buggy_process_frame(chirp_data_i, chirp_data_q):
"""
Replicate the RTL's exact windowing pipeline for all 64 range bins.
For each range bin we model the three-stage pipeline:
Stage A (BRAM registered read):
mem_rdata captures doppler_i_mem[mem_read_addr] one cycle AFTER
mem_read_addr is presented.
Stage B (multiply):
mult_i <= mem_rdata_i * window_coeff[read_doppler_index]
-- read_doppler_index is the CURRENT cycle's value, but mem_rdata_i
-- is from the PREVIOUS cycle's address.
Stage C (round+shift):
fft_input_i <= (mult_i + (1<<14)) >>> 15
-- uses the PREVIOUS cycle's mult_i.
Additionally, at the S_ACCUMULATE->S_LOAD_FFT transition (rbin=0) or
S_OUTPUT->S_LOAD_FFT transition (rbin>0), the BRAM address during the
transition cycle depends on the stale read_doppler_index and read_range_bin
values.
This function models every detail to produce bit-exact FFT inputs.
"""
# Build the 32-pt FFT engine (matching fpga_model.py)
import math as _math
cos_rom_32 = []
for k in range(8):
val = round(32767.0 * _math.cos(2.0 * _math.pi * k / 32.0))
cos_rom_32.append(sign_extend(val & 0xFFFF, 16))
fft32 = FFTEngine.__new__(FFTEngine)
fft32.N = 32
fft32.LOG2N = 5
fft32.cos_rom = cos_rom_32
fft32.mem_re = [0] * 32
fft32.mem_im = [0] * 32
# Build flat BRAM contents: addr = chirp_index * 64 + range_bin
bram_i = [0] * TOTAL_SAMPLES
bram_q = [0] * TOTAL_SAMPLES
for chirp in range(CHIRPS_PER_FRAME):
for rb in range(RANGE_BINS):
addr = chirp * RANGE_BINS + rb
bram_i[addr] = sign_extend(chirp_data_i[chirp][rb] & 0xFFFF, 16)
bram_q[addr] = sign_extend(chirp_data_q[chirp][rb] & 0xFFFF, 16)
doppler_map_i = []
doppler_map_q = []
# State carried across range bins (simulates the RTL registers)
# After reset: read_doppler_index=0, read_range_bin=0, mult_i=0, mult_q=0,
# fft_input_i=0, fft_input_q=0
# The BRAM read is always active: mem_rdata <= doppler_i_mem[mem_read_addr]
# mem_read_addr = read_doppler_index * 64 + read_range_bin
# We need to track what read_doppler_index and read_range_bin are at each
# transition, since the BRAM captures data one cycle before S_LOAD_FFT runs.
# Before processing starts (just entered S_LOAD_FFT from S_ACCUMULATE):
# At the S_ACCUMULATE clock that transitions:
# read_doppler_index <= 0 (NBA)
# read_range_bin <= 0 (NBA)
# These take effect NEXT cycle. At the transition clock itself,
# read_doppler_index and read_range_bin still had their old values.
# From reset, both were 0. So BRAM captures addr=0*64+0=0.
#
# For rbin>0 transitions from S_OUTPUT:
# At S_OUTPUT clock:
# read_doppler_index <= 0 (was 0, since it wrapped from 32->0 in 5 bits)
# read_range_bin <= prev_rbin + 1 (NBA, takes effect next cycle)
# At S_OUTPUT clock, the current read_range_bin = prev_rbin,
# read_doppler_index = 0 (wrapped). So BRAM captures addr=0*64+prev_rbin.
for rbin in range(RANGE_BINS):
# Determine what BRAM data was captured during the transition clock
# (one cycle before S_LOAD_FFT's first execution cycle).
if rbin == 0:
# From S_ACCUMULATE: both indices were 0 (from reset or previous NBA)
# BRAM captures addr = 0*64+0 = 0 -> data[chirp=0][rbin=0]
transition_bram_addr = 0 * RANGE_BINS + 0
else:
# From S_OUTPUT: read_doppler_index=0 (wrapped), read_range_bin=rbin-1
# BRAM captures addr = 0*64+(rbin-1) -> data[chirp=0][rbin-1]
transition_bram_addr = 0 * RANGE_BINS + (rbin - 1)
transition_data_i = bram_i[transition_bram_addr]
transition_data_q = bram_q[transition_bram_addr]
# Now simulate the 32 cycles of S_LOAD_FFT for this range bin.
# Register pipeline state at entry:
mult_i_reg = 0 # From reset (rbin=0) or from end of previous S_FFT_WAIT
mult_q_reg = 0
fft_in_i_list = []
fft_in_q_list = []
for k in range(DOPPLER_FFT_SIZE):
# read_doppler_index = k at this cycle's start
# mem_read_addr = k * 64 + rbin
# What mem_rdata holds THIS cycle:
if k == 0:
# BRAM captured transition_bram_addr last cycle
rd_i = transition_data_i
rd_q = transition_data_q
else:
# BRAM captured addr from PREVIOUS cycle: (k-1)*64 + rbin
prev_addr = (k - 1) * RANGE_BINS + rbin
rd_i = bram_i[prev_addr]
rd_q = bram_q[prev_addr]
# Stage B: multiply (uses current read_doppler_index = k)
new_mult_i = sign_extend(rd_i & 0xFFFF, 16) * \
sign_extend(HAMMING_WINDOW[k] & 0xFFFF, 16)
new_mult_q = sign_extend(rd_q & 0xFFFF, 16) * \
sign_extend(HAMMING_WINDOW[k] & 0xFFFF, 16)
# Stage C: round+shift (uses PREVIOUS cycle's mult)
fft_i = (mult_i_reg + (1 << 14)) >> 15
fft_q = (mult_q_reg + (1 << 14)) >> 15
fft_in_i_list.append(sign_extend(fft_i & 0xFFFF, 16))
fft_in_q_list.append(sign_extend(fft_q & 0xFFFF, 16))
# Update pipeline registers for next cycle
mult_i_reg = new_mult_i
mult_q_reg = new_mult_q
# 32-point FFT
fft_out_re, fft_out_im = fft32.compute(
fft_in_i_list, fft_in_q_list, inverse=False
)
doppler_map_i.append(fft_out_re)
doppler_map_q.append(fft_out_im)
return doppler_map_i, doppler_map_q
# =============================================================================
# Test scenario definitions
@@ -244,9 +91,10 @@ def make_scenario_stationary():
def make_scenario_moving():
"""Single target with moderate Doppler shift."""
# v = 15 m/s → fd = 2*v*fc/c ≈ 1050 Hz
# PRI = 167 us → Doppler bin = fd * N_chirps * PRI = 1050 * 32 * 167e-6 ≈ 5.6
# Long PRI = 167 us → sub-frame 0 bin = fd * 16 * 167e-6 ≈ 2.8 → bin ~3
# Short PRI = 175 us → sub-frame 1 bin = fd * 16 * 175e-6 ≈ 2.9 → bin 16+3 = 19
targets = [Target(range_m=500, velocity_mps=15.0, rcs_dbsm=20.0)]
return targets, "Single moving target v=15m/s (~1050Hz Doppler, bin~5-6)"
return targets, "Single moving target v=15m/s (~1050Hz Doppler, sf0 bin~3, sf1 bin~19)"
def make_scenario_two_targets():
@@ -269,12 +117,11 @@ SCENARIOS = {
# Main generator
# =============================================================================
def generate_scenario(name, targets, description, base_dir, use_buggy_model=False):
def generate_scenario(name, targets, description, base_dir):
"""Generate input hex + golden output for one scenario."""
print(f"\n{'='*60}")
print(f"Scenario: {name}{description}")
model_label = "BUGGY (RTL pipeline)" if use_buggy_model else "CLEAN"
print(f"Model: {model_label}")
print(f"Model: CLEAN (dual 16-pt FFT)")
print(f"{'='*60}")
# Generate Doppler frame (32 chirps x 64 range bins)
@@ -292,26 +139,24 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
input_hex = os.path.join(base_dir, f"doppler_input_{name}.hex")
write_hex_32bit(input_hex, packed_samples)
# ---- Run through Python model ----
if use_buggy_model:
doppler_i, doppler_q = buggy_process_frame(frame_i, frame_q)
else:
dp = DopplerProcessor()
doppler_i, doppler_q = dp.process_frame(frame_i, frame_q)
# ---- Run through Python model (dual 16-pt FFT) ----
dp = DopplerProcessor()
doppler_i, doppler_q = dp.process_frame(frame_i, frame_q)
print(f" Doppler output: {len(doppler_i)} range bins x "
f"{len(doppler_i[0])} doppler bins")
f"{len(doppler_i[0])} doppler bins (2 sub-frames x {DOPPLER_FFT_SIZE})")
# ---- Write golden output CSV ----
# Format: range_bin, doppler_bin, out_i, out_q
# Ordered same as RTL output: all doppler bins for rbin 0, then rbin 1, ...
# Bins 0-15 = sub-frame 0 (long PRI), bins 16-31 = sub-frame 1 (short PRI)
flat_rbin = []
flat_dbin = []
flat_i = []
flat_q = []
for rbin in range(RANGE_BINS):
for dbin in range(DOPPLER_FFT_SIZE):
for dbin in range(DOPPLER_TOTAL_BINS):
flat_rbin.append(rbin)
flat_dbin.append(dbin)
flat_i.append(doppler_i[rbin][dbin])
@@ -331,8 +176,8 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
peak_info = []
for rbin in range(RANGE_BINS):
mags = [abs(doppler_i[rbin][d]) + abs(doppler_q[rbin][d])
for d in range(DOPPLER_FFT_SIZE)]
peak_dbin = max(range(DOPPLER_FFT_SIZE), key=lambda d: mags[d])
for d in range(DOPPLER_TOTAL_BINS)]
peak_dbin = max(range(DOPPLER_TOTAL_BINS), key=lambda d: mags[d])
peak_mag = mags[peak_dbin]
peak_info.append((rbin, peak_dbin, peak_mag))
@@ -341,33 +186,14 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
for rbin, dbin, mag in peak_info[:5]:
i_val = doppler_i[rbin][dbin]
q_val = doppler_q[rbin][dbin]
print(f" rbin={rbin:2d}, dbin={dbin:2d}, mag={mag:6d}, "
sf = dbin // DOPPLER_FFT_SIZE
bin_in_sf = dbin % DOPPLER_FFT_SIZE
print(f" rbin={rbin:2d}, dbin={dbin:2d} (sf{sf}:{bin_in_sf:2d}), mag={mag:6d}, "
f"I={i_val:6d}, Q={q_val:6d}")
# ---- Write frame data for debugging ----
# Also write per-range-bin FFT input (for debugging pipeline alignment)
if use_buggy_model:
# Write the buggy FFT inputs for debugging
debug_csv = os.path.join(base_dir, f"doppler_fft_inputs_{name}.csv")
# Regenerate to capture FFT inputs
dp_debug = DopplerProcessor()
clean_i, clean_q = dp_debug.process_frame(frame_i, frame_q)
# Show the difference between clean and buggy
print(f"\n Comparing clean vs buggy model outputs:")
mismatches = 0
for rbin in range(RANGE_BINS):
for dbin in range(DOPPLER_FFT_SIZE):
if (doppler_i[rbin][dbin] != clean_i[rbin][dbin] or
doppler_q[rbin][dbin] != clean_q[rbin][dbin]):
mismatches += 1
total = RANGE_BINS * DOPPLER_FFT_SIZE
print(f" {mismatches}/{total} output samples differ "
f"({100*mismatches/total:.1f}%)")
return {
'name': name,
'description': description,
'model': 'buggy' if use_buggy_model else 'clean',
'peak_info': peak_info[:5],
}
@@ -375,11 +201,9 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
def main():
base_dir = os.path.dirname(os.path.abspath(__file__))
use_buggy = '--buggy' in sys.argv
print("=" * 60)
print("Doppler Processor Co-Sim Golden Reference Generator")
print(f"Model: {'BUGGY (RTL pipeline replication)' if use_buggy else 'CLEAN'}")
print(f"Architecture: dual {DOPPLER_FFT_SIZE}-pt FFT ({DOPPLER_TOTAL_BINS} total bins)")
print("=" * 60)
scenarios_to_run = list(SCENARIOS.keys())
@@ -395,15 +219,14 @@ def main():
results = []
for name in scenarios_to_run:
targets, description = SCENARIOS[name]()
r = generate_scenario(name, targets, description, base_dir,
use_buggy_model=use_buggy)
r = generate_scenario(name, targets, description, base_dir)
results.append(r)
print(f"\n{'='*60}")
print("Summary:")
print(f"{'='*60}")
for r in results:
print(f" {r['name']:<15s} [{r['model']}] top peak: "
print(f" {r['name']:<15s} top peak: "
f"rbin={r['peak_info'][0][0]}, dbin={r['peak_info'][0][1]}, "
f"mag={r['peak_info'][0][2]}")
+17 -7
View File
@@ -48,19 +48,24 @@ ADC_BITS = 8 # ADC resolution
T_LONG_CHIRP = 30e-6 # 30 us long chirp duration
T_SHORT_CHIRP = 0.5e-6 # 0.5 us short chirp
T_LISTEN_LONG = 137e-6 # 137 us listening window
T_PRI_LONG = 167e-6 # 30 us chirp + 137 us listen
T_PRI_SHORT = 175e-6 # staggered short-PRI sub-frame
N_SAMPLES_LISTEN = int(T_LISTEN_LONG * FS_ADC) # 54800 samples
# Processing chain
CIC_DECIMATION = 4
FFT_SIZE = 1024
RANGE_BINS = 64
DOPPLER_FFT_SIZE = 32
DOPPLER_FFT_SIZE = 16 # Per sub-frame
DOPPLER_TOTAL_BINS = 32 # Total output bins (2 sub-frames x 16)
CHIRPS_PER_SUBFRAME = 16
CHIRPS_PER_FRAME = 32
# Derived
RANGE_RESOLUTION = C_LIGHT / (2 * CHIRP_BW) # 7.5 m
MAX_UNAMBIGUOUS_RANGE = C_LIGHT * T_LISTEN_LONG / 2 # ~20.55 km
VELOCITY_RESOLUTION = WAVELENGTH / (2 * CHIRPS_PER_FRAME * T_LONG_CHIRP)
VELOCITY_RESOLUTION_LONG = WAVELENGTH / (2 * CHIRPS_PER_SUBFRAME * T_PRI_LONG)
VELOCITY_RESOLUTION_SHORT = WAVELENGTH / (2 * CHIRPS_PER_SUBFRAME * T_PRI_SHORT)
# Short chirp LUT (60 entries, 8-bit unsigned)
SHORT_CHIRP_LUT = [
@@ -384,9 +389,6 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
break
return math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2)
# Chirp repetition interval (PRI)
t_pri = T_LONG_CHIRP + T_LISTEN_LONG # ~167 us
frame_i = []
frame_q = []
@@ -408,8 +410,16 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
# Amplitude (simplified)
amp = target.amplitude / 4.0
# Doppler phase for this chirp
doppler_phase = 2 * math.pi * target.doppler_hz * chirp_idx * t_pri
# Doppler phase for this chirp.
# The frame uses staggered PRF: chirps 0-15 use the long PRI,
# chirps 16-31 use the short PRI.
if chirp_idx < CHIRPS_PER_SUBFRAME:
slow_time_s = chirp_idx * T_PRI_LONG
else:
slow_time_s = (CHIRPS_PER_SUBFRAME * T_PRI_LONG) + \
((chirp_idx - CHIRPS_PER_SUBFRAME) * T_PRI_SHORT)
doppler_phase = 2 * math.pi * target.doppler_hz * slow_time_s
total_phase = doppler_phase + target.phase_deg * math.pi / 180.0
# Spread across a few bins (sinc-like response from matched filter)
@@ -91,6 +91,7 @@ doppler_processor_optimized dut (
.doppler_valid(doppler_valid),
.doppler_bin(doppler_bin),
.range_bin(range_bin),
.sub_frame(), // Not used in this testbench
.processing_active(processing_active),
.frame_complete(frame_complete),
.status(dut_status)
@@ -75,6 +75,7 @@ doppler_processor_optimized dut (
.doppler_valid(doppler_valid),
.doppler_bin(doppler_bin),
.range_bin(range_bin),
.sub_frame(), // Not used in this testbench
.processing_active(processing_active),
.frame_complete(frame_complete),
.status(dut_status)
+252
View File
@@ -0,0 +1,252 @@
`timescale 1ns / 1ps
// ============================================================================
// xfft_16.v - 16-point FFT with AXI-Stream interface
// ============================================================================
// Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
// interface expected by the doppler_processor dual-FFT architecture.
//
// Identical interface to xfft_32.v but with N=16.
//
// Data format: {Q[15:0], I[15:0]} packed 32-bit.
// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
// ============================================================================
// ----------------------------------------------------------------------------
// xfft_16: AXI-Stream style wrapper around fft_engine instantiated with N = 16.
//
// One transform proceeds through the FSM as follows:
//   S_IDLE   : wait for a configuration beat; tdata[0] selects the direction
//              (1 = forward, 0 = inverse).
//   S_FEED   : capture N input samples from the slave data channel into in_buf.
//   S_WAIT   : replay in_buf into fft_engine and collect its results in out_buf
//              until the engine signals done.
//   S_OUTPUT : stream out_buf on the master data channel, then return to S_IDLE.
//
// Handshake notes:
//   * There is no s_axis_data_tready port. An input sample is captured only
//     when s_axis_data_tvalid is high while the FSM is in S_FEED with fewer
//     than N samples stored; a sample presented at any other time is not
//     captured.
//   * s_axis_config_tready is high only in S_IDLE.
//   * Reset is asynchronous, active-low (aresetn) for the FSM registers; the
//     sample buffers themselves are not reset.
// ----------------------------------------------------------------------------
module xfft_16 (
input wire aclk,
input wire aresetn,
// Configuration channel (AXI-Stream slave)
// Only bit 0 of s_axis_config_tdata is read by this module.
input wire [7:0] s_axis_config_tdata,
input wire s_axis_config_tvalid,
output wire s_axis_config_tready,
// Data input channel (AXI-Stream slave)
// Packed as {imag[15:0], real[15:0]}.
input wire [31:0] s_axis_data_tdata,
input wire s_axis_data_tvalid,
// s_axis_data_tlast is not referenced inside this module; the frame length
// is fixed by counting N samples.
input wire s_axis_data_tlast,
// Data output channel (AXI-Stream master)
// Packed as {imag[15:0], real[15:0]}.
output wire [31:0] m_axis_data_tdata,
output wire m_axis_data_tvalid,
output wire m_axis_data_tlast,
input wire m_axis_data_tready
);
// ============================================================================
// PARAMETERS
// ============================================================================
// Transform length and its base-2 logarithm, passed to fft_engine.
localparam N = 16;
localparam LOG2N = 4;
// ============================================================================
// INTERNAL SIGNALS
// ============================================================================
// FSM states
// NOTE: S_CONFIG is declared but never assigned to `state`; S_IDLE moves
// directly to S_FEED when a config beat arrives.
localparam [2:0] S_IDLE = 3'd0,
S_CONFIG = 3'd1,
S_FEED = 3'd2,
S_WAIT = 3'd3,
S_OUTPUT = 3'd4;
reg [2:0] state;
// Configuration
// Direction captured from the config beat: 1 = inverse, 0 = forward.
reg inverse_reg;
// Input buffering
// Counters are 5 bits wide so they can hold the terminal value N (16);
// only bits [3:0] are used as buffer addresses.
reg signed [15:0] in_buf_re [0:N-1];
reg signed [15:0] in_buf_im [0:N-1];
reg [4:0] in_count;
// Output buffering
// out_count: index of the beat currently presented on the master channel.
// out_total: number of engine results stored so far in out_buf.
reg signed [15:0] out_buf_re [0:N-1];
reg signed [15:0] out_buf_im [0:N-1];
reg [4:0] out_count;
reg [4:0] out_total;
// FFT engine interface
reg fft_start;
reg fft_inverse;
reg signed [15:0] fft_din_re, fft_din_im;
reg fft_din_valid;
wire signed [15:0] fft_dout_re, fft_dout_im;
wire fft_dout_valid;
// fft_busy is connected to the engine but not read anywhere in this module.
wire fft_busy;
wire fft_done;
// Feed counter
// Number of buffered samples already replayed into the engine during S_WAIT.
reg [4:0] feed_count;
// ============================================================================
// FFT ENGINE INSTANCE
// ============================================================================
// fft_engine is defined outside this file; its handshake timing is assumed to
// match the way S_FEED/S_WAIT drive it below (start pulse, then N din_valid
// beats) -- TODO confirm against fft_engine.
fft_engine #(
.N(N),
.LOG2N(LOG2N),
.DATA_W(16),
.INTERNAL_W(32),
.TWIDDLE_W(16),
.TWIDDLE_FILE("fft_twiddle_16.mem")
) fft_core (
.clk(aclk),
.reset_n(aresetn),
.start(fft_start),
.inverse(fft_inverse),
.din_re(fft_din_re),
.din_im(fft_din_im),
.din_valid(fft_din_valid),
.dout_re(fft_dout_re),
.dout_im(fft_dout_im),
.dout_valid(fft_dout_valid),
.busy(fft_busy),
.done(fft_done)
);
// ============================================================================
// AXI-STREAM OUTPUTS
// ============================================================================
// Config beats are accepted only while idle.
assign s_axis_config_tready = (state == S_IDLE);
// Output data is a combinational read of out_buf at the current beat index.
assign m_axis_data_tdata = {out_buf_im[out_count[3:0]], out_buf_re[out_count[3:0]]};
// tvalid is held for every beat of S_OUTPUT; tlast marks beat N-1.
assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
assign m_axis_data_tlast = (state == S_OUTPUT) && (out_count == N - 1);
// ============================================================================
// BUFFER WRITE LOGIC - separate always block, NO async reset
// ============================================================================
// The FSM registers a write strobe, address and data; the actual array write
// therefore happens one clock after the FSM decides to store a sample.
// Keeping the arrays out of the reset branch is presumably meant to let them
// map to memory primitives -- confirm with the synthesis flow.
reg in_buf_we;
reg [3:0] in_buf_waddr;
reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
reg out_buf_we;
reg [3:0] out_buf_waddr;
reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
always @(posedge aclk) begin
if (in_buf_we) begin
in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
end
if (out_buf_we) begin
out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
end
end
// ============================================================================
// MAIN FSM
// ============================================================================
always @(posedge aclk or negedge aresetn) begin
if (!aresetn) begin
state <= S_IDLE;
inverse_reg <= 1'b0;
in_count <= 0;
out_count <= 0;
out_total <= 0;
feed_count <= 0;
fft_start <= 1'b0;
fft_inverse <= 1'b0;
fft_din_re <= 0;
fft_din_im <= 0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
in_buf_waddr <= 0;
in_buf_wdata_re <= 0;
in_buf_wdata_im <= 0;
out_buf_we <= 1'b0;
out_buf_waddr <= 0;
out_buf_wdata_re <= 0;
out_buf_wdata_im <= 0;
end else begin
// Defaults: every strobe is a single-cycle pulse unless a state below
// re-asserts it in the same cycle (later non-blocking assignment wins).
fft_start <= 1'b0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
out_buf_we <= 1'b0;
case (state)
// Wait for a configuration beat and latch the transform direction.
S_IDLE: begin
in_count <= 0;
if (s_axis_config_tvalid) begin
// Config bit 0 is "forward"; store it inverted as "inverse".
inverse_reg <= ~s_axis_config_tdata[0];
state <= S_FEED;
in_count <= 0;
feed_count <= 0;
end
end
// Capture N samples, then kick off the engine.
S_FEED: begin
if (in_count < N) begin
if (s_axis_data_tvalid) begin
// Schedule the buffer write; it lands on the next clock edge.
in_buf_we <= 1'b1;
in_buf_waddr <= in_count[3:0];
in_buf_wdata_re <= s_axis_data_tdata[15:0];
in_buf_wdata_im <= s_axis_data_tdata[31:16];
in_count <= in_count + 1;
end
end else if (feed_count == 0) begin
// All N samples accepted. The write of the last sample lands on this
// same edge, so the buffer is complete before S_WAIT starts reading it.
// feed_count was cleared on leaving S_IDLE and is not changed in
// S_FEED, so this condition holds on the first cycle with in_count == N.
fft_start <= 1'b1;
fft_inverse <= inverse_reg;
feed_count <= 0;
state <= S_WAIT;
out_total <= 0;
end
end
// Replay the input buffer into the engine and collect its results.
S_WAIT: begin
// One buffered sample per cycle, N cycles, starting the cycle after
// fft_start was registered.
if (feed_count < N) begin
fft_din_re <= in_buf_re[feed_count[3:0]];
fft_din_im <= in_buf_im[feed_count[3:0]];
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
// Store engine results in arrival order; anything beyond N is dropped.
if (fft_dout_valid && out_total < N) begin
out_buf_we <= 1'b1;
out_buf_waddr <= out_total[3:0];
out_buf_wdata_re <= fft_dout_re;
out_buf_wdata_im <= fft_dout_im;
out_total <= out_total + 1;
end
// Leave on done. Results arriving after done would not be captured,
// so this assumes the engine has emitted all N outputs by then
// -- TODO confirm against fft_engine.
if (fft_done) begin
state <= S_OUTPUT;
out_count <= 0;
end
end
// Stream the N results; advance one beat per cycle in which tready is high.
S_OUTPUT: begin
// Both inner actions also require m_axis_data_tready, so nothing
// changes in a cycle where the consumer is not ready.
if (m_axis_data_tready || !m_axis_data_tvalid) begin
if (out_count < N) begin
if (m_axis_data_tready) begin
out_count <= out_count + 1;
end
end
// Last beat (index N-1) accepted: frame finished, go back to idle.
if (out_count >= N - 1 && m_axis_data_tready) begin
state <= S_IDLE;
end
end
end
// Unused encodings (including S_CONFIG) recover to idle.
default: state <= S_IDLE;
endcase
end
end
// ============================================================================
// MEMORY INIT (simulation only)
// ============================================================================
// Zero both buffers at time 0 when SIMULATION is defined; without it the
// arrays have no initial value since they are not covered by the reset.
`ifdef SIMULATION
integer init_k;
initial begin
for (init_k = 0; init_k < N; init_k = init_k + 1) begin
in_buf_re[init_k] = 0;
in_buf_im[init_k] = 0;
out_buf_re[init_k] = 0;
out_buf_im[init_k] = 0;
end
end
`endif
endmodule