Convert async→sync reset on DSP/BRAM datapath registers for timing closure
P1-CRITICAL: doppler_processor.v — split FSM into control (async reset) and BRAM/DSP datapath (sync reset) blocks. Removes the REQP-1839/1840 BRAM address register corruption risk and enables DSP48 absorption of the window multipliers (mult_i/mult_q). P1-CRITICAL: frequency_matched_filter.v — convert all 4 pipeline stages (input capture, multiply, add, saturate) from async to sync reset. Enables DSP48E1 absorption of the complex multiplier registers. P1-HIGH: fir_lowpass.v — convert adder tree (L0-L4), output stage, and valid pipeline from async to sync reset. Fixes 856 DPOR-1 warnings (428 per FIR instance × 2 I/Q channels), enabling DSP48 absorption of the entire pipelined adder tree. Expected impact: eliminate ~1000 DRC warnings and improve WNS beyond the current +0.019 ns by enabling Vivado to absorb hundreds of registers into DSP48E1/BRAM hard blocks. Full regression: 13/13 test suites pass (257+ assertions).
This commit is contained in:
@@ -183,8 +183,12 @@ always @(posedge clk) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// ----------------------------------------------------------
|
// ----------------------------------------------------------
|
||||||
// Main FSM — async reset for control registers only.
|
// Block 1: FSM / Control — async reset (posedge clk or negedge reset_n).
|
||||||
// Memory arrays are NOT touched here.
|
// Only state-machine and control registers live here.
|
||||||
|
// BRAM-driving and DSP datapath registers are intentionally
|
||||||
|
// excluded to avoid Vivado REQP-1839 (async-reset on BRAM
|
||||||
|
// address) and DPOR-1/DPIP-1 (async-reset blocking DSP48
|
||||||
|
// absorption) DRC warnings.
|
||||||
// ----------------------------------------------------------
|
// ----------------------------------------------------------
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk or negedge reset_n) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
@@ -203,21 +207,13 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
status <= 0;
|
status <= 0;
|
||||||
chirps_received <= 0;
|
chirps_received <= 0;
|
||||||
chirp_state <= 0;
|
chirp_state <= 0;
|
||||||
mem_we <= 0;
|
|
||||||
mem_waddr_r <= 0;
|
|
||||||
mem_wdata_i <= 0;
|
|
||||||
mem_wdata_q <= 0;
|
|
||||||
mult_i <= 0;
|
|
||||||
mult_q <= 0;
|
|
||||||
fft_input_i <= 0;
|
|
||||||
fft_input_q <= 0;
|
|
||||||
doppler_output <= 0;
|
doppler_output <= 0;
|
||||||
doppler_bin <= 0;
|
doppler_bin <= 0;
|
||||||
|
range_bin <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
doppler_valid <= 0;
|
doppler_valid <= 0;
|
||||||
fft_input_valid <= 0;
|
fft_input_valid <= 0;
|
||||||
fft_input_last <= 0;
|
fft_input_last <= 0;
|
||||||
mem_we <= 0;
|
|
||||||
|
|
||||||
if (processing_timeout > 0) begin
|
if (processing_timeout > 0) begin
|
||||||
processing_timeout <= processing_timeout - 1;
|
processing_timeout <= processing_timeout - 1;
|
||||||
@@ -235,25 +231,12 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
|
|
||||||
if (data_valid && !frame_buffer_full) begin
|
if (data_valid && !frame_buffer_full) begin
|
||||||
state <= S_ACCUMULATE;
|
state <= S_ACCUMULATE;
|
||||||
// Write the first sample immediately (Bug #3 fix:
|
|
||||||
// previously this transition consumed data_valid
|
|
||||||
// without writing to BRAM)
|
|
||||||
mem_we <= 1;
|
|
||||||
mem_waddr_r <= mem_write_addr;
|
|
||||||
mem_wdata_i <= range_data[15:0];
|
|
||||||
mem_wdata_q <= range_data[31:16];
|
|
||||||
write_range_bin <= 1;
|
write_range_bin <= 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
S_ACCUMULATE: begin
|
S_ACCUMULATE: begin
|
||||||
if (data_valid) begin
|
if (data_valid) begin
|
||||||
// Drive memory write signals (actual write in separate block)
|
|
||||||
mem_we <= 1;
|
|
||||||
mem_waddr_r <= mem_write_addr;
|
|
||||||
mem_wdata_i <= range_data[15:0];
|
|
||||||
mem_wdata_q <= range_data[31:16];
|
|
||||||
|
|
||||||
// Increment range bin
|
// Increment range bin
|
||||||
if (write_range_bin < RANGE_BINS - 1) begin
|
if (write_range_bin < RANGE_BINS - 1) begin
|
||||||
write_range_bin <= write_range_bin + 1;
|
write_range_bin <= write_range_bin + 1;
|
||||||
@@ -330,10 +313,7 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
|
|
||||||
if (fft_sample_counter == 0) begin
|
if (fft_sample_counter == 0) begin
|
||||||
// Sub 0: pre-multiply. mem_rdata_i = data[chirp=0][rbin].
|
// Sub 0: pre-multiply. mem_rdata_i = data[chirp=0][rbin].
|
||||||
mult_i <= $signed(mem_rdata_i) *
|
// (mult_i/mult_q computed in Block 2)
|
||||||
$signed(window_coeff[0]);
|
|
||||||
mult_q <= $signed(mem_rdata_q) *
|
|
||||||
$signed(window_coeff[0]);
|
|
||||||
// Present BRAM addr for chirp 2 (sub=1 reads chirp 1
|
// Present BRAM addr for chirp 2 (sub=1 reads chirp 1
|
||||||
// from the BRAM read we triggered in S_PRE_READ;
|
// from the BRAM read we triggered in S_PRE_READ;
|
||||||
// we need chirp 2 ready for sub=2).
|
// we need chirp 2 ready for sub=2).
|
||||||
@@ -342,9 +322,7 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
fft_sample_counter <= 1;
|
fft_sample_counter <= 1;
|
||||||
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE) begin
|
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE) begin
|
||||||
// Sub 1..32
|
// Sub 1..32
|
||||||
// Capture previous mult into fft_input
|
// (fft_input_i/fft_input_q captured in Block 2)
|
||||||
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
|
|
||||||
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
|
||||||
fft_input_valid <= 1;
|
fft_input_valid <= 1;
|
||||||
|
|
||||||
if (fft_sample_counter == DOPPLER_FFT_SIZE) begin
|
if (fft_sample_counter == DOPPLER_FFT_SIZE) begin
|
||||||
@@ -358,11 +336,7 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
read_doppler_index <= 0;
|
read_doppler_index <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
// Sub 1..31: also compute new mult from current BRAM data
|
// Sub 1..31: also compute new mult from current BRAM data
|
||||||
// mem_rdata_i = data[chirp = fft_sample_counter][rbin]
|
// (mult_i/mult_q computed in Block 2)
|
||||||
mult_i <= $signed(mem_rdata_i) *
|
|
||||||
$signed(window_coeff[fft_sample_counter]);
|
|
||||||
mult_q <= $signed(mem_rdata_q) *
|
|
||||||
$signed(window_coeff[fft_sample_counter]);
|
|
||||||
// Advance BRAM read to chirp fft_sample_counter+2
|
// Advance BRAM read to chirp fft_sample_counter+2
|
||||||
// (so data is ready two cycles later when we need it)
|
// (so data is ready two cycles later when we need it)
|
||||||
// Clamp to DOPPLER_FFT_SIZE-1 to prevent OOB memory read
|
// Clamp to DOPPLER_FFT_SIZE-1 to prevent OOB memory read
|
||||||
@@ -413,6 +387,82 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// ----------------------------------------------------------
|
||||||
|
// Block 2: BRAM address/data & DSP datapath — synchronous reset only.
|
||||||
|
// Uses always @(posedge clk) so Vivado can absorb multipliers
|
||||||
|
// into DSP48 primitives and does not flag REQP-1839/1840 on
|
||||||
|
// BRAM address registers. Replicates the same state/condition
|
||||||
|
// structure as Block 1 for the eight registers:
|
||||||
|
// mem_we, mem_waddr_r, mem_wdata_i, mem_wdata_q,
|
||||||
|
// mult_i, mult_q, fft_input_i, fft_input_q
|
||||||
|
// ----------------------------------------------------------
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (!reset_n) begin
|
||||||
|
mem_we <= 0;
|
||||||
|
mem_waddr_r <= 0;
|
||||||
|
mem_wdata_i <= 0;
|
||||||
|
mem_wdata_q <= 0;
|
||||||
|
mult_i <= 0;
|
||||||
|
mult_q <= 0;
|
||||||
|
fft_input_i <= 0;
|
||||||
|
fft_input_q <= 0;
|
||||||
|
end else begin
|
||||||
|
mem_we <= 0;
|
||||||
|
|
||||||
|
case (state)
|
||||||
|
S_IDLE: begin
|
||||||
|
if (data_valid && !frame_buffer_full) begin
|
||||||
|
// Write the first sample immediately (Bug #3 fix:
|
||||||
|
// previously this transition consumed data_valid
|
||||||
|
// without writing to BRAM)
|
||||||
|
mem_we <= 1;
|
||||||
|
mem_waddr_r <= mem_write_addr;
|
||||||
|
mem_wdata_i <= range_data[15:0];
|
||||||
|
mem_wdata_q <= range_data[31:16];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
S_ACCUMULATE: begin
|
||||||
|
if (data_valid) begin
|
||||||
|
// Drive memory write signals (actual write in separate block)
|
||||||
|
mem_we <= 1;
|
||||||
|
mem_waddr_r <= mem_write_addr;
|
||||||
|
mem_wdata_i <= range_data[15:0];
|
||||||
|
mem_wdata_q <= range_data[31:16];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
S_LOAD_FFT: begin
|
||||||
|
if (fft_sample_counter == 0) begin
|
||||||
|
// Sub 0: pre-multiply. mem_rdata_i = data[chirp=0][rbin].
|
||||||
|
mult_i <= $signed(mem_rdata_i) *
|
||||||
|
$signed(window_coeff[0]);
|
||||||
|
mult_q <= $signed(mem_rdata_q) *
|
||||||
|
$signed(window_coeff[0]);
|
||||||
|
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE) begin
|
||||||
|
// Sub 1..32: capture previous mult into fft_input
|
||||||
|
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
|
||||||
|
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
||||||
|
|
||||||
|
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
|
||||||
|
// Sub 1..31: also compute new mult from current BRAM data
|
||||||
|
// mem_rdata_i = data[chirp = fft_sample_counter][rbin]
|
||||||
|
mult_i <= $signed(mem_rdata_i) *
|
||||||
|
$signed(window_coeff[fft_sample_counter]);
|
||||||
|
mult_q <= $signed(mem_rdata_q) *
|
||||||
|
$signed(window_coeff[fft_sample_counter]);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
default: begin
|
||||||
|
// S_PRE_READ, S_FFT_WAIT, S_OUTPUT:
|
||||||
|
// no BRAM-write or DSP operations needed
|
||||||
|
end
|
||||||
|
endcase
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
// ==============================================
|
// ==============================================
|
||||||
// FFT Module
|
// FFT Module
|
||||||
// ==============================================
|
// ==============================================
|
||||||
|
|||||||
@@ -109,8 +109,9 @@ end
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Pipeline Stage 1 (Level 0): Register 16 pairwise sums of 32 multiply results
|
// Pipeline Stage 1 (Level 0): Register 16 pairwise sums of 32 multiply results
|
||||||
// Each addition is a single 36-bit add — one DSP48E1 hop (~1.7ns), fits 10ns.
|
// Each addition is a single 36-bit add — one DSP48E1 hop (~1.7ns), fits 10ns.
|
||||||
|
// Sync reset enables DSP48E1 absorption (fixes DPOR-1 warnings)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
for (i = 0; i < 16; i = i + 1) begin
|
for (i = 0; i < 16; i = i + 1) begin
|
||||||
add_l0[i] <= 0;
|
add_l0[i] <= 0;
|
||||||
@@ -128,8 +129,9 @@ end
|
|||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Pipeline Stage 2 (Level 1): 8 pairwise sums of 16 Level-0 results
|
// Pipeline Stage 2 (Level 1): 8 pairwise sums of 16 Level-0 results
|
||||||
|
// Sync reset enables DSP48E1 absorption (fixes DPOR-1 warnings)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
for (i = 0; i < 8; i = i + 1) begin
|
for (i = 0; i < 8; i = i + 1) begin
|
||||||
add_l1[i] <= 0;
|
add_l1[i] <= 0;
|
||||||
@@ -143,8 +145,9 @@ end
|
|||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Pipeline Stage 3 (Level 2): 4 pairwise sums of 8 Level-1 results
|
// Pipeline Stage 3 (Level 2): 4 pairwise sums of 8 Level-1 results
|
||||||
|
// Sync reset enables DSP48E1 absorption (fixes DPOR-1 warnings)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
for (i = 0; i < 4; i = i + 1) begin
|
for (i = 0; i < 4; i = i + 1) begin
|
||||||
add_l2[i] <= 0;
|
add_l2[i] <= 0;
|
||||||
@@ -158,8 +161,9 @@ end
|
|||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Pipeline Stage 4 (Level 3): 2 pairwise sums of 4 Level-2 results
|
// Pipeline Stage 4 (Level 3): 2 pairwise sums of 4 Level-2 results
|
||||||
|
// Sync reset enables DSP48E1 absorption (fixes DPOR-1 warnings)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
add_l3[0] <= 0;
|
add_l3[0] <= 0;
|
||||||
add_l3[1] <= 0;
|
add_l3[1] <= 0;
|
||||||
@@ -171,8 +175,9 @@ end
|
|||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Pipeline Stage 5 (Level 4): Final sum of 2 Level-3 results
|
// Pipeline Stage 5 (Level 4): Final sum of 2 Level-3 results
|
||||||
|
// Sync reset enables DSP48E1 absorption (fixes DPOR-1 warnings)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
accumulator_reg <= 0;
|
accumulator_reg <= 0;
|
||||||
end else if (valid_pipe[4]) begin
|
end else if (valid_pipe[4]) begin
|
||||||
@@ -182,8 +187,9 @@ end
|
|||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Pipeline Stage 6: Output saturation/rounding (registered)
|
// Pipeline Stage 6: Output saturation/rounding (registered)
|
||||||
|
// Sync reset enables DSP48E1 absorption (fixes DPOR-1 warnings)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
data_out <= 0;
|
data_out <= 0;
|
||||||
data_out_valid <= 0;
|
data_out_valid <= 0;
|
||||||
@@ -206,8 +212,9 @@ end
|
|||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Valid pipeline shift register
|
// Valid pipeline shift register
|
||||||
|
// Sync reset — no DSP48 involvement but keeps reset style consistent with datapath
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk or negedge reset_n) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
valid_pipe <= 7'b0000000;
|
valid_pipe <= 7'b0000000;
|
||||||
end else begin
|
end else begin
|
||||||
|
|||||||
@@ -41,7 +41,8 @@ reg [9:0] addr_counter;
|
|||||||
|
|
||||||
|
|
||||||
// ========== PIPELINE STAGE 1: REGISTER INPUTS ==========
|
// ========== PIPELINE STAGE 1: REGISTER INPUTS ==========
|
||||||
always @(posedge clk or negedge reset_n) begin
|
// Sync reset: enables DSP48E1 absorption (fixes DPOR-1/DPIP-1 DRC)
|
||||||
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
a_reg <= 16'd0; b_reg <= 16'd0;
|
a_reg <= 16'd0; b_reg <= 16'd0;
|
||||||
c_reg <= 16'd0; d_reg <= 16'd0;
|
c_reg <= 16'd0; d_reg <= 16'd0;
|
||||||
@@ -58,7 +59,8 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// ========== PIPELINE STAGE 2: MULTIPLICATIONS ==========
|
// ========== PIPELINE STAGE 2: MULTIPLICATIONS ==========
|
||||||
always @(posedge clk or negedge reset_n) begin
|
// Sync reset: enables DSP48E1 absorption (fixes DPOR-1/DPIP-1 DRC)
|
||||||
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
ac_reg <= 32'd0; bd_reg <= 32'd0;
|
ac_reg <= 32'd0; bd_reg <= 32'd0;
|
||||||
bc_reg <= 32'd0; ad_reg <= 32'd0;
|
bc_reg <= 32'd0; ad_reg <= 32'd0;
|
||||||
@@ -76,7 +78,8 @@ end
|
|||||||
|
|
||||||
// ========== PIPELINE STAGE 3: ADDITIONS ==========
|
// ========== PIPELINE STAGE 3: ADDITIONS ==========
|
||||||
// For conjugate multiplication: (ac + bd) + j(bc - ad)
|
// For conjugate multiplication: (ac + bd) + j(bc - ad)
|
||||||
always @(posedge clk or negedge reset_n) begin
|
// Sync reset: enables DSP48E1 absorption (fixes DPOR-1/DPIP-1 DRC)
|
||||||
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
real_sum <= 32'd0;
|
real_sum <= 32'd0;
|
||||||
imag_sum <= 32'd0;
|
imag_sum <= 32'd0;
|
||||||
@@ -112,7 +115,8 @@ function automatic signed [15:0] saturate_and_scale;
|
|||||||
end
|
end
|
||||||
endfunction
|
endfunction
|
||||||
|
|
||||||
always @(posedge clk or negedge reset_n) begin
|
// Sync reset: enables DSP48E1 absorption (fixes DPOR-1/DPIP-1 DRC)
|
||||||
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (!reset_n) begin
|
||||||
real_out <= 16'd0;
|
real_out <= 16'd0;
|
||||||
imag_out <= 16'd0;
|
imag_out <= 16'd0;
|
||||||
|
|||||||
Reference in New Issue
Block a user