fix: close all FPGA timing — CFAR pipeline + CIC reset path (Build 19)
CFAR pipeline fix (clk_100m WNS: -0.331ns → +0.156ns):
- Pre-register col_buf reads during ST_CFAR_THR pipeline stage
- 8 pipeline registers (4 values + 4 valids) break 15-level mux tree
- Delta wires use registered values, eliminating combinational depth

CIC reset path fix (clk_mmcm_out0 WNS: -0.074ns → +0.068ns):
- Add reset_h input port to cic_decimator_4x_enhanced.v
- Remove internal wire reset_h = ~reset_n (LUT1 inverter was root cause)
- Wire pre-registered reset_400m from ddc_400m.v into both CIC instances
- 3 sync reset blocks changed from if(!reset_n) to if(reset_h)

Build 19 results (xc7a50tftg256-2, Vivado 2025.2):
- All 5 clock domains timing met, 0 failing endpoints
- WNS: +0.068ns (400MHz), +0.156ns (100MHz), +0.627ns (120MHz)
- Utilization: 66.67% LUT, 22.36% FF, 74% BRAM, 93.33% DSP
- Bitstream: 2,140 KB
This commit is contained in:
@@ -208,20 +208,31 @@ wire lead_rem_valid = (lead_rem_idx >= 0) && (lead_rem_idx < NUM_RANGE_BINS);
|
|||||||
wire lag_rem_valid = (lag_rem_idx >= 0) && (lag_rem_idx < NUM_RANGE_BINS);
|
wire lag_rem_valid = (lag_rem_idx >= 0) && (lag_rem_idx < NUM_RANGE_BINS);
|
||||||
wire lag_add_valid = (lag_add_idx >= 0) && (lag_add_idx < NUM_RANGE_BINS);
|
wire lag_add_valid = (lag_add_idx >= 0) && (lag_add_idx < NUM_RANGE_BINS);
|
||||||
|
|
||||||
// Safe col_buf read with bounds checking (combinational)
|
// Safe col_buf read with bounds checking (combinational — feeds pipeline regs)
|
||||||
wire [MAG_WIDTH-1:0] lead_add_val = lead_add_valid ? col_buf[lead_add_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
wire [MAG_WIDTH-1:0] lead_add_val = lead_add_valid ? col_buf[lead_add_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
||||||
wire [MAG_WIDTH-1:0] lead_rem_val = lead_rem_valid ? col_buf[lead_rem_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
wire [MAG_WIDTH-1:0] lead_rem_val = lead_rem_valid ? col_buf[lead_rem_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
||||||
wire [MAG_WIDTH-1:0] lag_rem_val = lag_rem_valid ? col_buf[lag_rem_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
wire [MAG_WIDTH-1:0] lag_rem_val = lag_rem_valid ? col_buf[lag_rem_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
||||||
wire [MAG_WIDTH-1:0] lag_add_val = lag_add_valid ? col_buf[lag_add_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
wire [MAG_WIDTH-1:0] lag_add_val = lag_add_valid ? col_buf[lag_add_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}};
|
||||||
|
|
||||||
// Net deltas
|
// ============================================================================
|
||||||
wire signed [SUM_WIDTH:0] lead_delta = (lead_add_valid ? $signed({1'b0, lead_add_val}) : 0)
|
// PIPELINE REGISTERS: Break col_buf mux tree out of ST_CFAR_CMP critical path
|
||||||
- (lead_rem_valid ? $signed({1'b0, lead_rem_val}) : 0);
|
// ============================================================================
|
||||||
wire signed [1:0] lead_cnt_delta = (lead_add_valid ? 1 : 0) - (lead_rem_valid ? 1 : 0);
|
// Captured in ST_CFAR_THR (col_buf indices depend only on cut_idx/r_guard/r_train,
|
||||||
|
// all stable during THR). Used in ST_CFAR_CMP for delta/sum computation.
|
||||||
|
// This removes ~6-8 logic levels (9-level mux tree) from the CMP critical path.
|
||||||
|
reg [MAG_WIDTH-1:0] lead_add_val_r, lead_rem_val_r;
|
||||||
|
reg [MAG_WIDTH-1:0] lag_rem_val_r, lag_add_val_r;
|
||||||
|
reg lead_add_valid_r, lead_rem_valid_r;
|
||||||
|
reg lag_rem_valid_r, lag_add_valid_r;
|
||||||
|
|
||||||
wire signed [SUM_WIDTH:0] lag_delta = (lag_add_valid ? $signed({1'b0, lag_add_val}) : 0)
|
// Net deltas (computed from registered col_buf values — combinational in CMP)
|
||||||
- (lag_rem_valid ? $signed({1'b0, lag_rem_val}) : 0);
|
wire signed [SUM_WIDTH:0] lead_delta = (lead_add_valid_r ? $signed({1'b0, lead_add_val_r}) : 0)
|
||||||
wire signed [1:0] lag_cnt_delta = (lag_add_valid ? 1 : 0) - (lag_rem_valid ? 1 : 0);
|
- (lead_rem_valid_r ? $signed({1'b0, lead_rem_val_r}) : 0);
|
||||||
|
wire signed [1:0] lead_cnt_delta = (lead_add_valid_r ? 1 : 0) - (lead_rem_valid_r ? 1 : 0);
|
||||||
|
|
||||||
|
wire signed [SUM_WIDTH:0] lag_delta = (lag_add_valid_r ? $signed({1'b0, lag_add_val_r}) : 0)
|
||||||
|
- (lag_rem_valid_r ? $signed({1'b0, lag_rem_val_r}) : 0);
|
||||||
|
wire signed [1:0] lag_cnt_delta = (lag_add_valid_r ? 1 : 0) - (lag_rem_valid_r ? 1 : 0);
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// NOISE ESTIMATE COMPUTATION (combinational for CFAR mode selection)
|
// NOISE ESTIMATE COMPUTATION (combinational for CFAR mode selection)
|
||||||
@@ -290,6 +301,14 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
noise_sum_reg <= 0;
|
noise_sum_reg <= 0;
|
||||||
noise_product <= 0;
|
noise_product <= 0;
|
||||||
adaptive_thr <= 0;
|
adaptive_thr <= 0;
|
||||||
|
lead_add_val_r <= 0;
|
||||||
|
lead_rem_val_r <= 0;
|
||||||
|
lag_rem_val_r <= 0;
|
||||||
|
lag_add_val_r <= 0;
|
||||||
|
lead_add_valid_r <= 0;
|
||||||
|
lead_rem_valid_r <= 0;
|
||||||
|
lag_rem_valid_r <= 0;
|
||||||
|
lag_add_valid_r <= 0;
|
||||||
r_guard <= 4'd2;
|
r_guard <= 4'd2;
|
||||||
r_train <= 5'd8;
|
r_train <= 5'd8;
|
||||||
r_alpha <= 8'h30;
|
r_alpha <= 8'h30;
|
||||||
@@ -443,6 +462,19 @@ always @(posedge clk or negedge reset_n) begin
|
|||||||
cfar_status <= {4'd4, 1'b0, col_idx[2:0]};
|
cfar_status <= {4'd4, 1'b0, col_idx[2:0]};
|
||||||
|
|
||||||
noise_sum_reg <= noise_sum_comb;
|
noise_sum_reg <= noise_sum_comb;
|
||||||
|
|
||||||
|
// Pipeline: register col_buf reads for next CUT's window update.
|
||||||
|
// Indices depend only on cut_idx/r_guard/r_train (all stable here).
|
||||||
|
// Breaks the 9-level col_buf mux tree out of ST_CFAR_CMP.
|
||||||
|
lead_add_val_r <= lead_add_val;
|
||||||
|
lead_rem_val_r <= lead_rem_val;
|
||||||
|
lag_rem_val_r <= lag_rem_val;
|
||||||
|
lag_add_val_r <= lag_add_val;
|
||||||
|
lead_add_valid_r <= lead_add_valid;
|
||||||
|
lead_rem_valid_r <= lead_rem_valid;
|
||||||
|
lag_rem_valid_r <= lag_rem_valid;
|
||||||
|
lag_add_valid_r <= lag_add_valid;
|
||||||
|
|
||||||
state <= ST_CFAR_MUL;
|
state <= ST_CFAR_MUL;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
module cic_decimator_4x_enhanced (
|
module cic_decimator_4x_enhanced (
|
||||||
input wire clk, // 400MHz input clock
|
input wire clk, // 400MHz input clock
|
||||||
input wire reset_n,
|
input wire reset_n,
|
||||||
|
input wire reset_h, // Pre-registered active-high reset from parent (avoids LUT1 inverter)
|
||||||
input wire signed [17:0] data_in, // 18-bit input
|
input wire signed [17:0] data_in, // 18-bit input
|
||||||
input wire data_valid,
|
input wire data_valid,
|
||||||
output reg signed [17:0] data_out, // 18-bit output
|
output reg signed [17:0] data_out, // 18-bit output
|
||||||
@@ -32,11 +33,15 @@ localparam COMB_WIDTH = 28;
|
|||||||
// adjacent DSP48E1 tiles — zero fabric delay, guaranteed to meet 400+ MHz
|
// adjacent DSP48E1 tiles — zero fabric delay, guaranteed to meet 400+ MHz
|
||||||
// on 7-series regardless of speed grade.
|
// on 7-series regardless of speed grade.
|
||||||
//
|
//
|
||||||
// Active-high reset derived from reset_n (inverted).
|
// Active-high reset provided by parent module (pre-registered).
|
||||||
// CEP (clock enable for P register) gated by data_valid.
|
// CEP (clock enable for P register) gated by data_valid.
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
wire reset_h = ~reset_n; // active-high reset for DSP48E1 RSTP
|
// reset_h is now an input port from parent module (pre-registered active-high).
|
||||||
|
// Previously: wire reset_h = ~reset_n; — this LUT1 inverter + long routing to
|
||||||
|
// 8 DSP48E1 RSTB pins was the root cause of 400 MHz timing failure (WNS=-0.074ns).
|
||||||
|
// The parent ddc_400m.v already has a registered reset_400m derived from
|
||||||
|
// the 2-stage sync reset, so we use that directly.
|
||||||
|
|
||||||
// Sign-extended input for integrator_0 C port (48-bit)
|
// Sign-extended input for integrator_0 C port (48-bit)
|
||||||
wire [ACC_WIDTH-1:0] data_in_c = {{(ACC_WIDTH-18){data_in[17]}}, data_in};
|
wire [ACC_WIDTH-1:0] data_in_c = {{(ACC_WIDTH-18){data_in[17]}}, data_in};
|
||||||
@@ -702,7 +707,7 @@ end
|
|||||||
// Sync reset: enables FDRE inference for better timing at 400 MHz.
|
// Sync reset: enables FDRE inference for better timing at 400 MHz.
|
||||||
// Reset is already synchronous to clk via reset synchronizer in parent module.
|
// Reset is already synchronous to clk via reset synchronizer in parent module.
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
integrator_sampled <= 0;
|
integrator_sampled <= 0;
|
||||||
decimation_counter <= 0;
|
decimation_counter <= 0;
|
||||||
data_valid_delayed <= 0;
|
data_valid_delayed <= 0;
|
||||||
@@ -757,7 +762,7 @@ end
|
|||||||
// Pipeline the valid signal for comb section
|
// Pipeline the valid signal for comb section
|
||||||
// Sync reset: matches decimation control block reset style.
|
// Sync reset: matches decimation control block reset style.
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
data_valid_comb <= 0;
|
data_valid_comb <= 0;
|
||||||
data_valid_comb_pipe <= 0;
|
data_valid_comb_pipe <= 0;
|
||||||
data_valid_comb_0_out <= 0;
|
data_valid_comb_0_out <= 0;
|
||||||
@@ -792,7 +797,7 @@ end
|
|||||||
// - Each stage: comb[i] = comb[i-1] - comb_delay[i][last]
|
// - Each stage: comb[i] = comb[i-1] - comb_delay[i][last]
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
for (i = 0; i < STAGES; i = i + 1) begin
|
for (i = 0; i < STAGES; i = i + 1) begin
|
||||||
comb[i] <= 0;
|
comb[i] <= 0;
|
||||||
for (j = 0; j < COMB_DELAY; j = j + 1) begin
|
for (j = 0; j < COMB_DELAY; j = j + 1) begin
|
||||||
|
|||||||
@@ -565,7 +565,8 @@ wire cic_valid_i, cic_valid_q;
|
|||||||
|
|
||||||
cic_decimator_4x_enhanced cic_i_inst (
|
cic_decimator_4x_enhanced cic_i_inst (
|
||||||
.clk(clk_400m),
|
.clk(clk_400m),
|
||||||
.reset_n(reset_n_400m),
|
.reset_n(reset_n_400m),
|
||||||
|
.reset_h(reset_400m),
|
||||||
.data_in(mixed_i[33:16]),
|
.data_in(mixed_i[33:16]),
|
||||||
.data_valid(mixed_valid),
|
.data_valid(mixed_valid),
|
||||||
.data_out(cic_i_out),
|
.data_out(cic_i_out),
|
||||||
@@ -574,7 +575,8 @@ cic_decimator_4x_enhanced cic_i_inst (
|
|||||||
|
|
||||||
cic_decimator_4x_enhanced cic_q_inst (
|
cic_decimator_4x_enhanced cic_q_inst (
|
||||||
.clk(clk_400m),
|
.clk(clk_400m),
|
||||||
.reset_n(reset_n_400m),
|
.reset_n(reset_n_400m),
|
||||||
|
.reset_h(reset_400m),
|
||||||
.data_in(mixed_q[33:16]),
|
.data_in(mixed_q[33:16]),
|
||||||
.data_valid(mixed_valid),
|
.data_valid(mixed_valid),
|
||||||
.data_out(cic_q_out),
|
.data_out(cic_q_out),
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ module tb_cic_decimator;
|
|||||||
cic_decimator_4x_enhanced uut (
|
cic_decimator_4x_enhanced uut (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset_n (reset_n),
|
.reset_n (reset_n),
|
||||||
|
.reset_h (~reset_n),
|
||||||
.data_in (data_in),
|
.data_in (data_in),
|
||||||
.data_valid (data_valid),
|
.data_valid (data_valid),
|
||||||
.data_out (data_out),
|
.data_out (data_out),
|
||||||
|
|||||||
Reference in New Issue
Block a user