diff --git a/9_Firmware/9_2_FPGA/cfar_ca.v b/9_Firmware/9_2_FPGA/cfar_ca.v index 83f28df..04c33ce 100644 --- a/9_Firmware/9_2_FPGA/cfar_ca.v +++ b/9_Firmware/9_2_FPGA/cfar_ca.v @@ -208,20 +208,31 @@ wire lead_rem_valid = (lead_rem_idx >= 0) && (lead_rem_idx < NUM_RANGE_BINS); wire lag_rem_valid = (lag_rem_idx >= 0) && (lag_rem_idx < NUM_RANGE_BINS); wire lag_add_valid = (lag_add_idx >= 0) && (lag_add_idx < NUM_RANGE_BINS); -// Safe col_buf read with bounds checking (combinational) +// Safe col_buf read with bounds checking (combinational — feeds pipeline regs) wire [MAG_WIDTH-1:0] lead_add_val = lead_add_valid ? col_buf[lead_add_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}}; wire [MAG_WIDTH-1:0] lead_rem_val = lead_rem_valid ? col_buf[lead_rem_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}}; wire [MAG_WIDTH-1:0] lag_rem_val = lag_rem_valid ? col_buf[lag_rem_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}}; wire [MAG_WIDTH-1:0] lag_add_val = lag_add_valid ? col_buf[lag_add_idx[ROW_BITS-1:0]] : {MAG_WIDTH{1'b0}}; -// Net deltas -wire signed [SUM_WIDTH:0] lead_delta = (lead_add_valid ? $signed({1'b0, lead_add_val}) : 0) - - (lead_rem_valid ? $signed({1'b0, lead_rem_val}) : 0); -wire signed [1:0] lead_cnt_delta = (lead_add_valid ? 1 : 0) - (lead_rem_valid ? 1 : 0); +// ============================================================================ +// PIPELINE REGISTERS: Break col_buf mux tree out of ST_CFAR_CMP critical path +// ============================================================================ +// Captured in ST_CFAR_THR (col_buf indices depend only on cut_idx/r_guard/r_train, +// all stable during THR). Used in ST_CFAR_CMP for delta/sum computation. +// This removes ~6-8 logic levels (9-level mux tree) from the CMP critical path. +reg [MAG_WIDTH-1:0] lead_add_val_r, lead_rem_val_r; +reg [MAG_WIDTH-1:0] lag_rem_val_r, lag_add_val_r; +reg lead_add_valid_r, lead_rem_valid_r; +reg lag_rem_valid_r, lag_add_valid_r; -wire signed [SUM_WIDTH:0] lag_delta = (lag_add_valid ? $signed({1'b0, lag_add_val}) : 0) - - (lag_rem_valid ? $signed({1'b0, lag_rem_val}) : 0); -wire signed [1:0] lag_cnt_delta = (lag_add_valid ? 1 : 0) - (lag_rem_valid ? 1 : 0); +// Net deltas (computed from registered col_buf values — combinational in CMP) +wire signed [SUM_WIDTH:0] lead_delta = (lead_add_valid_r ? $signed({1'b0, lead_add_val_r}) : 0) + - (lead_rem_valid_r ? $signed({1'b0, lead_rem_val_r}) : 0); +wire signed [1:0] lead_cnt_delta = (lead_add_valid_r ? 1 : 0) - (lead_rem_valid_r ? 1 : 0); + +wire signed [SUM_WIDTH:0] lag_delta = (lag_add_valid_r ? $signed({1'b0, lag_add_val_r}) : 0) + - (lag_rem_valid_r ? $signed({1'b0, lag_rem_val_r}) : 0); +wire signed [1:0] lag_cnt_delta = (lag_add_valid_r ? 1 : 0) - (lag_rem_valid_r ? 1 : 0); // ============================================================================ // NOISE ESTIMATE COMPUTATION (combinational for CFAR mode selection) @@ -290,6 +301,14 @@ always @(posedge clk or negedge reset_n) begin noise_sum_reg <= 0; noise_product <= 0; adaptive_thr <= 0; + lead_add_val_r <= 0; + lead_rem_val_r <= 0; + lag_rem_val_r <= 0; + lag_add_val_r <= 0; + lead_add_valid_r <= 0; + lead_rem_valid_r <= 0; + lag_rem_valid_r <= 0; + lag_add_valid_r <= 0; r_guard <= 4'd2; r_train <= 5'd8; r_alpha <= 8'h30; @@ -443,6 +462,19 @@ always @(posedge clk or negedge reset_n) begin cfar_status <= {4'd4, 1'b0, col_idx[2:0]}; noise_sum_reg <= noise_sum_comb; + + // Pipeline: register col_buf reads for next CUT's window update. + // Indices depend only on cut_idx/r_guard/r_train (all stable here). + // Breaks the 9-level col_buf mux tree out of ST_CFAR_CMP. + lead_add_val_r <= lead_add_val; + lead_rem_val_r <= lead_rem_val; + lag_rem_val_r <= lag_rem_val; + lag_add_val_r <= lag_add_val; + lead_add_valid_r <= lead_add_valid; + lead_rem_valid_r <= lead_rem_valid; + lag_rem_valid_r <= lag_rem_valid; + lag_add_valid_r <= lag_add_valid; + state <= ST_CFAR_MUL; end diff --git a/9_Firmware/9_2_FPGA/cic_decimator_4x_enhanced.v b/9_Firmware/9_2_FPGA/cic_decimator_4x_enhanced.v index 76ade79..dbc1c84 100644 --- a/9_Firmware/9_2_FPGA/cic_decimator_4x_enhanced.v +++ b/9_Firmware/9_2_FPGA/cic_decimator_4x_enhanced.v @@ -1,6 +1,7 @@ module cic_decimator_4x_enhanced ( input wire clk, // 400MHz input clock input wire reset_n, + input wire reset_h, // Pre-registered active-high reset from parent (avoids LUT1 inverter) input wire signed [17:0] data_in, // 18-bit input input wire data_valid, output reg signed [17:0] data_out, // 18-bit output @@ -32,11 +33,15 @@ localparam COMB_WIDTH = 28; // adjacent DSP48E1 tiles — zero fabric delay, guaranteed to meet 400+ MHz // on 7-series regardless of speed grade. // -// Active-high reset derived from reset_n (inverted). +// Active-high reset provided by parent module (pre-registered). // CEP (clock enable for P register) gated by data_valid. // ============================================================================ -wire reset_h = ~reset_n; // active-high reset for DSP48E1 RSTP +// reset_h is now an input port from parent module (pre-registered active-high). +// Previously: wire reset_h = ~reset_n; — this LUT1 inverter + long routing to +// 8 DSP48E1 RSTB pins was the root cause of 400 MHz timing failure (WNS=-0.074ns). +// The parent ddc_400m.v already has a registered reset_400m derived from +// the 2-stage sync reset, so we use that directly. // Sign-extended input for integrator_0 C port (48-bit) wire [ACC_WIDTH-1:0] data_in_c = {{(ACC_WIDTH-18){data_in[17]}}, data_in}; @@ -702,7 +707,7 @@ end // Sync reset: enables FDRE inference for better timing at 400 MHz. // Reset is already synchronous to clk via reset synchronizer in parent module. always @(posedge clk) begin - if (!reset_n) begin + if (reset_h) begin integrator_sampled <= 0; decimation_counter <= 0; data_valid_delayed <= 0; @@ -757,7 +762,7 @@ end // Pipeline the valid signal for comb section // Sync reset: matches decimation control block reset style. always @(posedge clk) begin - if (!reset_n) begin + if (reset_h) begin data_valid_comb <= 0; data_valid_comb_pipe <= 0; data_valid_comb_0_out <= 0; @@ -792,7 +797,7 @@ end // - Each stage: comb[i] = comb[i-1] - comb_delay[i][last] always @(posedge clk) begin - if (!reset_n) begin + if (reset_h) begin for (i = 0; i < STAGES; i = i + 1) begin comb[i] <= 0; for (j = 0; j < COMB_DELAY; j = j + 1) begin diff --git a/9_Firmware/9_2_FPGA/ddc_400m.v b/9_Firmware/9_2_FPGA/ddc_400m.v index 470ad14..4cad7f4 100644 --- a/9_Firmware/9_2_FPGA/ddc_400m.v +++ b/9_Firmware/9_2_FPGA/ddc_400m.v @@ -565,7 +565,8 @@ wire cic_valid_i, cic_valid_q; cic_decimator_4x_enhanced cic_i_inst ( .clk(clk_400m), - .reset_n(reset_n_400m), + .reset_n(reset_n_400m), + .reset_h(reset_400m), .data_in(mixed_i[33:16]), .data_valid(mixed_valid), .data_out(cic_i_out), @@ -574,7 +575,8 @@ cic_decimator_4x_enhanced cic_i_inst ( cic_decimator_4x_enhanced cic_q_inst ( .clk(clk_400m), - .reset_n(reset_n_400m), + .reset_n(reset_n_400m), + .reset_h(reset_400m), .data_in(mixed_q[33:16]), .data_valid(mixed_valid), .data_out(cic_q_out), diff --git a/9_Firmware/9_2_FPGA/tb/tb_cic_decimator.v b/9_Firmware/9_2_FPGA/tb/tb_cic_decimator.v index 3fc78fc..b51ef34 100644 --- a/9_Firmware/9_2_FPGA/tb/tb_cic_decimator.v +++ b/9_Firmware/9_2_FPGA/tb/tb_cic_decimator.v @@ -36,6 +36,7 @@ module tb_cic_decimator; cic_decimator_4x_enhanced uut ( .clk (clk), .reset_n (reset_n), + .reset_h (~reset_n), .data_in (data_in), .data_valid (data_valid), .data_out (data_out),