fix(fpga): registered reset fan-out at 400 MHz; default USB to FT2232H
Replace direct !reset_n async sense with a registered active-high reset_h (max_fanout=50) in nco_400m_enhanced, cic_decimator_4x_enhanced, and ddc_400m. The prior single-LUT1 / 700+ load net was the root cause of WNS=-0.626 ns in the 400 MHz clock domain on the xc7a50t build. Vivado replicates the constrained register into ≈14 regional copies, each driving ≤50 loads, closing timing at 2.5 ns. Change radar_system_top default USB_MODE from 0 (FT601) to 1 (FT2232H). FT601 remains available for the 200T premium board via explicit parameter override; the 50T production wrapper already hard-codes USB_MODE=1. Regression: add usb_data_interface_ft2232h.v to PROD_RTL lint list and both system-top TB compile commands; fix legacy radar_system_tb hierarchical probe from gen_ft601.usb_inst to gen_ft2232h.usb_inst. Golden reference files (rtl_bb_dc.csv, rx_final_doppler_out.csv, golden_doppler.mem) regenerated to reflect the +1-cycle registered-reset boundary behaviour; Receiver golden-compare passes 18/18 checks. All 25 regression tests pass (0 failures, 0 skipped). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -32,11 +32,50 @@ localparam COMB_WIDTH = 28;
|
|||||||
// adjacent DSP48E1 tiles — zero fabric delay, guaranteed to meet 400+ MHz
|
// adjacent DSP48E1 tiles — zero fabric delay, guaranteed to meet 400+ MHz
|
||||||
// on 7-series regardless of speed grade.
|
// on 7-series regardless of speed grade.
|
||||||
//
|
//
|
||||||
// Active-high reset derived from reset_n (inverted).
|
// Active-high reset derived from reset_n (inverted and REGISTERED).
|
||||||
// CEP (clock enable for P register) gated by data_valid.
|
// CEP (clock enable for P register) gated by data_valid.
|
||||||
// ============================================================================
|
//
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
wire reset_h = ~reset_n; // active-high reset for DSP48E1 RSTP
|
// RESET FAN-OUT INVARIANT (Build N+1 fix for WNS=-0.626ns at 400 MHz):
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// Previously this was a combinational wire (`wire reset_h = ~reset_n`). Vivado
|
||||||
|
// collapsed all per-module inversions across the DDC hierarchy into a SINGLE
|
||||||
|
// shared LUT1, whose output fanned out to 702 loads (DSP48E1 RSTP/RSTB/RSTC
|
||||||
|
// plus FDRE R pins of all comb-stage DSP48E1s inferred via use_dsp="yes").
|
||||||
|
// Route delay alone on that net was 2.019–2.268 ns — nearly one full 2.5 ns
|
||||||
|
// period. Timing failed by 626 ps on the 400 MHz domain.
|
||||||
|
//
|
||||||
|
// Fix: convert reset_h to a REGISTERED signal with (* max_fanout = 50 *).
|
||||||
|
// Vivado treats max_fanout on a REG (not a wire) as authoritative and
|
||||||
|
// replicates the register into N copies, each placed near its ≈50 loads.
|
||||||
|
// Invariants preserved:
|
||||||
|
// I1 (correctness): reset_h is still active-high, equals ~reset_n
|
||||||
|
// after one clk edge; the CIC's reset_n input is
|
||||||
|
// already synchronous to clk (it is driven by
|
||||||
|
// reset_n_400m, synchronized in the parent DDC), so
|
||||||
|
// adding one more clk cycle of latency is safe.
|
||||||
|
// I2 (glitch-free): Registered output => inherently glitch-free,
|
||||||
|
// feeding DSP48E1 RST pins (which are synchronous
|
||||||
|
// to CLK, so they capture on the same edge anyway).
|
||||||
|
// I3 (power-up safety): reset_h is NOT async-reset itself. On power-up,
|
||||||
|
// FDRE INIT=1 starts reset_h HIGH. First clk edge
|
||||||
|
// samples ~reset_n, which is HIGH on power-up (the
|
||||||
|
// parent DDC holds reset_n_400m low until the 2-
|
||||||
|
// stage synchronizer releases), so reset_h stays
|
||||||
|
// HIGH from cycle 0 and all DSPs see reset during
|
||||||
|
// the following cycles. System is held in reset
|
||||||
|
// for enough cycles that any initial register
|
||||||
|
// state garbage is overwritten. ✅
|
||||||
|
// I4 (reset de-assertion):reset_h goes LOW one cycle AFTER reset_n_400m
|
||||||
|
// goes HIGH. Downstream DSPs come out of reset on
|
||||||
|
// the next clk edge after that. Total latency
|
||||||
|
// from system reset release to first valid sample:
|
||||||
|
// 2 (sync chain) + 1 (reset_h reg) + 1 (first
|
||||||
|
// DSP output) = 4 cycles at 400 MHz = 10 ns.
|
||||||
|
// Negligible vs system reset assertion duration.
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
(* max_fanout = 50 *) reg reset_h = 1'b1; // INIT=1'b1: registers start in reset state on power-up
|
||||||
|
always @(posedge clk) reset_h <= ~reset_n;
|
||||||
|
|
||||||
// Sign-extended input for integrator_0 C port (48-bit)
|
// Sign-extended input for integrator_0 C port (48-bit)
|
||||||
wire [ACC_WIDTH-1:0] data_in_c = {{(ACC_WIDTH-18){data_in[17]}}, data_in};
|
wire [ACC_WIDTH-1:0] data_in_c = {{(ACC_WIDTH-18){data_in[17]}}, data_in};
|
||||||
@@ -699,10 +738,11 @@ initial begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Decimation control + monitoring (integrators are now DSP48E1 instances)
|
// Decimation control + monitoring (integrators are now DSP48E1 instances)
|
||||||
// Sync reset: enables FDRE inference for better timing at 400 MHz.
|
// Sync reset via reset_h (registered, max_fanout=50) — eliminates the shared
|
||||||
// Reset is already synchronous to clk via reset synchronizer in parent module.
|
// LUT1 inverter that previously fanned out to all fabric FDRE R pins plus
|
||||||
|
// DSP48E1 RST pins (702 loads total). See "RESET FAN-OUT INVARIANT" at top.
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
integrator_sampled <= 0;
|
integrator_sampled <= 0;
|
||||||
decimation_counter <= 0;
|
decimation_counter <= 0;
|
||||||
data_valid_delayed <= 0;
|
data_valid_delayed <= 0;
|
||||||
@@ -755,9 +795,9 @@ always @(posedge clk) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Pipeline the valid signal for comb section
|
// Pipeline the valid signal for comb section
|
||||||
// Sync reset: matches decimation control block reset style.
|
// Sync reset via reset_h — same replicated-register source as DSP48E1 RSTs.
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
data_valid_comb <= 0;
|
data_valid_comb <= 0;
|
||||||
data_valid_comb_pipe <= 0;
|
data_valid_comb_pipe <= 0;
|
||||||
data_valid_comb_0_out <= 0;
|
data_valid_comb_0_out <= 0;
|
||||||
@@ -792,7 +832,7 @@ end
|
|||||||
// - Each stage: comb[i] = comb[i-1] - comb_delay[i][last]
|
// - Each stage: comb[i] = comb[i-1] - comb_delay[i][last]
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
for (i = 0; i < STAGES; i = i + 1) begin
|
for (i = 0; i < STAGES; i = i + 1) begin
|
||||||
comb[i] <= 0;
|
comb[i] <= 0;
|
||||||
for (j = 0; j < COMB_DELAY; j = j + 1) begin
|
for (j = 0; j < COMB_DELAY; j = j + 1) begin
|
||||||
|
|||||||
@@ -53,46 +53,6 @@ reg [2:0] saturation_count;
|
|||||||
reg overflow_detected;
|
reg overflow_detected;
|
||||||
reg [7:0] error_counter;
|
reg [7:0] error_counter;
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// 400 MHz Reset Synchronizer
|
|
||||||
//
|
|
||||||
// reset_n arrives from the 100 MHz domain (sys_reset_n from radar_system_top).
|
|
||||||
// Using it directly as an async reset in the 400 MHz domain causes the reset
|
|
||||||
// deassertion edge to violate timing: the 100 MHz flip-flop driving reset_n
|
|
||||||
// has its output fanning out to 1156 registers across the FPGA in the 400 MHz
|
|
||||||
// domain, requiring 18.243ns of routing (WNS = -18.081ns).
|
|
||||||
//
|
|
||||||
// Solution: 2-stage async-assert, sync-deassert reset synchronizer in the
|
|
||||||
// 400 MHz domain. Reset assertion is immediate (asynchronous — combinatorial
|
|
||||||
// path from reset_n to all 400 MHz registers). Reset deassertion is
|
|
||||||
// synchronized to clk_400m rising edge, preventing metastability.
|
|
||||||
//
|
|
||||||
// All 400 MHz submodules (NCO, CIC, mixers, LFSR) use reset_n_400m.
|
|
||||||
// All 100 MHz submodules (FIR, output stage) continue using reset_n directly
|
|
||||||
// (already synchronized to 100 MHz at radar_system_top level).
|
|
||||||
// ============================================================================
|
|
||||||
(* ASYNC_REG = "TRUE" *) reg [1:0] reset_sync_400m;
|
|
||||||
(* max_fanout = 50 *) wire reset_n_400m = reset_sync_400m[1];
|
|
||||||
|
|
||||||
// Active-high reset for DSP48E1 RST ports (avoids LUT1 inverter fan-out)
|
|
||||||
(* max_fanout = 50 *) reg reset_400m;
|
|
||||||
|
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
|
||||||
if (!reset_n) begin
|
|
||||||
reset_sync_400m <= 2'b00;
|
|
||||||
reset_400m <= 1'b1;
|
|
||||||
end else begin
|
|
||||||
reset_sync_400m <= {reset_sync_400m[0], 1'b1};
|
|
||||||
reset_400m <= ~reset_sync_400m[1];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
// CDC synchronization for control signals (2-stage synchronizers)
|
|
||||||
(* ASYNC_REG = "TRUE" *) reg [1:0] mixers_enable_sync_chain;
|
|
||||||
(* ASYNC_REG = "TRUE" *) reg [1:0] force_saturation_sync_chain;
|
|
||||||
wire mixers_enable_sync;
|
|
||||||
wire force_saturation_sync;
|
|
||||||
|
|
||||||
// Debug monitoring signals
|
// Debug monitoring signals
|
||||||
reg [31:0] sample_counter;
|
reg [31:0] sample_counter;
|
||||||
wire signed [17:0] debug_mixed_i_trunc;
|
wire signed [17:0] debug_mixed_i_trunc;
|
||||||
@@ -130,8 +90,6 @@ reg baseband_valid_reg;
|
|||||||
wire [7:0] phase_dither_bits;
|
wire [7:0] phase_dither_bits;
|
||||||
reg [31:0] phase_inc_dithered;
|
reg [31:0] phase_inc_dithered;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Debug Signal Assignments
|
// Debug Signal Assignments
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -142,13 +100,66 @@ assign debug_mixed_i_trunc = mixed_i[25:8];
|
|||||||
assign debug_mixed_q_trunc = mixed_q[25:8];
|
assign debug_mixed_q_trunc = mixed_q[25:8];
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Clock Domain Crossing for Control Signals (2-stage synchronizers)
|
// 400 MHz Reset Synchronizer
|
||||||
|
//
|
||||||
|
// reset_n arrives from the 100 MHz domain (sys_reset_n from radar_system_top).
|
||||||
|
// Using it directly as an async reset in the 400 MHz domain causes the reset
|
||||||
|
// deassertion edge to violate timing: the 100 MHz flip-flop driving reset_n
|
||||||
|
// has its output fanning out to 1156 registers across the FPGA in the 400 MHz
|
||||||
|
// domain, requiring 18.243ns of routing (WNS = -18.081ns).
|
||||||
|
//
|
||||||
|
// Solution: 2-stage async-assert, sync-deassert reset synchronizer in the
|
||||||
|
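// 400 MHz domain. Reset assertion is asynchronous at the synchronizer flops
|
||||||
|
// (reset_sync_400m and reset_400m); downstream 400 MHz registers then apply
|
||||||
|
// it synchronously. Reset deassertion is synchronized to the clk_400m rising
|
||||||
|
// edge, preventing metastability.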
|
||||||
|
//
|
||||||
|
// reset_400m : ACTIVE-HIGH registered reset with (* max_fanout = 50 *).
|
||||||
|
// This is THE signal fed to every synchronous 400 MHz FDRE
|
||||||
|
// and every DSP48E1 RST pin in this module and its children
|
||||||
|
// (NCO, CIC, LFSR). Vivado replicates the register (~14
|
||||||
|
// copies) so each replica drives ≈50 loads regionally,
|
||||||
|
// eliminating the single-LUT1 / 702-load net that caused
|
||||||
|
// WNS=-0.626 ns in Build N.
|
||||||
|
//
|
||||||
|
// System-level invariants preserved:
|
||||||
|
// I1 Reset assertion propagates to all 400 MHz regs within ≤3 clk edges
|
||||||
|
// (2 sync + 1 replicated-reg fanout). At 400 MHz = 7.5 ns << any
|
||||||
|
// system-level reset assertion duration.
|
||||||
|
// I2 Reset de-assertion is always synchronous to clk_400m (via
|
||||||
|
// reset_sync_400m), never glitches.
|
||||||
|
// I3 DSP48E1 RST pins are all fed from Q of a register — glitch-free.
|
||||||
|
// I4 No new CDC introduced: reset_400m is entirely in clk_400m domain.
|
||||||
|
// I5 Power-up: reset_n is asserted externally and mmcm_locked is low;
|
||||||
|
// reset_sync_400m stays 2'b00, reset_400m stays 1'b1, downstream
|
||||||
|
// FDREs stay cleared. Safe.
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
(* ASYNC_REG = "TRUE" *) reg [1:0] reset_sync_400m = 2'b00;
|
||||||
|
(* max_fanout = 50 *) wire reset_n_400m = reset_sync_400m[1];
|
||||||
|
|
||||||
|
// Active-high replicated reset for all synchronous 400 MHz consumers
|
||||||
|
(* max_fanout = 50 *) reg reset_400m = 1'b1;
|
||||||
|
|
||||||
|
always @(posedge clk_400m or negedge reset_n) begin
|
||||||
|
if (!reset_n) begin
|
||||||
|
reset_sync_400m <= 2'b00;
|
||||||
|
reset_400m <= 1'b1;
|
||||||
|
end else begin
|
||||||
|
reset_sync_400m <= {reset_sync_400m[0], 1'b1};
|
||||||
|
reset_400m <= ~reset_sync_400m[1];
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// CDC synchronization for control signals (2-stage synchronizers)
|
||||||
|
(* ASYNC_REG = "TRUE" *) reg [1:0] mixers_enable_sync_chain;
|
||||||
|
(* ASYNC_REG = "TRUE" *) reg [1:0] force_saturation_sync_chain;
|
||||||
|
wire mixers_enable_sync;
|
||||||
|
wire force_saturation_sync;
|
||||||
assign mixers_enable_sync = mixers_enable_sync_chain[1];
|
assign mixers_enable_sync = mixers_enable_sync_chain[1];
|
||||||
assign force_saturation_sync = force_saturation_sync_chain[1];
|
assign force_saturation_sync = force_saturation_sync_chain[1];
|
||||||
|
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
// Sync reset via reset_400m (replicated, max_fanout=50). Was async on
|
||||||
if (!reset_n_400m) begin
|
// reset_n_400m — see "400 MHz Reset Synchronizer" comment above.
|
||||||
|
always @(posedge clk_400m) begin
|
||||||
|
if (reset_400m) begin
|
||||||
mixers_enable_sync_chain <= 2'b00;
|
mixers_enable_sync_chain <= 2'b00;
|
||||||
force_saturation_sync_chain <= 2'b00;
|
force_saturation_sync_chain <= 2'b00;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -160,8 +171,8 @@ end
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Sample Counter and Debug Monitoring
|
// Sample Counter and Debug Monitoring
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m || reset_monitors) begin
|
if (reset_400m || reset_monitors) begin
|
||||||
sample_counter <= 0;
|
sample_counter <= 0;
|
||||||
error_counter <= 0;
|
error_counter <= 0;
|
||||||
end else if (adc_data_valid_i && adc_data_valid_q ) begin
|
end else if (adc_data_valid_i && adc_data_valid_q ) begin
|
||||||
@@ -189,8 +200,8 @@ lfsr_dither_enhanced #(
|
|||||||
localparam PHASE_INC_120MHZ = 32'h4CCCCCCD;
|
localparam PHASE_INC_120MHZ = 32'h4CCCCCCD;
|
||||||
|
|
||||||
// Apply dithering to reduce spurious tones (registered for 400 MHz timing)
|
// Apply dithering to reduce spurious tones (registered for 400 MHz timing)
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m)
|
if (reset_400m)
|
||||||
phase_inc_dithered <= PHASE_INC_120MHZ;
|
phase_inc_dithered <= PHASE_INC_120MHZ;
|
||||||
else
|
else
|
||||||
phase_inc_dithered <= PHASE_INC_120MHZ + {24'b0, phase_dither_bits};
|
phase_inc_dithered <= PHASE_INC_120MHZ + {24'b0, phase_dither_bits};
|
||||||
@@ -229,8 +240,8 @@ assign adc_signed_w = {1'b0, adc_data, {(MIXER_WIDTH-ADC_WIDTH-1){1'b0}}} -
|
|||||||
{1'b0, {ADC_WIDTH{1'b1}}, {(MIXER_WIDTH-ADC_WIDTH-1){1'b0}}} / 2;
|
{1'b0, {ADC_WIDTH{1'b1}}, {(MIXER_WIDTH-ADC_WIDTH-1){1'b0}}} / 2;
|
||||||
|
|
||||||
// Valid pipeline: 5-stage shift register (1 NCO pipe + 3 DSP48E1 AREG+MREG+PREG + 1 retiming)
|
// Valid pipeline: 5-stage shift register (1 NCO pipe + 3 DSP48E1 AREG+MREG+PREG + 1 retiming)
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
dsp_valid_pipe <= 5'b00000;
|
dsp_valid_pipe <= 5'b00000;
|
||||||
end else begin
|
end else begin
|
||||||
dsp_valid_pipe <= {dsp_valid_pipe[3:0], (nco_ready && adc_data_valid_i && adc_data_valid_q)};
|
dsp_valid_pipe <= {dsp_valid_pipe[3:0], (nco_ready && adc_data_valid_i && adc_data_valid_q)};
|
||||||
@@ -246,8 +257,8 @@ reg signed [MIXER_WIDTH+NCO_WIDTH-1:0] mult_i_internal, mult_q_internal; // Mod
|
|||||||
reg signed [MIXER_WIDTH+NCO_WIDTH-1:0] mult_i_reg, mult_q_reg; // Models PREG
|
reg signed [MIXER_WIDTH+NCO_WIDTH-1:0] mult_i_reg, mult_q_reg; // Models PREG
|
||||||
|
|
||||||
// Stage 0: NCO pipeline — breaks long NCO→DSP route (matches synthesis fabric registers)
|
// Stage 0: NCO pipeline — breaks long NCO→DSP route (matches synthesis fabric registers)
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
cos_nco_pipe <= 0;
|
cos_nco_pipe <= 0;
|
||||||
sin_nco_pipe <= 0;
|
sin_nco_pipe <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -257,8 +268,8 @@ always @(posedge clk_400m or negedge reset_n_400m) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Stage 1: AREG/BREG equivalent (uses pipelined NCO outputs)
|
// Stage 1: AREG/BREG equivalent (uses pipelined NCO outputs)
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
adc_signed_reg <= 0;
|
adc_signed_reg <= 0;
|
||||||
cos_pipe_reg <= 0;
|
cos_pipe_reg <= 0;
|
||||||
sin_pipe_reg <= 0;
|
sin_pipe_reg <= 0;
|
||||||
@@ -270,8 +281,8 @@ always @(posedge clk_400m or negedge reset_n_400m) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Stage 2: MREG equivalent
|
// Stage 2: MREG equivalent
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
mult_i_internal <= 0;
|
mult_i_internal <= 0;
|
||||||
mult_q_internal <= 0;
|
mult_q_internal <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -281,8 +292,8 @@ always @(posedge clk_400m or negedge reset_n_400m) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Stage 3: PREG equivalent
|
// Stage 3: PREG equivalent
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
mult_i_reg <= 0;
|
mult_i_reg <= 0;
|
||||||
mult_q_reg <= 0;
|
mult_q_reg <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -292,8 +303,8 @@ always @(posedge clk_400m or negedge reset_n_400m) begin
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Stage 4: Post-DSP retiming register (matches synthesis path)
|
// Stage 4: Post-DSP retiming register (matches synthesis path)
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
mult_i_retimed <= 0;
|
mult_i_retimed <= 0;
|
||||||
mult_q_retimed <= 0;
|
mult_q_retimed <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -311,8 +322,8 @@ wire [47:0] dsp_p_i, dsp_p_q;
|
|||||||
// (1.505ns routing observed in Build 26). These fabric registers are placed
|
// (1.505ns routing observed in Build 26). These fabric registers are placed
|
||||||
// near the DSP by the placer, splitting the route into two shorter segments.
|
// near the DSP by the placer, splitting the route into two shorter segments.
|
||||||
// DONT_TOUCH on the reg declaration (above) prevents absorption/retiming.
|
// DONT_TOUCH on the reg declaration (above) prevents absorption/retiming.
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
cos_nco_pipe <= 0;
|
cos_nco_pipe <= 0;
|
||||||
sin_nco_pipe <= 0;
|
sin_nco_pipe <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -329,11 +340,10 @@ DSP48E1 #(
|
|||||||
.USE_DPORT("FALSE"),
|
.USE_DPORT("FALSE"),
|
||||||
.USE_MULT("MULTIPLY"),
|
.USE_MULT("MULTIPLY"),
|
||||||
.USE_SIMD("ONE48"),
|
.USE_SIMD("ONE48"),
|
||||||
// Pipeline register attributes — all enabled for max timing
|
|
||||||
.AREG(1),
|
.AREG(1),
|
||||||
.BREG(1),
|
.BREG(1),
|
||||||
.MREG(1),
|
.MREG(1),
|
||||||
.PREG(1), // P register enabled — absorbs CLK→P delay for timing closure
|
.PREG(1),
|
||||||
.ADREG(0),
|
.ADREG(0),
|
||||||
.ACASCREG(1),
|
.ACASCREG(1),
|
||||||
.BCASCREG(1),
|
.BCASCREG(1),
|
||||||
@@ -344,7 +354,6 @@ DSP48E1 #(
|
|||||||
.DREG(0),
|
.DREG(0),
|
||||||
.INMODEREG(0),
|
.INMODEREG(0),
|
||||||
.OPMODEREG(0),
|
.OPMODEREG(0),
|
||||||
// Pattern detector (unused)
|
|
||||||
.AUTORESET_PATDET("NO_RESET"),
|
.AUTORESET_PATDET("NO_RESET"),
|
||||||
.MASK(48'h3fffffffffff),
|
.MASK(48'h3fffffffffff),
|
||||||
.PATTERN(48'h000000000000),
|
.PATTERN(48'h000000000000),
|
||||||
@@ -496,8 +505,8 @@ wire signed [MIXER_WIDTH+NCO_WIDTH-1:0] mult_q_reg = dsp_p_q[MIXER_WIDTH+NCO_WID
|
|||||||
// Stage 4: Post-DSP retiming register — breaks DSP48E1 CLK→P to fabric path
|
// Stage 4: Post-DSP retiming register — breaks DSP48E1 CLK→P to fabric path
|
||||||
// Without this, the DSP output prop delay (1.866ns) + routing (0.515ns) exceeds
|
// Without this, the DSP output prop delay (1.866ns) + routing (0.515ns) exceeds
|
||||||
// the 2.500ns clock period at slow process corner
|
// the 2.500ns clock period at slow process corner
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
mult_i_retimed <= 0;
|
mult_i_retimed <= 0;
|
||||||
mult_q_retimed <= 0;
|
mult_q_retimed <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
@@ -513,8 +522,8 @@ end
|
|||||||
// force_saturation mux is intentionally AFTER the DSP48E1 output to avoid
|
// force_saturation mux is intentionally AFTER the DSP48E1 output to avoid
|
||||||
// polluting the critical input path with extra logic
|
// polluting the critical input path with extra logic
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n_400m) begin
|
if (reset_400m) begin
|
||||||
mixed_i <= 0;
|
mixed_i <= 0;
|
||||||
mixed_q <= 0;
|
mixed_q <= 0;
|
||||||
mixed_valid <= 0;
|
mixed_valid <= 0;
|
||||||
@@ -759,8 +768,17 @@ generate
|
|||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
always @(posedge clk or negedge reset_n) begin
|
// ============================================================================
|
||||||
if (!reset_n) begin
|
// RESET FAN-OUT INVARIANT: registered active-high reset with max_fanout=50.
|
||||||
|
// See cic_decimator_4x_enhanced.v for full reasoning. reset_n here is driven
|
||||||
|
// by the parent DDC's reset_n_400m (already synchronized to clk_400m), so
|
||||||
|
// sync reset on the LFSR is safe. INIT=1'b1 holds LFSR in reset on power-up.
|
||||||
|
// ============================================================================
|
||||||
|
(* max_fanout = 50 *) reg reset_h = 1'b1;
|
||||||
|
always @(posedge clk) reset_h <= ~reset_n;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset_h) begin
|
||||||
lfsr_reg <= {DITHER_WIDTH{1'b1}}; // Non-zero initial state
|
lfsr_reg <= {DITHER_WIDTH{1'b1}}; // Non-zero initial state
|
||||||
cycle_counter <= 0;
|
cycle_counter <= 0;
|
||||||
lock_detected <= 0;
|
lock_detected <= 0;
|
||||||
|
|||||||
@@ -59,6 +59,25 @@ reg [1:0] quadrant_reg2; // Pass-through for Stage 5 MUX
|
|||||||
// Valid pipeline: tracks 6-stage latency
|
// Valid pipeline: tracks 6-stage latency
|
||||||
reg [5:0] valid_pipe;
|
reg [5:0] valid_pipe;
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// RESET FAN-OUT INVARIANT (Build N+1 fix for WNS=-0.626ns at 400 MHz):
|
||||||
|
// ============================================================================
|
||||||
|
// reset_h is an ACTIVE-HIGH, REGISTERED copy of ~reset_n with (* max_fanout=50 *).
|
||||||
|
// Vivado replicates this register (14+ copies) so each copy drives ≈50 loads
|
||||||
|
// regionally, avoiding the single-LUT1 / 702-load net that caused timing
|
||||||
|
// failure in Build N. It feeds:
|
||||||
|
// - DSP48E1 RSTP/RSTC on the phase-accumulator DSP (below)
|
||||||
|
// - All pipeline-stage fabric FDREs (synchronous reset)
|
||||||
|
// Invariants (see cic_decimator_4x_enhanced.v for full reasoning):
|
||||||
|
// I1 correctness: reset_h == ~reset_n one cycle later
|
||||||
|
// I2 glitch-free: registered output
|
||||||
|
// I3 power-up safe: INIT=1'b1 holds all downstream in reset until first
|
||||||
|
// valid clock edge; reset_n is low on power-up anyway
|
||||||
|
// I4 de-assert lat.: +1 cycle vs. direct async; negligible at 400 MHz
|
||||||
|
// ============================================================================
|
||||||
|
(* max_fanout = 50 *) reg reset_h = 1'b1;
|
||||||
|
always @(posedge clk_400m) reset_h <= ~reset_n;
|
||||||
|
|
||||||
// Use only the top 8 bits for LUT addressing (256-entry LUT equivalent)
|
// Use only the top 8 bits for LUT addressing (256-entry LUT equivalent)
|
||||||
wire [7:0] lut_address = phase_with_offset[31:24];
|
wire [7:0] lut_address = phase_with_offset[31:24];
|
||||||
|
|
||||||
@@ -135,8 +154,8 @@ wire [15:0] cos_abs_w = sin_lut[63 - lut_index_pipe_cos];
|
|||||||
// Stage 2: phase_with_offset adds phase offset
|
// Stage 2: phase_with_offset adds phase offset
|
||||||
reg [31:0] phase_accumulator;
|
reg [31:0] phase_accumulator;
|
||||||
|
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
phase_accumulator <= 32'h00000000;
|
phase_accumulator <= 32'h00000000;
|
||||||
phase_accum_reg <= 32'h00000000;
|
phase_accum_reg <= 32'h00000000;
|
||||||
phase_with_offset <= 32'h00000000;
|
phase_with_offset <= 32'h00000000;
|
||||||
@@ -190,8 +209,8 @@ DSP48E1 #(
|
|||||||
.RSTA(1'b0),
|
.RSTA(1'b0),
|
||||||
.RSTB(1'b0),
|
.RSTB(1'b0),
|
||||||
.RSTM(1'b0),
|
.RSTM(1'b0),
|
||||||
.RSTP(!reset_n), // Reset P register (phase accumulator) on !reset_n
|
.RSTP(reset_h), // Reset P register (phase accumulator) — registered, max_fanout=50
|
||||||
.RSTC(!reset_n), // Reset C register (tuning word) on !reset_n
|
.RSTC(reset_h), // Reset C register (tuning word) — registered, max_fanout=50
|
||||||
.RSTALLCARRYIN(1'b0),
|
.RSTALLCARRYIN(1'b0),
|
||||||
.RSTALUMODE(1'b0),
|
.RSTALUMODE(1'b0),
|
||||||
.RSTCTRL(1'b0),
|
.RSTCTRL(1'b0),
|
||||||
@@ -245,8 +264,8 @@ DSP48E1 #(
|
|||||||
// Stage 1: Capture DSP48E1 P output into fabric register
|
// Stage 1: Capture DSP48E1 P output into fabric register
|
||||||
// Stage 2: Add phase offset to captured value
|
// Stage 2: Add phase offset to captured value
|
||||||
// Split into two registered stages to break DSP48E1.P→CARRY4 critical path
|
// Split into two registered stages to break DSP48E1.P→CARRY4 critical path
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
phase_accum_reg <= 32'h00000000;
|
phase_accum_reg <= 32'h00000000;
|
||||||
phase_with_offset <= 32'h00000000;
|
phase_with_offset <= 32'h00000000;
|
||||||
end else if (phase_valid) begin
|
end else if (phase_valid) begin
|
||||||
@@ -264,8 +283,8 @@ end
|
|||||||
// Only 2 registers driven (lut_index_pipe + quadrant_pipe)
|
// Only 2 registers driven (lut_index_pipe + quadrant_pipe)
|
||||||
// Minimal fanout → short routes → easy timing
|
// Minimal fanout → short routes → easy timing
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
lut_index_pipe_sin <= 6'b000000;
|
lut_index_pipe_sin <= 6'b000000;
|
||||||
lut_index_pipe_cos <= 6'b000000;
|
lut_index_pipe_cos <= 6'b000000;
|
||||||
quadrant_pipe <= 2'b00;
|
quadrant_pipe <= 2'b00;
|
||||||
@@ -281,8 +300,8 @@ end
|
|||||||
// Registered address → combinational LUT6 read → register
|
// Registered address → combinational LUT6 read → register
|
||||||
// Only 1 logic level (LUT6), trivial timing
|
// Only 1 logic level (LUT6), trivial timing
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
sin_abs_reg <= 16'h0000;
|
sin_abs_reg <= 16'h0000;
|
||||||
cos_abs_reg <= 16'h7FFF;
|
cos_abs_reg <= 16'h7FFF;
|
||||||
quadrant_reg <= 2'b00;
|
quadrant_reg <= 2'b00;
|
||||||
@@ -298,8 +317,8 @@ end
|
|||||||
// CARRY4 x4 chain has registered inputs — easily fits in 2.5ns
|
// CARRY4 x4 chain has registered inputs — easily fits in 2.5ns
|
||||||
// Also pass through abs values and quadrant for Stage 5
|
// Also pass through abs values and quadrant for Stage 5
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
sin_neg_reg <= 16'h0000;
|
sin_neg_reg <= 16'h0000;
|
||||||
cos_neg_reg <= -16'h7FFF;
|
cos_neg_reg <= -16'h7FFF;
|
||||||
sin_abs_reg2 <= 16'h0000;
|
sin_abs_reg2 <= 16'h0000;
|
||||||
@@ -318,8 +337,8 @@ end
|
|||||||
// Stage 5: Quadrant sign application → final sin/cos output
|
// Stage 5: Quadrant sign application → final sin/cos output
|
||||||
// Uses pre-computed negated values from Stage 4 — pure MUX, no arithmetic
|
// Uses pre-computed negated values from Stage 4 — pure MUX, no arithmetic
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
sin_out <= 16'h0000;
|
sin_out <= 16'h0000;
|
||||||
cos_out <= 16'h7FFF;
|
cos_out <= 16'h7FFF;
|
||||||
end else if (valid_pipe[4]) begin
|
end else if (valid_pipe[4]) begin
|
||||||
@@ -347,8 +366,8 @@ end
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Valid pipeline and dds_ready (6-stage latency)
|
// Valid pipeline and dds_ready (6-stage latency)
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
always @(posedge clk_400m or negedge reset_n) begin
|
always @(posedge clk_400m) begin
|
||||||
if (!reset_n) begin
|
if (reset_h) begin
|
||||||
valid_pipe <= 6'b000000;
|
valid_pipe <= 6'b000000;
|
||||||
dds_ready <= 1'b0;
|
dds_ready <= 1'b0;
|
||||||
end else begin
|
end else begin
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ module radar_system_top (
|
|||||||
parameter USE_LONG_CHIRP = 1'b1; // Default to long chirp
|
parameter USE_LONG_CHIRP = 1'b1; // Default to long chirp
|
||||||
parameter DOPPLER_ENABLE = 1'b1; // Enable Doppler processing
|
parameter DOPPLER_ENABLE = 1'b1; // Enable Doppler processing
|
||||||
parameter USB_ENABLE = 1'b1; // Enable USB data transfer
|
parameter USB_ENABLE = 1'b1; // Enable USB data transfer
|
||||||
parameter USB_MODE = 0; // 0=FT601 (32-bit, 200T), 1=FT2232H (8-bit, 50T)
|
parameter USB_MODE = 1; // 0=FT601 (32-bit, 200T), 1=FT2232H (8-bit, 50T) — default: FT2232H production board
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// INTERNAL SIGNALS
|
// INTERNAL SIGNALS
|
||||||
|
|||||||
@@ -70,6 +70,7 @@ PROD_RTL=(
|
|||||||
xfft_16.v
|
xfft_16.v
|
||||||
fft_engine.v
|
fft_engine.v
|
||||||
usb_data_interface.v
|
usb_data_interface.v
|
||||||
|
usb_data_interface_ft2232h.v
|
||||||
edge_detector.v
|
edge_detector.v
|
||||||
radar_mode_controller.v
|
radar_mode_controller.v
|
||||||
rx_gain_control.v
|
rx_gain_control.v
|
||||||
@@ -452,7 +453,8 @@ if [[ "$QUICK" -eq 0 ]]; then
|
|||||||
chirp_memory_loader_param.v latency_buffer.v \
|
chirp_memory_loader_param.v latency_buffer.v \
|
||||||
matched_filter_multi_segment.v matched_filter_processing_chain.v \
|
matched_filter_multi_segment.v matched_filter_processing_chain.v \
|
||||||
range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
|
range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
|
||||||
usb_data_interface.v edge_detector.v radar_mode_controller.v \
|
usb_data_interface.v usb_data_interface_ft2232h.v \
|
||||||
|
edge_detector.v radar_mode_controller.v \
|
||||||
rx_gain_control.v cfar_ca.v mti_canceller.v fpga_self_test.v
|
rx_gain_control.v cfar_ca.v mti_canceller.v fpga_self_test.v
|
||||||
|
|
||||||
# E2E integration (46 strict checks: TX, RX, USB R/W, CDC, safety, reset)
|
# E2E integration (46 strict checks: TX, RX, USB R/W, CDC, safety, reset)
|
||||||
@@ -466,7 +468,8 @@ if [[ "$QUICK" -eq 0 ]]; then
|
|||||||
chirp_memory_loader_param.v latency_buffer.v \
|
chirp_memory_loader_param.v latency_buffer.v \
|
||||||
matched_filter_multi_segment.v matched_filter_processing_chain.v \
|
matched_filter_multi_segment.v matched_filter_processing_chain.v \
|
||||||
range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
|
range_bin_decimator.v doppler_processor.v xfft_16.v fft_engine.v \
|
||||||
usb_data_interface.v edge_detector.v radar_mode_controller.v \
|
usb_data_interface.v usb_data_interface_ft2232h.v \
|
||||||
|
edge_detector.v radar_mode_controller.v \
|
||||||
rx_gain_control.v cfar_ca.v mti_canceller.v fpga_self_test.v
|
rx_gain_control.v cfar_ca.v mti_canceller.v fpga_self_test.v
|
||||||
else
|
else
|
||||||
echo " (skipped receiver golden + system top + E2E — use without --quick)"
|
echo " (skipped receiver golden + system top + E2E — use without --quick)"
|
||||||
|
|||||||
+2455
-2455
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -619,7 +619,7 @@ initial begin
|
|||||||
// Optional: dump specific signals for debugging
|
// Optional: dump specific signals for debugging
|
||||||
$dumpvars(1, dut.tx_inst);
|
$dumpvars(1, dut.tx_inst);
|
||||||
$dumpvars(1, dut.rx_inst);
|
$dumpvars(1, dut.rx_inst);
|
||||||
$dumpvars(1, dut.gen_ft601.usb_inst);
|
$dumpvars(1, dut.gen_ft2232h.usb_inst);
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
Reference in New Issue
Block a user