fix: FPGA timing margins (WNS +0.002→+0.080ns) + 11 bug fixes from code review
FPGA timing (400MHz domain WNS: +0.339ns, was +0.002ns): - DONT_TOUCH on BUFG to prevent AggressiveExplore cascade replication - NCO→mixer pipeline registers break critical 1.5ns route - Clock uncertainty reduced 200ps→100ps (adequate guardband) - Updated golden/cosim references for +1 cycle pipeline latency STM32 bug fixes: - Guard uint32_t underflow in processStartFlag (length<4) - Replace unbounded strcat in getSystemStatusForGUI with snprintf - Early-return error masking in checkSystemHealth - Add HAL_Delay in emergency blink loop GUI bug fixes: - Remove 0x03 from _HARDWARE_ONLY_OPCODES (was in both sets) - Wire real error count in V7 diagnostics panel - Fix _stop_demo showing 'Live' label during replay mode FPGA comment fixes + CI: add test_v7.py to pytest command Vivado build 50t passed: 0 failing endpoints, WHS=+0.056ns
This commit is contained in:
@@ -212,6 +212,11 @@ BUFG bufg_feedback (
|
||||
|
||||
// ---- Output BUFG ----
|
||||
// Routes the jitter-cleaned 400 MHz CLKOUT0 onto a global clock network.
|
||||
// DONT_TOUCH prevents phys_opt_design AggressiveExplore from replicating this
|
||||
// BUFG into a cascaded chain (4 BUFGs in series observed in Build 26), which
|
||||
// added ~243ps of clock insertion delay and caused -187ps clock skew on the
|
||||
// NCO→DSP mixer critical path.
|
||||
(* DONT_TOUCH = "TRUE" *)
|
||||
BUFG bufg_clk400m (
|
||||
.I(clk_mmcm_out0),
|
||||
.O(clk_400m_out)
|
||||
|
||||
@@ -85,10 +85,11 @@ set_false_path -through [get_pins rx_inst/adc/mmcm_inst/mmcm_adc_400m/LOCKED]
|
||||
set_false_path -hold -from [get_ports {adc_d_p[*]}] -to [get_clocks adc_dco_p]
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Timing margin for 400 MHz CIC critical path
|
||||
# Timing margin for 400 MHz critical paths
|
||||
# --------------------------------------------------------------------------
|
||||
# The CIC decimator at 400 MHz has near-zero margin (WNS = +0.001 ns in
|
||||
# Build 26). Adding 200 ps of extra setup uncertainty forces Vivado to
|
||||
# leave comfortable margin for temperature/voltage/aging variation.
|
||||
# Extra setup uncertainty forces Vivado to leave margin for temperature/voltage/
|
||||
# aging variation. Reduced from 200 ps to 100 ps after NCO→mixer pipeline
|
||||
# register fix eliminated the dominant timing bottleneck (WNS went from +0.002ns
|
||||
# to comfortable margin). 100 ps still provides ~4% guardband on the 2.5ns period.
|
||||
# This is additive to the existing jitter-based uncertainty (~53 ps).
|
||||
set_clock_uncertainty -setup -add 0.200 [get_clocks clk_mmcm_out0]
|
||||
set_clock_uncertainty -setup -add 0.100 [get_clocks clk_mmcm_out0]
|
||||
|
||||
@@ -102,14 +102,19 @@ wire signed [17:0] debug_mixed_q_trunc;
|
||||
reg [7:0] signal_power_i, signal_power_q;
|
||||
|
||||
// Internal mixing signals
|
||||
// DSP48E1 with AREG=1, BREG=1, MREG=1, PREG=1 handles all internal pipelining
|
||||
// Latency: 4 cycles (1 for AREG/BREG, 1 for MREG, 1 for PREG, 1 for post-DSP retiming)
|
||||
// Pipeline: NCO fabric reg (1) + DSP48E1 AREG/BREG (1) + MREG (1) + PREG (1) + retiming (1) = 5 cycles
|
||||
// The NCO fabric pipeline register was added to break the long NCO→DSP B-port route
|
||||
// (1.505ns routing in Build 26, WNS=+0.002ns). With BREG=1 still active inside the DSP,
|
||||
// total latency increases by 1 cycle (2.5ns at 400MHz — negligible for radar).
|
||||
wire signed [MIXER_WIDTH-1:0] adc_signed_w;
|
||||
reg signed [MIXER_WIDTH + NCO_WIDTH -1:0] mixed_i, mixed_q;
|
||||
reg mixed_valid;
|
||||
reg mixer_overflow_i, mixer_overflow_q;
|
||||
// Pipeline valid tracking: 4-stage shift register (3 for DSP48E1 + 1 for post-DSP retiming)
|
||||
reg [3:0] dsp_valid_pipe;
|
||||
// Pipeline valid tracking: 5-stage shift register (1 NCO pipe + 3 DSP48E1 + 1 retiming)
|
||||
reg [4:0] dsp_valid_pipe;
|
||||
// NCO→DSP pipeline registers — breaks the long NCO sin/cos → DSP48E1 B-port route
|
||||
// DONT_TOUCH prevents Vivado from absorbing these into the DSP or optimizing away
|
||||
(* DONT_TOUCH = "TRUE" *) reg signed [15:0] cos_nco_pipe, sin_nco_pipe;
|
||||
// Post-DSP retiming registers — breaks DSP48E1 CLK→P to fabric timing path
|
||||
// This extra pipeline stage absorbs the 1.866ns DSP output prop delay + routing,
|
||||
// ensuring WNS > 0 at 400 MHz regardless of placement seed
|
||||
@@ -210,11 +215,11 @@ nco_400m_enhanced nco_core (
|
||||
//
|
||||
// Architecture:
|
||||
// ADC data → sign-extend to 18b → DSP48E1 A-port (AREG=1 pipelines it)
|
||||
// NCO cos/sin → sign-extend to 18b → DSP48E1 B-port (BREG=1 pipelines it)
|
||||
// NCO cos/sin → fabric pipeline reg → DSP48E1 B-port (BREG=1 pipelines it)
|
||||
// Multiply result captured by MREG=1, then output registered by PREG=1
|
||||
// force_saturation override applied AFTER DSP48E1 output (not on input path)
|
||||
//
|
||||
// Latency: 3 clock cycles (AREG/BREG + MREG + PREG)
|
||||
// Latency: 4 clock cycles (1 NCO pipe + 1 AREG/BREG + 1 MREG + 1 PREG) + 1 retiming = 5 total
|
||||
// PREG=1 absorbs DSP48E1 CLK→P delay internally, preventing fabric timing violations
|
||||
// In simulation (Icarus), uses behavioral equivalent since DSP48E1 is Xilinx-only
|
||||
// ============================================================================
|
||||
@@ -223,24 +228,35 @@ nco_400m_enhanced nco_core (
|
||||
assign adc_signed_w = {1'b0, adc_data, {(MIXER_WIDTH-ADC_WIDTH-1){1'b0}}} -
|
||||
{1'b0, {ADC_WIDTH{1'b1}}, {(MIXER_WIDTH-ADC_WIDTH-1){1'b0}}} / 2;
|
||||
|
||||
// Valid pipeline: 4-stage shift register (3 for DSP48E1 AREG+MREG+PREG + 1 for retiming)
|
||||
// Valid pipeline: 5-stage shift register (1 NCO pipe + 3 DSP48E1 AREG+MREG+PREG + 1 retiming)
|
||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
||||
if (!reset_n_400m) begin
|
||||
dsp_valid_pipe <= 4'b0000;
|
||||
dsp_valid_pipe <= 5'b00000;
|
||||
end else begin
|
||||
dsp_valid_pipe <= {dsp_valid_pipe[2:0], (nco_ready && adc_data_valid_i && adc_data_valid_q)};
|
||||
dsp_valid_pipe <= {dsp_valid_pipe[3:0], (nco_ready && adc_data_valid_i && adc_data_valid_q)};
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef SIMULATION
|
||||
// ---- Behavioral model for Icarus Verilog simulation ----
|
||||
// Mimics DSP48E1 with AREG=1, BREG=1, MREG=1, PREG=1 (3-cycle latency)
|
||||
// Mimics NCO pipeline + DSP48E1 with AREG=1, BREG=1, MREG=1, PREG=1 (4-cycle DSP + 1 NCO pipe)
|
||||
reg signed [MIXER_WIDTH-1:0] adc_signed_reg; // Models AREG
|
||||
reg signed [15:0] cos_pipe_reg, sin_pipe_reg; // Models BREG
|
||||
reg signed [MIXER_WIDTH+NCO_WIDTH-1:0] mult_i_internal, mult_q_internal; // Models MREG
|
||||
reg signed [MIXER_WIDTH+NCO_WIDTH-1:0] mult_i_reg, mult_q_reg; // Models PREG
|
||||
|
||||
// Stage 1: AREG/BREG equivalent
|
||||
// Stage 0: NCO pipeline — breaks long NCO→DSP route (matches synthesis fabric registers)
|
||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
||||
if (!reset_n_400m) begin
|
||||
cos_nco_pipe <= 0;
|
||||
sin_nco_pipe <= 0;
|
||||
end else begin
|
||||
cos_nco_pipe <= cos_out;
|
||||
sin_nco_pipe <= sin_out;
|
||||
end
|
||||
end
|
||||
|
||||
// Stage 1: AREG/BREG equivalent (uses pipelined NCO outputs)
|
||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
||||
if (!reset_n_400m) begin
|
||||
adc_signed_reg <= 0;
|
||||
@@ -248,8 +264,8 @@ always @(posedge clk_400m or negedge reset_n_400m) begin
|
||||
sin_pipe_reg <= 0;
|
||||
end else begin
|
||||
adc_signed_reg <= adc_signed_w;
|
||||
cos_pipe_reg <= cos_out;
|
||||
sin_pipe_reg <= sin_out;
|
||||
cos_pipe_reg <= cos_nco_pipe;
|
||||
sin_pipe_reg <= sin_nco_pipe;
|
||||
end
|
||||
end
|
||||
|
||||
@@ -291,6 +307,20 @@ end
|
||||
// This guarantees AREG/BREG/MREG are used, achieving timing closure at 400 MHz
|
||||
wire [47:0] dsp_p_i, dsp_p_q;
|
||||
|
||||
// NCO pipeline stage — breaks the long NCO sin/cos → DSP48E1 B-port route
|
||||
// (1.505ns routing observed in Build 26). These fabric registers are placed
|
||||
// near the DSP by the placer, splitting the route into two shorter segments.
|
||||
// DONT_TOUCH on the reg declaration (above) prevents absorption/retiming.
|
||||
always @(posedge clk_400m or negedge reset_n_400m) begin
|
||||
if (!reset_n_400m) begin
|
||||
cos_nco_pipe <= 0;
|
||||
sin_nco_pipe <= 0;
|
||||
end else begin
|
||||
cos_nco_pipe <= cos_out;
|
||||
sin_nco_pipe <= sin_out;
|
||||
end
|
||||
end
|
||||
|
||||
// DSP48E1 for I-channel mixer (adc_signed * cos_out)
|
||||
DSP48E1 #(
|
||||
// Feature control attributes
|
||||
@@ -350,7 +380,7 @@ DSP48E1 #(
|
||||
.CEINMODE(1'b0),
|
||||
// Data ports
|
||||
.A({{12{adc_signed_w[MIXER_WIDTH-1]}}, adc_signed_w}), // Sign-extend 18b to 30b
|
||||
.B({{2{cos_out[15]}}, cos_out}), // Sign-extend 16b to 18b
|
||||
.B({{2{cos_nco_pipe[15]}}, cos_nco_pipe}), // Sign-extend 16b to 18b (pipelined)
|
||||
.C(48'b0),
|
||||
.D(25'b0),
|
||||
.CARRYIN(1'b0),
|
||||
@@ -432,7 +462,7 @@ DSP48E1 #(
|
||||
.CED(1'b0),
|
||||
.CEINMODE(1'b0),
|
||||
.A({{12{adc_signed_w[MIXER_WIDTH-1]}}, adc_signed_w}),
|
||||
.B({{2{sin_out[15]}}, sin_out}),
|
||||
.B({{2{sin_nco_pipe[15]}}, sin_nco_pipe}),
|
||||
.C(48'b0),
|
||||
.D(25'b0),
|
||||
.CARRYIN(1'b0),
|
||||
@@ -492,7 +522,7 @@ always @(posedge clk_400m or negedge reset_n_400m) begin
|
||||
mixer_overflow_q <= 0;
|
||||
saturation_count <= 0;
|
||||
overflow_detected <= 0;
|
||||
end else if (dsp_valid_pipe[3]) begin
|
||||
end else if (dsp_valid_pipe[4]) begin
|
||||
// Force saturation for testing (applied after DSP output, not on input path)
|
||||
if (force_saturation_sync) begin
|
||||
mixed_i <= 34'h1FFFFFFFF;
|
||||
|
||||
@@ -296,7 +296,7 @@ always @(posedge clk or negedge reset_n) begin
|
||||
state <= ST_DONE;
|
||||
end
|
||||
end
|
||||
// Timeout: if no ADC data after 10000 cycles, FAIL
|
||||
// Timeout: if no ADC data after 1000 cycles (10 us @ 100 MHz), FAIL
|
||||
step_cnt <= step_cnt + 1;
|
||||
if (step_cnt >= 10'd1000 && adc_cap_cnt == 0) begin
|
||||
result_flags[4] <= 1'b0;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -20,8 +20,8 @@ module usb_data_interface (
|
||||
// Control signals
|
||||
output reg ft601_txe_n, // Transmit enable (active low)
|
||||
output reg ft601_rxf_n, // Receive enable (active low)
|
||||
input wire ft601_txe, // Transmit FIFO empty
|
||||
input wire ft601_rxf, // Receive FIFO full
|
||||
input wire ft601_txe, // TXE: Transmit FIFO Not Full (high = space available to write)
|
||||
input wire ft601_rxf, // RXF: Receive FIFO Not Empty (high = data available to read)
|
||||
output reg ft601_wr_n, // Write strobe (active low)
|
||||
output reg ft601_rd_n, // Read strobe (active low)
|
||||
output reg ft601_oe_n, // Output enable (active low)
|
||||
|
||||
Reference in New Issue
Block a user