Split fft_engine FSM: async reset for control, sync reset for DSP/BRAM datapath (Build 11)
Split monolithic always block into two: - Block 1 (async reset): FSM state, counters, output interface (dout_re/im, dout_valid, done) — deterministic startup - Block 2 (sync reset): DSP/BRAM pipeline registers (rd_b_re/im, rd_tw_cos/sin, bf_prod_re/im, rd_a_re/im, bf_t_re/im, rd_tw_idx, rd_addr_even/odd, rd_inverse) — enables hard block absorption Also convert output pipeline (out_pipe_valid/inverse) to sync reset. Expected synthesis impact: - DSP48E1 AREG/BREG absorption for butterfly multiply inputs - DSP48E1 PREG absorption for multiply outputs (bf_prod_re/im) - BRAM output register absorption for rd_a_re/im - Eliminate ~300 DPIR-1 methodology warnings per FFT instance - Resolve DPOP-2 (PREG=0), RBOR-1 (BRAM DOA), REQP-1839/1840 13/13 regression suites pass. Integration golden: 2048/2048 exact match.
This commit is contained in:
@@ -521,7 +521,9 @@ xpm_memory_tdpram #(
|
||||
reg out_pipe_valid;
|
||||
reg out_pipe_inverse;
|
||||
|
||||
always @(posedge clk or negedge reset_n) begin
|
||||
// Sync reset: pure internal pipeline — no functional need for async reset.
|
||||
// Enables downstream register absorption.
|
||||
always @(posedge clk) begin
|
||||
if (!reset_n) begin
|
||||
out_pipe_valid <= 1'b0;
|
||||
out_pipe_inverse <= 1'b0;
|
||||
@@ -532,7 +534,10 @@ always @(posedge clk or negedge reset_n) begin
|
||||
end
|
||||
|
||||
// ============================================================================
|
||||
// MAIN FSM
|
||||
// MAIN FSM — Block 1: Control / FSM / Output Interface (async reset)
|
||||
// ============================================================================
|
||||
// Retains async reset for deterministic startup of FSM state and external
|
||||
// output interface signals (dout_re/im, dout_valid, done).
|
||||
// ============================================================================
|
||||
always @(posedge clk or negedge reset_n) begin
|
||||
if (!reset_n) begin
|
||||
@@ -547,20 +552,6 @@ always @(posedge clk or negedge reset_n) begin
|
||||
dout_im <= 0;
|
||||
dout_valid <= 0;
|
||||
done <= 0;
|
||||
rd_tw_cos <= 0;
|
||||
rd_tw_sin <= 0;
|
||||
rd_addr_even <= 0;
|
||||
rd_addr_odd <= 0;
|
||||
rd_inverse <= 0;
|
||||
rd_tw_idx <= 0;
|
||||
rd_a_re <= 0;
|
||||
rd_a_im <= 0;
|
||||
rd_b_re <= 0;
|
||||
rd_b_im <= 0;
|
||||
bf_t_re <= 0;
|
||||
bf_t_im <= 0;
|
||||
bf_prod_re <= 0;
|
||||
bf_prod_im <= 0;
|
||||
end else begin
|
||||
dout_valid <= 1'b0;
|
||||
done <= 1'b0;
|
||||
@@ -589,58 +580,22 @@ always @(posedge clk or negedge reset_n) begin
|
||||
end
|
||||
|
||||
ST_BF_READ: begin
|
||||
// Register butterfly addresses and twiddle index.
|
||||
// BRAM read initiated by bram_port_mux (addresses presented
|
||||
// combinationally); data arrives next cycle (ST_BF_TW).
|
||||
// Twiddle ROM lookup uses rd_tw_idx next cycle, breaking the
|
||||
// address-calc → ROM → quarter-wave-mux combinational path.
|
||||
rd_addr_even <= bf_addr_even;
|
||||
rd_addr_odd <= bf_addr_odd;
|
||||
rd_inverse <= inverse;
|
||||
rd_tw_idx <= bf_tw_idx;
|
||||
state <= ST_BF_TW;
|
||||
end
|
||||
|
||||
ST_BF_TW: begin
|
||||
// BRAM data valid this cycle (1-cycle read latency).
|
||||
// Capture BRAM data into pipeline regs.
|
||||
// Twiddle ROM lookup is combinational from registered rd_tw_idx
|
||||
// — capture the result into rd_tw_cos/sin.
|
||||
rd_a_re <= mem_rdata_a_re;
|
||||
rd_a_im <= mem_rdata_a_im;
|
||||
rd_b_re <= mem_rdata_b_re;
|
||||
rd_b_im <= mem_rdata_b_im;
|
||||
rd_tw_cos <= tw_cos_lookup;
|
||||
rd_tw_sin <= tw_sin_lookup;
|
||||
state <= ST_BF_MULT2;
|
||||
end
|
||||
|
||||
ST_BF_MULT2: begin
|
||||
// Compute raw twiddle products from registered BRAM data.
|
||||
// Path: register → DSP48E1 multiply-accumulate → register (bf_prod_re/im)
|
||||
// The shift is deferred to the next cycle to break the DSP→CARRY4 path.
|
||||
if (!rd_inverse) begin
|
||||
bf_prod_re <= rd_b_re * rd_tw_cos + rd_b_im * rd_tw_sin;
|
||||
bf_prod_im <= rd_b_im * rd_tw_cos - rd_b_re * rd_tw_sin;
|
||||
end else begin
|
||||
bf_prod_re <= rd_b_re * rd_tw_cos - rd_b_im * rd_tw_sin;
|
||||
bf_prod_im <= rd_b_im * rd_tw_cos + rd_b_re * rd_tw_sin;
|
||||
end
|
||||
state <= ST_BF_SHIFT;
|
||||
end
|
||||
|
||||
ST_BF_SHIFT: begin
|
||||
// Apply arithmetic right shift to registered DSP products.
|
||||
// This is now register → bit-select/sign-extend → register,
|
||||
// which should be near-zero logic (pure wiring + sign extension).
|
||||
bf_t_re <= bf_prod_re >>> (TWIDDLE_W - 1);
|
||||
bf_t_im <= bf_prod_im >>> (TWIDDLE_W - 1);
|
||||
state <= ST_BF_WRITE;
|
||||
end
|
||||
|
||||
ST_BF_WRITE: begin
|
||||
// bf_sum/bf_dif are combinational from registered rd_a and bf_t.
|
||||
// BRAM write data driven by bram_port_mux using bf_sum/bf_dif.
|
||||
if (bfly_count == FFT_N_HALF_M1[LOG2N-1:0]) begin
|
||||
bfly_count <= 0;
|
||||
if (stage == LOG2N - 1) begin
|
||||
@@ -689,4 +644,92 @@ always @(posedge clk or negedge reset_n) begin
|
||||
end
|
||||
end
|
||||
|
||||
// ============================================================================
|
||||
// MAIN FSM — Block 2: DSP/BRAM Datapath Pipeline (sync reset)
|
||||
// ============================================================================
|
||||
// Sync reset enables Vivado to absorb these registers into hard blocks:
|
||||
// - rd_b_re/im → DSP48E1 AREG (butterfly multiply A-port input)
|
||||
// - rd_tw_cos/sin → DSP48E1 BREG (butterfly multiply B-port input)
|
||||
// - bf_prod_re/im → DSP48E1 PREG (multiply output register)
|
||||
// - rd_a_re/im → BRAM output register (REGCE)
|
||||
// - rd_tw_idx → DSP48E1 PREG (twiddle index multiply output)
|
||||
// - bf_t_re/im, rd_addr_even/odd, rd_inverse — internal pipeline
|
||||
//
|
||||
// These registers are only meaningful during COMPUTE states (BF_READ through
|
||||
// BF_WRITE). Their values are always overwritten before use after every FSM
|
||||
// transition, so sync reset is functionally equivalent to async reset.
|
||||
// ============================================================================
|
||||
always @(posedge clk) begin
|
||||
if (!reset_n) begin
|
||||
rd_tw_cos <= 0;
|
||||
rd_tw_sin <= 0;
|
||||
rd_addr_even <= 0;
|
||||
rd_addr_odd <= 0;
|
||||
rd_inverse <= 0;
|
||||
rd_tw_idx <= 0;
|
||||
rd_a_re <= 0;
|
||||
rd_a_im <= 0;
|
||||
rd_b_re <= 0;
|
||||
rd_b_im <= 0;
|
||||
bf_t_re <= 0;
|
||||
bf_t_im <= 0;
|
||||
bf_prod_re <= 0;
|
||||
bf_prod_im <= 0;
|
||||
end else begin
|
||||
case (state)
|
||||
|
||||
ST_BF_READ: begin
|
||||
// Register butterfly addresses and twiddle index.
|
||||
// BRAM read initiated by bram_port_mux (addresses presented
|
||||
// combinationally); data arrives next cycle (ST_BF_TW).
|
||||
// Twiddle ROM lookup uses rd_tw_idx next cycle, breaking the
|
||||
// address-calc -> ROM -> quarter-wave-mux combinational path.
|
||||
rd_addr_even <= bf_addr_even;
|
||||
rd_addr_odd <= bf_addr_odd;
|
||||
rd_inverse <= inverse;
|
||||
rd_tw_idx <= bf_tw_idx;
|
||||
end
|
||||
|
||||
ST_BF_TW: begin
|
||||
// BRAM data valid this cycle (1-cycle read latency).
|
||||
// Capture BRAM data into pipeline regs.
|
||||
// Twiddle ROM lookup is combinational from registered rd_tw_idx
|
||||
// -- capture the result into rd_tw_cos/sin.
|
||||
rd_a_re <= mem_rdata_a_re;
|
||||
rd_a_im <= mem_rdata_a_im;
|
||||
rd_b_re <= mem_rdata_b_re;
|
||||
rd_b_im <= mem_rdata_b_im;
|
||||
rd_tw_cos <= tw_cos_lookup;
|
||||
rd_tw_sin <= tw_sin_lookup;
|
||||
end
|
||||
|
||||
ST_BF_MULT2: begin
|
||||
// Compute raw twiddle products from registered BRAM data.
|
||||
// Path: register -> DSP48E1 multiply-accumulate -> register
|
||||
// The shift is deferred to the next cycle to break the
|
||||
// DSP -> CARRY4 path.
|
||||
if (!rd_inverse) begin
|
||||
bf_prod_re <= rd_b_re * rd_tw_cos + rd_b_im * rd_tw_sin;
|
||||
bf_prod_im <= rd_b_im * rd_tw_cos - rd_b_re * rd_tw_sin;
|
||||
end else begin
|
||||
bf_prod_re <= rd_b_re * rd_tw_cos - rd_b_im * rd_tw_sin;
|
||||
bf_prod_im <= rd_b_im * rd_tw_cos + rd_b_re * rd_tw_sin;
|
||||
end
|
||||
end
|
||||
|
||||
ST_BF_SHIFT: begin
|
||||
// Apply arithmetic right shift to registered DSP products.
|
||||
// Register -> bit-select/sign-extend -> register
|
||||
// (near-zero logic: pure wiring + sign extension).
|
||||
bf_t_re <= bf_prod_re >>> (TWIDDLE_W - 1);
|
||||
bf_t_im <= bf_prod_im >>> (TWIDDLE_W - 1);
|
||||
end
|
||||
|
||||
default: begin
|
||||
// No datapath update in other states — registers hold values
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
Reference in New Issue
Block a user