Replace FFT stubs with synthesizable radix-2 DIT engine, fix BRAM inference

Implement iterative single-butterfly FFT engine (fft_engine.v) supporting
1024-pt and 32-pt transforms with quarter-wave twiddle ROM, XPM_MEMORY_TDPRAM
for guaranteed BRAM mapping in Vivado, and behavioral model for simulation.

Add xfft_32.v AXI-Stream wrapper for doppler_processor integration and
dual-branch matched_filter_processing_chain.v (behavioral + synthesis paths).

Fix placement failure caused by 68K+ registers from dissolved memory arrays:
- doppler_processor.v: extract mem writes to sync-only always block for BRAM
- xfft_32.v: extract buffer writes to sync-only always block for LUTRAM

Post-implementation: 37K regs (29%), 23K LUTs (37%), 10 BRAM (7%), fully routed.
All testbenches pass: fft_engine 12/12, xfft_32 10/10, mf_chain 27/27.
This commit is contained in:
Jason
2026-03-16 10:25:07 +02:00
parent deb2e81ec4
commit 692b6a3bfa
9 changed files with 3428 additions and 190 deletions
+179 -148
View File
@@ -124,157 +124,188 @@ always @(posedge clk or negedge reset_n) begin
end end
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1; wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
// ============================================== // ==============================================
// Main State Machine - FIXED // Main State Machine - FIXED
// ============================================== // ==============================================
reg [5:0] fft_sample_counter; reg [5:0] fft_sample_counter;
reg [9:0] processing_timeout; reg [9:0] processing_timeout;
always @(posedge clk or negedge reset_n) begin // Memory write enable and data signals (extracted for BRAM inference)
if (!reset_n) begin reg mem_we;
state <= S_IDLE; reg [10:0] mem_waddr_r;
write_range_bin <= 0; reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
write_chirp_index <= 0;
read_range_bin <= 0; // Memory read data (registered for BRAM read latency)
read_doppler_index <= 0; reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
frame_buffer_full <= 0;
doppler_valid <= 0; // ----------------------------------------------------------
fft_start <= 0; // Separate always block for memory writes NO async reset
fft_input_valid <= 0; // in sensitivity list, so Vivado can infer Block RAM.
fft_input_last <= 0; // ----------------------------------------------------------
fft_sample_counter <= 0; always @(posedge clk) begin
processing_timeout <= 0; if (mem_we) begin
status <= 0; doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
chirps_received <= 0; doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
chirp_state <= 0; end
end else begin // Registered read address driven by mem_read_addr from FSM
doppler_valid <= 0; mem_rdata_i <= doppler_i_mem[mem_read_addr];
fft_input_valid <= 0; mem_rdata_q <= doppler_q_mem[mem_read_addr];
fft_input_last <= 0; end
if (processing_timeout > 0) begin // ----------------------------------------------------------
processing_timeout <= processing_timeout - 1; // Main FSM async reset for control registers only.
end // Memory arrays are NOT touched here.
// ----------------------------------------------------------
case (state) always @(posedge clk or negedge reset_n) begin
S_IDLE: begin if (!reset_n) begin
if (frame_start_pulse) begin state <= S_IDLE;
// Start new frame write_range_bin <= 0;
write_chirp_index <= 0; write_chirp_index <= 0;
write_range_bin <= 0; read_range_bin <= 0;
frame_buffer_full <= 0; read_doppler_index <= 0;
chirps_received <= 0; frame_buffer_full <= 0;
//chirp_state <= 1; // Start accumulating doppler_valid <= 0;
end fft_start <= 0;
fft_input_valid <= 0;
if (data_valid && !frame_buffer_full) begin fft_input_last <= 0;
fft_sample_counter <= 0;
processing_timeout <= 0;
status <= 0;
chirps_received <= 0;
chirp_state <= 0;
mem_we <= 0;
mem_waddr_r <= 0;
mem_wdata_i <= 0;
mem_wdata_q <= 0;
mult_i <= 0;
mult_q <= 0;
fft_input_i <= 0;
fft_input_q <= 0;
doppler_output <= 0;
doppler_bin <= 0;
end else begin
doppler_valid <= 0;
fft_input_valid <= 0;
fft_input_last <= 0;
mem_we <= 0;
if (processing_timeout > 0) begin
processing_timeout <= processing_timeout - 1;
end
case (state)
S_IDLE: begin
if (frame_start_pulse) begin
// Start new frame
write_chirp_index <= 0;
write_range_bin <= 0;
frame_buffer_full <= 0;
chirps_received <= 0;
end
if (data_valid && !frame_buffer_full) begin
state <= S_ACCUMULATE; state <= S_ACCUMULATE;
write_range_bin <= 0; write_range_bin <= 0;
end end
end end
S_ACCUMULATE: begin S_ACCUMULATE: begin
if (data_valid) begin if (data_valid) begin
// Store with proper addressing // Drive memory write signals (actual write in separate block)
doppler_i_mem[mem_write_addr] <= range_data[15:0]; mem_we <= 1;
doppler_q_mem[mem_write_addr] <= range_data[31:16]; mem_waddr_r <= mem_write_addr;
mem_wdata_i <= range_data[15:0];
// Debug output to see what's being written mem_wdata_q <= range_data[31:16];
// $display("Time=%t: Write addr=%d (chirp=%d, range=%d), Data=%h",
// $time, mem_write_addr, write_chirp_index, write_range_bin, range_data);
// Increment range bin
if (write_range_bin < RANGE_BINS - 1) begin
write_range_bin <= write_range_bin + 1;
end else begin
// Completed one chirp
write_range_bin <= 0;
write_chirp_index <= write_chirp_index + 1;
chirps_received <= chirps_received + 1;
// Check if frame is complete
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
frame_buffer_full <= 1;
chirp_state <= 0; // Stop accumulating
// Could automatically start processing here:
state <= S_LOAD_FFT;
read_range_bin <= 0;
read_doppler_index <= 0;
fft_sample_counter <= 0;
fft_start <= 1;
end
end
end
end
// [Rest of S_LOAD_FFT, S_FFT_WAIT, S_OUTPUT states remain similar]
// But with fixed addressing in S_LOAD_FFT:
S_LOAD_FFT: begin
fft_start <= 0;
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
// Use correct addressing for reading
mult_i <= $signed(doppler_i_mem[mem_read_addr]) *
$signed(window_coeff[read_doppler_index]);
mult_q <= $signed(doppler_q_mem[mem_read_addr]) *
$signed(window_coeff[read_doppler_index]);
// Round instead of truncate // Increment range bin
fft_input_i <= (mult_i + (1 << 14)) >>> 15; // Round to nearest if (write_range_bin < RANGE_BINS - 1) begin
fft_input_q <= (mult_q + (1 << 14)) >>> 15; write_range_bin <= write_range_bin + 1;
end else begin
// Completed one chirp
write_range_bin <= 0;
write_chirp_index <= write_chirp_index + 1;
chirps_received <= chirps_received + 1;
// Check if frame is complete
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
frame_buffer_full <= 1;
chirp_state <= 0;
state <= S_LOAD_FFT;
read_range_bin <= 0;
read_doppler_index <= 0;
fft_sample_counter <= 0;
fft_start <= 1;
end
end
end
end
S_LOAD_FFT: begin
fft_start <= 0;
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
// Use registered read data (one cycle latency from BRAM)
mult_i <= $signed(mem_rdata_i) *
$signed(window_coeff[read_doppler_index]);
mult_q <= $signed(mem_rdata_q) *
$signed(window_coeff[read_doppler_index]);
fft_input_valid <= 1; // Round instead of truncate
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin fft_input_q <= (mult_q + (1 << 14)) >>> 15;
fft_input_last <= 1;
end fft_input_valid <= 1;
// Increment chirp index for next sample if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin
read_doppler_index <= read_doppler_index + 1; fft_input_last <= 1;
fft_sample_counter <= fft_sample_counter + 1; end
end else begin
state <= S_FFT_WAIT; // Increment chirp index for next sample
fft_sample_counter <= 0; read_doppler_index <= read_doppler_index + 1;
processing_timeout <= 100; fft_sample_counter <= fft_sample_counter + 1;
end end else begin
end state <= S_FFT_WAIT;
fft_sample_counter <= 0;
S_FFT_WAIT: begin processing_timeout <= 100;
if (fft_output_valid) begin end
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]}; end
doppler_bin <= fft_sample_counter;
range_bin <= read_range_bin; S_FFT_WAIT: begin
doppler_valid <= 1; if (fft_output_valid) begin
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
fft_sample_counter <= fft_sample_counter + 1; doppler_bin <= fft_sample_counter;
range_bin <= read_range_bin;
if (fft_output_last) begin doppler_valid <= 1;
state <= S_OUTPUT;
fft_sample_counter <= 0; fft_sample_counter <= fft_sample_counter + 1;
end
end if (fft_output_last) begin
state <= S_OUTPUT;
if (processing_timeout == 0) begin fft_sample_counter <= 0;
state <= S_OUTPUT; end
end end
end
if (processing_timeout == 0) begin
S_OUTPUT: begin state <= S_OUTPUT;
if (read_range_bin < RANGE_BINS - 1) begin end
read_range_bin <= read_range_bin + 1; end
read_doppler_index <= 0;
state <= S_LOAD_FFT; S_OUTPUT: begin
fft_start <= 1; if (read_range_bin < RANGE_BINS - 1) begin
end else begin read_range_bin <= read_range_bin + 1;
state <= S_IDLE; read_doppler_index <= 0;
frame_buffer_full <= 0; state <= S_LOAD_FFT;
end fft_start <= 1;
end end else begin
state <= S_IDLE;
endcase frame_buffer_full <= 0;
end
status <= {state, frame_buffer_full}; end
end
endcase
status <= {state, frame_buffer_full};
end
end end
// ============================================== // ==============================================
+606
View File
@@ -0,0 +1,606 @@
`timescale 1ns / 1ps
/**
* fft_engine.v
*
* Synthesizable parameterized radix-2 DIT FFT/IFFT engine.
* Iterative single-butterfly architecture with quarter-wave twiddle ROM.
*
* Architecture:
* - LOAD: Accept N input samples, store bit-reversed in BRAM
* - COMPUTE: LOG2N stages x N/2 butterflies, 2-cycle pipeline:
* BF_READ: Present BRAM addresses, capture twiddle
* BF_CALC: BRAM data valid; butterfly compute + writeback
* - OUTPUT: Stream N results (1/N scaling for IFFT)
*
* Data memory uses xpm_memory_tdpram (Xilinx Parameterized Macros) for
* guaranteed BRAM mapping when the FFT_XPM_BRAM macro is defined. When
* that macro is not defined (the default), a behavioral Verilog-2001
* true-dual-port model is used instead so the design compiles with
* Icarus Verilog or any non-Xilinx simulator.
*
* Clock domain: single clock (clk), active-low async reset (reset_n).
*/
module fft_engine #(
parameter N = 1024,
parameter LOG2N = 10,
parameter DATA_W = 16,
parameter INTERNAL_W = 32,
parameter TWIDDLE_W = 16,
parameter TWIDDLE_FILE = "fft_twiddle_1024.mem"
)(
input wire clk,
input wire reset_n,
// Control
input wire start,
input wire inverse,
// Data input
input wire signed [DATA_W-1:0] din_re,
input wire signed [DATA_W-1:0] din_im,
input wire din_valid,
// Data output
output reg signed [DATA_W-1:0] dout_re,
output reg signed [DATA_W-1:0] dout_im,
output reg dout_valid,
// Status
output wire busy,
output reg done
);
// ============================================================================
// SAFE WIDTH CONSTANTS
// ============================================================================
localparam [LOG2N:0] FFT_N = N;
localparam [LOG2N:0] FFT_N_HALF = N / 2;
localparam [LOG2N:0] FFT_N_QTR = N / 4;
localparam [LOG2N:0] FFT_N_HALF_M1 = N / 2 - 1;
localparam [LOG2N:0] FFT_N_M1 = N - 1;
// ============================================================================
// STATES
// ============================================================================
localparam [2:0] ST_IDLE = 3'd0,
ST_LOAD = 3'd1,
ST_BF_READ = 3'd2,
ST_BF_CALC = 3'd3,
ST_OUTPUT = 3'd4,
ST_DONE = 3'd5;
reg [2:0] state;
assign busy = (state != ST_IDLE);
// ============================================================================
// DATA MEMORY DECLARATIONS
// ============================================================================
// BRAM read data (registered outputs from port blocks)
reg signed [INTERNAL_W-1:0] mem_rdata_a_re, mem_rdata_a_im;
reg signed [INTERNAL_W-1:0] mem_rdata_b_re, mem_rdata_b_im;
// ============================================================================
// TWIDDLE ROM
// ============================================================================
localparam TW_QUARTER = N / 4;
localparam TW_ADDR_W = LOG2N - 2;
(* rom_style = "block" *) reg signed [TWIDDLE_W-1:0] cos_rom [0:TW_QUARTER-1];
initial begin
$readmemh(TWIDDLE_FILE, cos_rom);
end
// ============================================================================
// BIT-REVERSE
// ============================================================================
// Returns `val` with its LOG2N bits mirrored: bit 0 becomes bit LOG2N-1,
// bit 1 becomes bit LOG2N-2, and so on. Used to scatter incoming samples
// into bit-reversed order while they are loaded.
function [LOG2N-1:0] bit_reverse;
    input [LOG2N-1:0] val;
    integer dst;
    reg [LOG2N-1:0] mirrored;
    begin
        mirrored = 0;
        // Walk the destination bits from MSB down; each one takes the
        // source bit at the mirrored position.
        for (dst = LOG2N - 1; dst >= 0; dst = dst - 1)
            mirrored[dst] = val[LOG2N-1-dst];
        bit_reverse = mirrored;
    end
endfunction
// ============================================================================
// COUNTERS AND PIPELINE REGISTERS
// ============================================================================
reg [LOG2N-1:0] load_count;
reg [LOG2N:0] out_count;
reg [LOG2N-1:0] bfly_count;
reg [3:0] stage;
// Registered values (captured in BF_READ, used in BF_CALC)
reg signed [TWIDDLE_W-1:0] rd_tw_cos, rd_tw_sin;
reg [LOG2N-1:0] rd_addr_even, rd_addr_odd;
reg rd_inverse;
// Half and twiddle stride
reg [LOG2N-1:0] half_reg;
reg [LOG2N-1:0] tw_stride_reg;
// ============================================================================
// BUTTERFLY ADDRESS COMPUTATION (combinational)
// ============================================================================
reg [LOG2N-1:0] bf_addr_even;
reg [LOG2N-1:0] bf_addr_odd;
reg [LOG2N-1:0] bf_tw_idx;
// Combinational address generator for the current butterfly.
//   half_reg      : distance between the two butterfly inputs in this stage
//   bfly_count    : index of the butterfly within the stage
//   tw_stride_reg : twiddle index step for this stage
// Produces the even/odd data addresses and the twiddle index.
always @(*) begin : bf_addr_calc
    reg [LOG2N-1:0] span;        // copy of half_reg
    reg [LOG2N-1:0] pos_in_grp;  // butterfly position inside its group
    reg [LOG2N-1:0] grp_base;    // bfly_count with the in-group bits cleared

    span       = half_reg;
    pos_in_grp = bfly_count & (span - 1);
    grp_base   = bfly_count - pos_in_grp;

    // Twiddle index depends only on the position inside the group.
    bf_tw_idx    = pos_in_grp * tw_stride_reg;

    // Doubling the group base opens a gap of `span` entries for the odd half.
    bf_addr_even = (grp_base << 1) | pos_in_grp;
    bf_addr_odd  = bf_addr_even + span;
end
// ============================================================================
// TWIDDLE LOOKUP (combinational)
// ============================================================================
reg signed [TWIDDLE_W-1:0] tw_cos_lookup;
reg signed [TWIDDLE_W-1:0] tw_sin_lookup;
// Combinational twiddle lookup from the quarter-wave ROM.
// For twiddle index k = bf_tw_idx, produces tw_cos_lookup and tw_sin_lookup
// using only cos_rom[0 .. N/4-1]:
//   k == 0          : cos = cos_rom[0],        sin = 0
//   k == N/4        : cos = 0,                 sin = cos_rom[0]
//   0 < k < N/4     : cos = cos_rom[k],        sin = cos_rom[N/4 - k]
//   k > N/4         : cos = -cos_rom[N/2 - k], sin = cos_rom[k - N/4]
// Every local is given a default so that no path through this block leaves
// a variable unassigned (avoids inferred-latch warnings on rom_idx).
always @(*) begin : tw_lookup
    reg [LOG2N-1:0] k;
    reg [LOG2N-1:0] rom_idx;

    k             = bf_tw_idx;
    rom_idx       = {LOG2N{1'b0}};   // default: rom_idx is unused on the k==0 / k==N/4 paths
    tw_cos_lookup = 0;
    tw_sin_lookup = 0;

    if (k == 0) begin
        tw_cos_lookup = cos_rom[0];
        tw_sin_lookup = {TWIDDLE_W{1'b0}};
    end else if (k == FFT_N_QTR[LOG2N-1:0]) begin
        tw_cos_lookup = {TWIDDLE_W{1'b0}};
        tw_sin_lookup = cos_rom[0];
    end else if (k < FFT_N_QTR[LOG2N-1:0]) begin
        // First quadrant: cosine read directly, sine read mirrored.
        tw_cos_lookup = cos_rom[k[TW_ADDR_W-1:0]];
        rom_idx       = FFT_N_QTR[LOG2N-1:0] - k;
        tw_sin_lookup = cos_rom[rom_idx[TW_ADDR_W-1:0]];
    end else begin
        // Second quadrant: sine read shifted by N/4, cosine mirrored and negated.
        rom_idx       = k - FFT_N_QTR[LOG2N-1:0];
        tw_sin_lookup = cos_rom[rom_idx[TW_ADDR_W-1:0]];
        rom_idx       = FFT_N_HALF[LOG2N-1:0] - k;
        tw_cos_lookup = -cos_rom[rom_idx[TW_ADDR_W-1:0]];
    end
end
// ============================================================================
// SATURATION
// ============================================================================
// Clamps a signed INTERNAL_W-bit value into the signed DATA_W-bit range
// [-(2^(DATA_W-1)), 2^(DATA_W-1) - 1] and returns the low DATA_W bits.
function signed [DATA_W-1:0] saturate;
    input signed [INTERNAL_W-1:0] val;
    reg signed [INTERNAL_W-1:0] upper_lim;
    reg signed [INTERNAL_W-1:0] lower_lim;
    begin
        lower_lim = -(1 << (DATA_W - 1));
        upper_lim = (1 << (DATA_W - 1)) - 1;
        // Signed comparisons: both operands are signed regs.
        saturate = (val > upper_lim) ? upper_lim[DATA_W-1:0] :
                   (val < lower_lim) ? lower_lim[DATA_W-1:0] :
                                       val[DATA_W-1:0];
    end
endfunction
// ============================================================================
// BUTTERFLY COMPUTATION (combinational, for BF_CALC write data)
// ============================================================================
reg signed [INTERNAL_W-1:0] bf_t_re, bf_t_im;
reg signed [INTERNAL_W-1:0] bf_sum_re, bf_sum_im;
reg signed [INTERNAL_W-1:0] bf_dif_re, bf_dif_im;
// Combinational radix-2 butterfly on the data read from BRAM.
//   a = mem_rdata_a_*  (even input), b = mem_rdata_b_* (odd input)
//   t = b * (cos -/+ j*sin), scaled back by TWIDDLE_W-1 fractional bits
//   sum = a + t, dif = a - t
// rd_inverse selects the sign of the sine term (forward vs. inverse).
//
// The twiddle products are formed at full precision
// (INTERNAL_W + TWIDDLE_W bits) BEFORE the right shift. Writing
// `(b * cos + ...) >>> n` directly into an INTERNAL_W-bit target makes
// Verilog evaluate the multiply in only INTERNAL_W bits, so the product
// silently wraps once the data grows beyond INTERNAL_W - TWIDDLE_W bits.
always @(*) begin : bf_compute
    // Full-width products of one data word and one twiddle word.
    reg signed [INTERNAL_W+TWIDDLE_W-1:0] prod_re_cos;
    reg signed [INTERNAL_W+TWIDDLE_W-1:0] prod_im_sin;
    reg signed [INTERNAL_W+TWIDDLE_W-1:0] prod_im_cos;
    reg signed [INTERNAL_W+TWIDDLE_W-1:0] prod_re_sin;
    // One guard bit for the sum/difference of two products.
    reg signed [INTERNAL_W+TWIDDLE_W:0]   acc_re;
    reg signed [INTERNAL_W+TWIDDLE_W:0]   acc_im;

    prod_re_cos = mem_rdata_b_re * rd_tw_cos;
    prod_im_sin = mem_rdata_b_im * rd_tw_sin;
    prod_im_cos = mem_rdata_b_im * rd_tw_cos;
    prod_re_sin = mem_rdata_b_re * rd_tw_sin;

    if (!rd_inverse) begin
        acc_re = prod_re_cos + prod_im_sin;
        acc_im = prod_im_cos - prod_re_sin;
    end else begin
        acc_re = prod_re_cos - prod_im_sin;
        acc_im = prod_im_cos + prod_re_sin;
    end

    // Arithmetic shift removes the twiddle's fractional bits; the result is
    // then truncated to INTERNAL_W bits by the assignment.
    bf_t_re = acc_re >>> (TWIDDLE_W - 1);
    bf_t_im = acc_im >>> (TWIDDLE_W - 1);

    bf_sum_re = mem_rdata_a_re + bf_t_re;
    bf_sum_im = mem_rdata_a_im + bf_t_im;
    bf_dif_re = mem_rdata_a_re - bf_t_re;
    bf_dif_im = mem_rdata_a_im - bf_t_im;
end
// ============================================================================
// BRAM PORT ADDRESS / WE / WDATA — combinational mux
// ============================================================================
// Drives the port A and port B address, write-enable and write-data signals
// combinationally from the FSM state, the registered counters/addresses, the
// butterfly results and (during LOAD) the din_* inputs. The memory samples
// these signals at the next posedge of clk.
// ============================================================================
reg bram_we_a;
reg [LOG2N-1:0] bram_addr_a;
reg signed [INTERNAL_W-1:0] bram_wdata_a_re;
reg signed [INTERNAL_W-1:0] bram_wdata_a_im;
reg bram_we_b;
reg [LOG2N-1:0] bram_addr_b;
reg signed [INTERNAL_W-1:0] bram_wdata_b_re;
reg signed [INTERNAL_W-1:0] bram_wdata_b_im;
// Selects what each BRAM port does in the current FSM state:
//   ST_LOAD    : port A writes the incoming sample at the bit-reversed address
//   ST_BF_READ : both ports present the butterfly read addresses
//   ST_BF_CALC : port A writes the sum, port B writes the difference
//   ST_OUTPUT  : port A presents the output read address
// In every other state both ports idle at address 0 with writes disabled.
always @(*) begin : bram_port_mux
    // Idle values for both ports; overridden below where needed.
    bram_we_a       = 1'b0;
    bram_we_b       = 1'b0;
    bram_addr_a     = 0;
    bram_addr_b     = 0;
    bram_wdata_a_re = 0;
    bram_wdata_a_im = 0;
    bram_wdata_b_re = 0;
    bram_wdata_b_im = 0;

    if (state == ST_LOAD) begin
        // Sign-extend the DATA_W-bit input sample to INTERNAL_W bits.
        bram_wdata_a_im = {{(INTERNAL_W-DATA_W){din_im[DATA_W-1]}}, din_im};
        bram_wdata_a_re = {{(INTERNAL_W-DATA_W){din_re[DATA_W-1]}}, din_re};
        bram_addr_a     = bit_reverse(load_count);
        bram_we_a       = din_valid;
    end else if (state == ST_BF_READ) begin
        bram_addr_b = bf_addr_odd;
        bram_addr_a = bf_addr_even;
    end else if (state == ST_BF_CALC) begin
        // Write back to the addresses captured during ST_BF_READ.
        bram_addr_a     = rd_addr_even;
        bram_addr_b     = rd_addr_odd;
        bram_wdata_a_re = bf_sum_re;
        bram_wdata_a_im = bf_sum_im;
        bram_wdata_b_re = bf_dif_re;
        bram_wdata_b_im = bf_dif_im;
        bram_we_a       = 1'b1;
        bram_we_b       = 1'b1;
    end else if (state == ST_OUTPUT) begin
        bram_addr_a = out_count[LOG2N-1:0];
    end
end
// ============================================================================
// DATA MEMORY — True Dual-Port BRAM
// ============================================================================
// For synthesis: xpm_memory_tdpram (Xilinx Parameterized Macros)
// For simulation: behavioral Verilog-2001 model (Icarus-compatible)
// ============================================================================
// XPM read-data wires (directly assigned to rdata regs below)
wire [INTERNAL_W-1:0] xpm_douta_re, xpm_doutb_re;
wire [INTERNAL_W-1:0] xpm_douta_im, xpm_doutb_im;
// Mirror the memory read-data wires onto the signed mem_rdata_* regs used by
// the butterfly and output logic. Source and destination have the same
// width, so this is a plain bit-for-bit copy.
always @(*) begin
    mem_rdata_b_im = xpm_doutb_im;
    mem_rdata_b_re = xpm_doutb_re;
    mem_rdata_a_im = xpm_douta_im;
    mem_rdata_a_re = xpm_douta_re;
end
`ifndef FFT_XPM_BRAM
// ----------------------------------------------------------------------------
// Default: behavioral TDP model (works with Icarus Verilog -g2001)
// For Vivado synthesis, define FFT_XPM_BRAM to use xpm_memory_tdpram.
// ----------------------------------------------------------------------------
// Behavioral true-dual-port memory: N words of INTERNAL_W bits, kept as
// separate real and imaginary arrays that share the port A / port B controls.
reg [INTERNAL_W-1:0] sim_mem_re [0:N-1];
reg [INTERNAL_W-1:0] sim_mem_im [0:N-1];
// Port A
// Synchronous write when bram_we_a is set, plus a registered read of the same
// address every cycle. The write is non-blocking, so the read captures the
// contents from before the write (read-first behaviour).
reg [INTERNAL_W-1:0] sim_douta_re, sim_douta_im;
always @(posedge clk) begin
if (bram_we_a) begin
sim_mem_re[bram_addr_a] <= bram_wdata_a_re;
sim_mem_im[bram_addr_a] <= bram_wdata_a_im;
end
sim_douta_re <= sim_mem_re[bram_addr_a];
sim_douta_im <= sim_mem_im[bram_addr_a];
end
// Expose the port A read registers on the shared read-data wires.
assign xpm_douta_re = sim_douta_re;
assign xpm_douta_im = sim_douta_im;
// Port B
// Same structure as port A, driven by the bram_*_b signals.
reg [INTERNAL_W-1:0] sim_doutb_re, sim_doutb_im;
always @(posedge clk) begin
if (bram_we_b) begin
sim_mem_re[bram_addr_b] <= bram_wdata_b_re;
sim_mem_im[bram_addr_b] <= bram_wdata_b_im;
end
sim_doutb_re <= sim_mem_re[bram_addr_b];
sim_doutb_im <= sim_mem_im[bram_addr_b];
end
// Expose the port B read registers on the shared read-data wires.
assign xpm_doutb_re = sim_doutb_re;
assign xpm_doutb_im = sim_doutb_im;
// Zero both arrays at time 0 so reads of unwritten locations return 0
// instead of X.
integer init_i;
initial begin
for (init_i = 0; init_i < N; init_i = init_i + 1) begin
sim_mem_re[init_i] = 0;
sim_mem_im[init_i] = 0;
end
end
`else
// ----------------------------------------------------------------------------
// Synthesis: xpm_memory_tdpram — guaranteed BRAM mapping
// Enabled when FFT_XPM_BRAM is defined (e.g. in Vivado TCL script).
// ----------------------------------------------------------------------------
// Note: Vivado auto-finds XPM library; no `include needed.
// Two instances: one for real, one for imaginary.
// WRITE_MODE = "read_first" matches the behavioral TDP model, whose
// registered read returns the pre-write contents of a written address.
// READ_LATENCY = 1 (registered output).
// ----------------------------------------------------------------------------
// Real-part data memory: N words x INTERNAL_W bits, true dual port, both
// ports clocked by clk, one-cycle read latency, read_first write mode.
// Each port writes a full word at a time (byte-write width == data width).
xpm_memory_tdpram #(
.ADDR_WIDTH_A (LOG2N),
.ADDR_WIDTH_B (LOG2N),
.AUTO_SLEEP_TIME (0),
.BYTE_WRITE_WIDTH_A (INTERNAL_W),
.BYTE_WRITE_WIDTH_B (INTERNAL_W),
.CASCADE_HEIGHT (0),
.CLOCKING_MODE ("common_clock"),
.ECC_BIT_RANGE ("7:0"),
.ECC_MODE ("no_ecc"),
.ECC_TYPE ("none"),
.IGNORE_INIT_SYNTH (0),
.MEMORY_INIT_FILE ("none"),
.MEMORY_INIT_PARAM ("0"),
.MEMORY_OPTIMIZATION ("true"),
.MEMORY_PRIMITIVE ("block"),
// Total size in bits.
.MEMORY_SIZE (N * INTERNAL_W),
.MESSAGE_CONTROL (0),
.RAM_DECOMP ("auto"),
.READ_DATA_WIDTH_A (INTERNAL_W),
.READ_DATA_WIDTH_B (INTERNAL_W),
.READ_LATENCY_A (1),
.READ_LATENCY_B (1),
.READ_RESET_VALUE_A ("0"),
.READ_RESET_VALUE_B ("0"),
.RST_MODE_A ("SYNC"),
.RST_MODE_B ("SYNC"),
.SIM_ASSERT_CHK (0),
.USE_EMBEDDED_CONSTRAINT (0),
.USE_MEM_INIT (1),
.USE_MEM_INIT_MMI (0),
.WAKEUP_TIME ("disable_sleep"),
.WRITE_DATA_WIDTH_A (INTERNAL_W),
.WRITE_DATA_WIDTH_B (INTERNAL_W),
.WRITE_MODE_A ("read_first"),
.WRITE_MODE_B ("read_first"),
.WRITE_PROTECT (1)
) u_bram_re (
.clka (clk),
.clkb (clk),
// Output-register resets are tied off; ports are permanently enabled.
.rsta (1'b0),
.rstb (1'b0),
.ena (1'b1),
.enb (1'b1),
.regcea (1'b1),
.regceb (1'b1),
.addra (bram_addr_a),
.addrb (bram_addr_b),
.dina (bram_wdata_a_re),
.dinb (bram_wdata_b_re),
.wea (bram_we_a),
.web (bram_we_b),
.douta (xpm_douta_re),
.doutb (xpm_doutb_re),
// ECC error injection/status ports are unused (ECC_MODE is "no_ecc").
.injectdbiterra (1'b0),
.injectdbiterrb (1'b0),
.injectsbiterra (1'b0),
.injectsbiterrb (1'b0),
.sbiterra (),
.sbiterrb (),
.dbiterra (),
.dbiterrb (),
.sleep (1'b0)
);
// Imaginary-part data memory: same configuration as the real-part instance
// and the same address / write-enable signals; only the write-data and
// read-data buses differ.
xpm_memory_tdpram #(
.ADDR_WIDTH_A (LOG2N),
.ADDR_WIDTH_B (LOG2N),
.AUTO_SLEEP_TIME (0),
.BYTE_WRITE_WIDTH_A (INTERNAL_W),
.BYTE_WRITE_WIDTH_B (INTERNAL_W),
.CASCADE_HEIGHT (0),
.CLOCKING_MODE ("common_clock"),
.ECC_BIT_RANGE ("7:0"),
.ECC_MODE ("no_ecc"),
.ECC_TYPE ("none"),
.IGNORE_INIT_SYNTH (0),
.MEMORY_INIT_FILE ("none"),
.MEMORY_INIT_PARAM ("0"),
.MEMORY_OPTIMIZATION ("true"),
.MEMORY_PRIMITIVE ("block"),
// Total size in bits.
.MEMORY_SIZE (N * INTERNAL_W),
.MESSAGE_CONTROL (0),
.RAM_DECOMP ("auto"),
.READ_DATA_WIDTH_A (INTERNAL_W),
.READ_DATA_WIDTH_B (INTERNAL_W),
.READ_LATENCY_A (1),
.READ_LATENCY_B (1),
.READ_RESET_VALUE_A ("0"),
.READ_RESET_VALUE_B ("0"),
.RST_MODE_A ("SYNC"),
.RST_MODE_B ("SYNC"),
.SIM_ASSERT_CHK (0),
.USE_EMBEDDED_CONSTRAINT (0),
.USE_MEM_INIT (1),
.USE_MEM_INIT_MMI (0),
.WAKEUP_TIME ("disable_sleep"),
.WRITE_DATA_WIDTH_A (INTERNAL_W),
.WRITE_DATA_WIDTH_B (INTERNAL_W),
.WRITE_MODE_A ("read_first"),
.WRITE_MODE_B ("read_first"),
.WRITE_PROTECT (1)
) u_bram_im (
.clka (clk),
.clkb (clk),
// Output-register resets are tied off; ports are permanently enabled.
.rsta (1'b0),
.rstb (1'b0),
.ena (1'b1),
.enb (1'b1),
.regcea (1'b1),
.regceb (1'b1),
.addra (bram_addr_a),
.addrb (bram_addr_b),
.dina (bram_wdata_a_im),
.dinb (bram_wdata_b_im),
.wea (bram_we_a),
.web (bram_we_b),
.douta (xpm_douta_im),
.doutb (xpm_doutb_im),
// ECC error injection/status ports are unused (ECC_MODE is "no_ecc").
.injectdbiterra (1'b0),
.injectdbiterrb (1'b0),
.injectsbiterra (1'b0),
.injectsbiterrb (1'b0),
.sbiterra (),
.sbiterrb (),
.dbiterra (),
.dbiterrb (),
.sleep (1'b0)
);
`endif
// ============================================================================
// OUTPUT PIPELINE
// ============================================================================
reg out_pipe_valid;
reg out_pipe_inverse;
// One-cycle delay line that tracks the memory read latency during output.
//   out_pipe_valid   : high one cycle after a valid output address
//                      (out_count in 0 .. N-1 while in ST_OUTPUT) was presented
//   out_pipe_inverse : the `inverse` input delayed by one cycle
always @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        {out_pipe_valid, out_pipe_inverse} <= 2'b00;
    end else begin
        out_pipe_inverse <= inverse;
        // out_count is LOG2N+1 bits wide, so it can be compared against N directly.
        out_pipe_valid   <= (out_count < FFT_N) && (state == ST_OUTPUT);
    end
end
// ============================================================================
// MAIN FSM
// ============================================================================
// Main control FSM: IDLE -> LOAD -> (BF_READ <-> BF_CALC) x stages -> OUTPUT
// -> DONE -> IDLE. Owns all counters, the per-stage half/stride registers,
// the BF_READ capture registers and the dout_* / done outputs.
always @(posedge clk or negedge reset_n) begin
if (!reset_n) begin
// Asynchronous reset: idle state, stage-0 geometry (half = 1, stride = N/2).
state <= ST_IDLE;
load_count <= 0;
out_count <= 0;
bfly_count <= 0;
stage <= 0;
half_reg <= 1;
tw_stride_reg <= FFT_N_HALF[LOG2N-1:0];
dout_re <= 0;
dout_im <= 0;
dout_valid <= 0;
done <= 0;
rd_tw_cos <= 0;
rd_tw_sin <= 0;
rd_addr_even <= 0;
rd_addr_odd <= 0;
rd_inverse <= 0;
end else begin
// Defaults: dout_valid and done are single-cycle pulses unless a state
// below overrides them in the same clock edge.
dout_valid <= 1'b0;
done <= 1'b0;
case (state)
// Wait for start; a start while not idle is ignored.
ST_IDLE: begin
if (start) begin
state <= ST_LOAD;
load_count <= 0;
end
end
// Count accepted input samples (din_valid). When the N-th sample is
// accepted, set up stage 0 and begin the butterfly passes.
ST_LOAD: begin
if (din_valid) begin
if (load_count == FFT_N_M1[LOG2N-1:0]) begin
state <= ST_BF_READ;
stage <= 0;
bfly_count <= 0;
half_reg <= 1;
tw_stride_reg <= FFT_N_HALF[LOG2N-1:0];
end else begin
load_count <= load_count + 1;
end
end
end
// Capture the twiddle and the two data addresses for the butterfly whose
// read addresses are being presented this cycle.
// NOTE(review): `inverse` is re-sampled here for every butterfly, so the
// caller presumably has to hold it stable for the whole transform — confirm.
ST_BF_READ: begin
rd_tw_cos <= tw_cos_lookup;
rd_tw_sin <= tw_sin_lookup;
rd_addr_even <= bf_addr_even;
rd_addr_odd <= bf_addr_odd;
rd_inverse <= inverse;
state <= ST_BF_CALC;
end
// Advance to the next butterfly. After N/2 butterflies, move to the next
// stage (half doubles, twiddle stride halves) or, after the last stage,
// start streaming results.
ST_BF_CALC: begin
if (bfly_count == FFT_N_HALF_M1[LOG2N-1:0]) begin
bfly_count <= 0;
if (stage == LOG2N - 1) begin
state <= ST_OUTPUT;
out_count <= 0;
end else begin
stage <= stage + 1;
half_reg <= half_reg << 1;
tw_stride_reg <= tw_stride_reg >> 1;
state <= ST_BF_READ;
end
end else begin
bfly_count <= bfly_count + 1;
state <= ST_BF_READ;
end
end
// Stream results: out_count steps through addresses 0..N-1 and stops at N.
// Data is registered to dout_* when out_pipe_valid is set.
ST_OUTPUT: begin
if (out_count <= FFT_N_M1[LOG2N-1:0]) begin
out_count <= out_count + 1;
end
if (out_pipe_valid) begin
if (out_pipe_inverse) begin
// Inverse transform: arithmetic shift right by LOG2N gives 1/N scaling.
dout_re <= saturate(mem_rdata_a_re >>> LOG2N);
dout_im <= saturate(mem_rdata_a_im >>> LOG2N);
end else begin
dout_re <= saturate(mem_rdata_a_re);
dout_im <= saturate(mem_rdata_a_im);
end
dout_valid <= 1'b1;
end
// Leave once all addresses were issued and the last sample has drained.
if (out_count > FFT_N_M1[LOG2N-1:0] && !out_pipe_valid) begin
state <= ST_DONE;
end
end
// Pulse done for one cycle, then return to idle.
ST_DONE: begin
done <= 1'b1;
state <= ST_IDLE;
end
default: state <= ST_IDLE;
endcase
end
end
endmodule
+259
View File
@@ -0,0 +1,259 @@
// Quarter-wave cosine ROM for 1024-point FFT
// 256 entries, 16-bit signed Q15 ($readmemh format)
// cos(2*pi*k/1024) for k = 0..255
7FFF
7FFE
7FFD
7FF9
7FF5
7FF0
7FE9
7FE1
7FD8
7FCD
7FC1
7FB4
7FA6
7F97
7F86
7F74
7F61
7F4D
7F37
7F21
7F09
7EEF
7ED5
7EB9
7E9C
7E7E
7E5F
7E3E
7E1D
7DFA
7DD5
7DB0
7D89
7D62
7D39
7D0E
7CE3
7CB6
7C88
7C59
7C29
7BF8
7BC5
7B91
7B5C
7B26
7AEE
7AB6
7A7C
7A41
7A05
79C8
7989
794A
7909
78C7
7884
783F
77FA
77B3
776B
7722
76D8
768D
7641
75F3
75A5
7555
7504
74B2
745F
740A
73B5
735E
7307
72AE
7254
71F9
719D
7140
70E2
7083
7022
6FC1
6F5E
6EFB
6E96
6E30
6DC9
6D61
6CF8
6C8E
6C23
6BB7
6B4A
6ADC
6A6D
69FD
698B
6919
68A6
6832
67BC
6746
66CF
6656
65DD
6563
64E8
646C
63EE
6370
62F1
6271
61F0
616E
60EB
6068
5FE3
5F5D
5ED7
5E4F
5DC7
5D3E
5CB3
5C28
5B9C
5B0F
5A82
59F3
5964
58D3
5842
57B0
571D
568A
55F5
5560
54C9
5432
539B
5302
5268
51CE
5133
5097
4FFB
4F5D
4EBF
4E20
4D81
4CE0
4C3F
4B9D
4AFB
4A58
49B4
490F
4869
47C3
471C
4675
45CD
4524
447A
43D0
4325
427A
41CE
4121
4073
3FC5
3F17
3E68
3DB8
3D07
3C56
3BA5
3AF2
3A40
398C
38D9
3824
376F
36BA
3604
354D
3496
33DF
3326
326E
31B5
30FB
3041
2F87
2ECC
2E11
2D55
2C99
2BDC
2B1F
2A61
29A3
28E5
2826
2767
26A8
25E8
2528
2467
23A6
22E5
2223
2161
209F
1FDD
1F1A
1E57
1D93
1CCF
1C0B
1B47
1A82
19BE
18F9
1833
176E
16A8
15E2
151C
1455
138F
12C8
1201
113A
1072
0FAB
0EE3
0E1C
0D54
0C8C
0BC4
0AFB
0A33
096A
08A2
07D9
0711
0648
057F
04B6
03ED
0324
025B
0192
00C9
+11
View File
@@ -0,0 +1,11 @@
// Quarter-wave cosine ROM for 32-point FFT
// 8 entries, 16-bit signed Q15 ($readmemh format)
// cos(2*pi*k/32) for k = 0..7
7FFF
7D89
7641
6A6D
5A82
471C
30FB
18F9
@@ -529,18 +529,718 @@ end
`else `else
// ============================================================================ // ============================================================================
// SYNTHESIS STUB // SYNTHESIS IMPLEMENTATION Radix-2 DIT FFT via fft_engine
// ============================================================================ // ============================================================================
// The behavioral FFT implementation above uses $cos/$sin/$rtoi (non- // Uses a single fft_engine instance (1024-pt) reused 3 times:
// synthesizable). For real hardware, replace this stub with Xilinx xfft // 1. Forward FFT of signal
// IP cores or a synthesizable pipelined FFT. The stub ties outputs to // 2. Forward FFT of reference
// safe defaults so the rest of the design can be synthesized and verified. // 3. Inverse FFT of conjugate product
// Conjugate multiply done via frequency_matched_filter (4-stage pipeline).
//
// Buffer scheme (BRAM-inferrable):
// sig_buf[1024]: ADC input -> signal FFT output
// ref_buf[1024]: Reference input -> reference FFT output
// prod_buf[1024]: Conjugate multiply output -> IFFT output
//
// Memory access is INSIDE always @(posedge clk) blocks (no async reset)
// using local blocking variables. This eliminates NBA race conditions
// and enables Vivado BRAM inference (same pattern as fft_engine.v).
//
// BRAM read latency (1 cycle) is handled by "primed" flags:
// feed_primed for FFT feed operations
// mult_primed for conjugate multiply feed
// out_primed for output streaming
// ============================================================================ // ============================================================================
assign range_profile_i = 16'd0; localparam FFT_SIZE = 1024;
assign range_profile_q = 16'd0; localparam ADDR_BITS = 10;
assign range_profile_valid = 1'b0;
assign chain_state = 4'd0; // permanently IDLE // State encoding
localparam [3:0] ST_IDLE = 4'd0,
ST_COLLECT = 4'd1, // Collect 1024 ADC + ref samples
ST_SIG_FFT = 4'd2, // Forward FFT of signal
ST_SIG_CAP = 4'd3, // Capture signal FFT output
ST_REF_FFT = 4'd4, // Forward FFT of reference
ST_REF_CAP = 4'd5, // Capture reference FFT output
ST_MULTIPLY = 4'd6, // Conjugate multiply (pipelined)
ST_INV_FFT = 4'd7, // Inverse FFT of product
ST_INV_CAP = 4'd8, // Capture IFFT output
ST_OUTPUT = 4'd9, // Stream 1024 results
ST_DONE = 4'd10;
reg [3:0] state;
// ============================================================================
// DATA BUFFERS (block RAM) declared here, accessed in BRAM port blocks
// ============================================================================
(* ram_style = "block" *) reg signed [15:0] sig_buf_i [0:FFT_SIZE-1];
(* ram_style = "block" *) reg signed [15:0] sig_buf_q [0:FFT_SIZE-1];
(* ram_style = "block" *) reg signed [15:0] ref_buf_i [0:FFT_SIZE-1];
(* ram_style = "block" *) reg signed [15:0] ref_buf_q [0:FFT_SIZE-1];
(* ram_style = "block" *) reg signed [15:0] prod_buf_i [0:FFT_SIZE-1];
(* ram_style = "block" *) reg signed [15:0] prod_buf_q [0:FFT_SIZE-1];
// BRAM read data (registered outputs from port blocks)
reg signed [15:0] sig_rdata_i, sig_rdata_q;
reg signed [15:0] ref_rdata_i, ref_rdata_q;
reg signed [15:0] prod_rdata_i, prod_rdata_q;
// ============================================================================
// COUNTERS
// ============================================================================
reg [ADDR_BITS:0] collect_count; // 0..1024 for sample collection
reg [ADDR_BITS:0] feed_count; // 0..1024 for feeding FFT engine
reg [ADDR_BITS:0] cap_count; // 0..1024 for capturing FFT output
reg [ADDR_BITS:0] mult_count; // 0..1024 for multiply feeding
reg [ADDR_BITS:0] out_count; // 0..1024 for output streaming
// BRAM read latency pipeline flags
reg feed_primed; // 1 = BRAM rdata valid for feed operations
reg mult_primed; // 1 = BRAM rdata valid for multiply reads
reg out_primed; // 1 = BRAM rdata valid for output reads
// ============================================================================
// FFT ENGINE INTERFACE (single instance, reused 3 times)
// ============================================================================
reg fft_start;
reg fft_inverse;
reg signed [15:0] fft_din_re, fft_din_im;
reg fft_din_valid;
wire signed [15:0] fft_dout_re, fft_dout_im;
wire fft_dout_valid;
wire fft_busy;
wire fft_done;
// Single shared FFT engine: FFT_SIZE points, 16-bit I/O, 32-bit internal
// data, 16-bit twiddles loaded from fft_twiddle_1024.mem. Direction is
// selected by fft_inverse; samples are fed on fft_din_* and results arrive
// on fft_dout_* qualified by fft_dout_valid.
fft_engine #(
.N(FFT_SIZE),
.LOG2N(ADDR_BITS),
.DATA_W(16),
.INTERNAL_W(32),
.TWIDDLE_W(16),
.TWIDDLE_FILE("fft_twiddle_1024.mem")
) fft_inst (
.clk(clk),
.reset_n(reset_n),
.start(fft_start),
.inverse(fft_inverse),
.din_re(fft_din_re),
.din_im(fft_din_im),
.din_valid(fft_din_valid),
.dout_re(fft_dout_re),
.dout_im(fft_dout_im),
.dout_valid(fft_dout_valid),
.busy(fft_busy),
.done(fft_done)
);
// ============================================================================
// FREQUENCY-DOMAIN MULTIPLIER (frequency_matched_filter)
// ============================================================================
// Receives one signal-FFT bin and one reference-FFT bin per valid cycle and
// returns the filtered bin on mf_out_* qualified by mf_valid_out.
// ============================================================================

// FSM -> multiplier
reg  signed [15:0] mf_sig_re;
reg  signed [15:0] mf_sig_im;
reg  signed [15:0] mf_ref_re;
reg  signed [15:0] mf_ref_im;
reg                mf_valid_in;

// multiplier -> prod_buf write port
wire signed [15:0] mf_out_re;
wire signed [15:0] mf_out_im;
wire               mf_valid_out;

frequency_matched_filter mf_inst (
    .clk            (clk),
    .reset_n        (reset_n),
    // signal spectrum
    .fft_real_in    (mf_sig_re),
    .fft_imag_in    (mf_sig_im),
    .fft_valid_in   (mf_valid_in),
    // reference spectrum
    .ref_chirp_real (mf_ref_re),
    .ref_chirp_imag (mf_ref_im),
    // product
    .filtered_real  (mf_out_re),
    .filtered_imag  (mf_out_im),
    .filtered_valid (mf_valid_out),
    // debug state output intentionally left unconnected
    .state          ()
);

// Counts cycles spent draining the matched filter pipeline (described as
// 4 stages deep) once every input pair has been issued.
reg [2:0] mf_flush_count;

// ============================================================================
// OUTPUT REGISTERS
// ============================================================================
reg                out_valid_reg;
reg  signed [15:0] out_i_reg;
reg  signed [15:0] out_q_reg;
// ============================================================================
// sig_buf MEMORY PORT
// ============================================================================
// A single address serves both the synchronous write and the registered
// read, and is derived combinationally from the FSM state in this block:
//   writes : ADC samples (ST_IDLE -> address 0, ST_COLLECT -> collect_count),
//            forward-FFT results (ST_SIG_FFT / ST_SIG_CAP -> cap_count)
//   reads  : FFT feed (ST_SIG_FFT -> feed_count),
//            multiplier feed (ST_MULTIPLY -> mult_count)
// A write request takes priority over the read address for that cycle.
// The block is clocked only (no asynchronous reset), which the design relies
// on for block-RAM inference.
// ============================================================================
always @(posedge clk) begin : sig_bram_port
    reg                 wr_en;
    reg [ADDR_BITS-1:0] ptr;
    reg signed [15:0]   din_i;
    reg signed [15:0]   din_q;

    // Idle port: no write, address 0.
    wr_en = 1'b0;
    ptr   = {ADDR_BITS{1'b0}};
    din_i = 16'sd0;
    din_q = 16'sd0;

    // ---- Read address ------------------------------------------------------
    // The pointer is used while it is still inside the buffer; once it has
    // run past the last entry the address falls back to 0 (don't care).
    if (state == ST_SIG_FFT) begin
        if (feed_count < FFT_SIZE)
            ptr = feed_count[ADDR_BITS-1:0];
    end else if (state == ST_MULTIPLY) begin
        if (mult_count < FFT_SIZE)
            ptr = mult_count[ADDR_BITS-1:0];
    end

    // ---- Write request (overrides the read address) ------------------------
    if ((state == ST_IDLE) && adc_valid) begin
        // Very first sample of a frame always lands at address 0.
        wr_en = 1'b1;
        ptr   = {ADDR_BITS{1'b0}};
        din_i = $signed(adc_data_i);
        din_q = $signed(adc_data_q);
    end else if ((state == ST_COLLECT) && adc_valid && (collect_count < FFT_SIZE)) begin
        wr_en = 1'b1;
        ptr   = collect_count[ADDR_BITS-1:0];
        din_i = $signed(adc_data_i);
        din_q = $signed(adc_data_q);
    end else if (((state == ST_SIG_FFT) || (state == ST_SIG_CAP)) &&
                 fft_dout_valid && (cap_count < FFT_SIZE)) begin
        // FFT results overwrite the time-domain samples in place.
        wr_en = 1'b1;
        ptr   = cap_count[ADDR_BITS-1:0];
        din_i = fft_dout_re;
        din_q = fft_dout_im;
    end

    // ---- Memory access -----------------------------------------------------
    if (wr_en) begin
        sig_buf_i[ptr] <= din_i;
        sig_buf_q[ptr] <= din_q;
    end

    // Registered read: data for this address is visible one clock later.
    sig_rdata_i <= sig_buf_i[ptr];
    sig_rdata_q <= sig_buf_q[ptr];
end
// ============================================================================
// ref_buf MEMORY PORT
// ============================================================================
// Same single-address structure as the signal buffer port:
//   writes : reference chirp samples (ST_IDLE -> address 0,
//            ST_COLLECT -> collect_count),
//            forward-FFT results (ST_REF_FFT / ST_REF_CAP -> cap_count)
//   reads  : FFT feed (ST_REF_FFT -> feed_count),
//            multiplier feed (ST_MULTIPLY -> mult_count)
// A write request takes priority over the read address for that cycle.
// ============================================================================
always @(posedge clk) begin : ref_bram_port
    reg                 wr_en;
    reg [ADDR_BITS-1:0] ptr;
    reg signed [15:0]   din_i;
    reg signed [15:0]   din_q;

    // Idle port: no write, address 0.
    wr_en = 1'b0;
    ptr   = {ADDR_BITS{1'b0}};
    din_i = 16'sd0;
    din_q = 16'sd0;

    // ---- Read address ------------------------------------------------------
    if (state == ST_REF_FFT) begin
        if (feed_count < FFT_SIZE)
            ptr = feed_count[ADDR_BITS-1:0];
    end else if (state == ST_MULTIPLY) begin
        if (mult_count < FFT_SIZE)
            ptr = mult_count[ADDR_BITS-1:0];
    end

    // ---- Write request (overrides the read address) ------------------------
    if ((state == ST_IDLE) && adc_valid) begin
        // Reference sample paired with the first ADC sample of the frame.
        wr_en = 1'b1;
        ptr   = {ADDR_BITS{1'b0}};
        din_i = $signed(long_chirp_real);
        din_q = $signed(long_chirp_imag);
    end else if ((state == ST_COLLECT) && adc_valid && (collect_count < FFT_SIZE)) begin
        wr_en = 1'b1;
        ptr   = collect_count[ADDR_BITS-1:0];
        din_i = $signed(long_chirp_real);
        din_q = $signed(long_chirp_imag);
    end else if (((state == ST_REF_FFT) || (state == ST_REF_CAP)) &&
                 fft_dout_valid && (cap_count < FFT_SIZE)) begin
        // FFT results overwrite the time-domain reference in place.
        wr_en = 1'b1;
        ptr   = cap_count[ADDR_BITS-1:0];
        din_i = fft_dout_re;
        din_q = fft_dout_im;
    end

    // ---- Memory access -----------------------------------------------------
    if (wr_en) begin
        ref_buf_i[ptr] <= din_i;
        ref_buf_q[ptr] <= din_q;
    end

    // Registered read: data for this address is visible one clock later.
    ref_rdata_i <= ref_buf_i[ptr];
    ref_rdata_q <= ref_buf_q[ptr];
end
// ============================================================================
// prod_buf MEMORY PORT
// ============================================================================
// Same single-address structure as the other two buffer ports:
//   writes : multiplier results (ST_MULTIPLY -> cap_count),
//            inverse-FFT results (ST_INV_FFT / ST_INV_CAP -> cap_count)
//   reads  : IFFT feed (ST_INV_FFT -> feed_count),
//            output streaming (ST_OUTPUT -> out_count)
// A write request takes priority over the read address for that cycle.
// ============================================================================
always @(posedge clk) begin : prod_bram_port
    reg                 wr_en;
    reg [ADDR_BITS-1:0] ptr;
    reg signed [15:0]   din_i;
    reg signed [15:0]   din_q;

    // Idle port: no write, address 0.
    wr_en = 1'b0;
    ptr   = {ADDR_BITS{1'b0}};
    din_i = 16'sd0;
    din_q = 16'sd0;

    // ---- Read address ------------------------------------------------------
    if (state == ST_INV_FFT) begin
        if (feed_count < FFT_SIZE)
            ptr = feed_count[ADDR_BITS-1:0];
    end else if (state == ST_OUTPUT) begin
        if (out_count < FFT_SIZE)
            ptr = out_count[ADDR_BITS-1:0];
    end

    // ---- Write request (overrides the read address) ------------------------
    if ((state == ST_MULTIPLY) && mf_valid_out && (cap_count < FFT_SIZE)) begin
        // Frequency-domain product from the matched filter.
        wr_en = 1'b1;
        ptr   = cap_count[ADDR_BITS-1:0];
        din_i = mf_out_re;
        din_q = mf_out_im;
    end else if (((state == ST_INV_FFT) || (state == ST_INV_CAP)) &&
                 fft_dout_valid && (cap_count < FFT_SIZE)) begin
        // IFFT results overwrite the product in place.
        wr_en = 1'b1;
        ptr   = cap_count[ADDR_BITS-1:0];
        din_i = fft_dout_re;
        din_q = fft_dout_im;
    end

    // ---- Memory access -----------------------------------------------------
    if (wr_en) begin
        prod_buf_i[ptr] <= din_i;
        prod_buf_q[ptr] <= din_q;
    end

    // Registered read: data for this address is visible one clock later.
    prod_rdata_i <= prod_buf_i[ptr];
    prod_rdata_q <= prod_buf_q[ptr];
end
// ============================================================================
// MAIN FSM - no buffer array accesses here (all via BRAM ports above)
// ============================================================================
// State sequence for one frame:
//   IDLE -> COLLECT -> SIG_FFT -> SIG_CAP -> REF_FFT -> REF_CAP -> MULTIPLY
//        -> INV_FFT -> INV_CAP -> OUTPUT -> DONE -> IDLE
//
// This block owns the counters, the *_primed flags and the registered inputs
// of the FFT engine and the matched filter. The BRAM port blocks decode the
// same state/counter values to generate addresses and write enables.
//
// Read bursts (feed / multiply / output) share one pattern because of the
// 1-cycle BRAM read latency:
//   cycle 0         : pointer = 0, not primed -> address only, no data issued
//   cycles 1..N-1   : issue rdata for address pointer-1, present next address
//   cycle N         : pointer == FFT_SIZE -> issue rdata for the last address
// after which the pointer rests at FFT_SIZE+1 and nothing more is issued.
//
// Several branches rely on "last nonblocking assignment wins" ordering; do not
// reorder statements inside a state without re-checking those overrides.
// ============================================================================
always @(posedge clk or negedge reset_n) begin
if (!reset_n) begin
state <= ST_IDLE;
collect_count <= 0;
feed_count <= 0;
cap_count <= 0;
mult_count <= 0;
out_count <= 0;
feed_primed <= 1'b0;
mult_primed <= 1'b0;
out_primed <= 1'b0;
fft_start <= 1'b0;
fft_inverse <= 1'b0;
fft_din_re <= 0;
fft_din_im <= 0;
fft_din_valid <= 1'b0;
mf_sig_re <= 0;
mf_sig_im <= 0;
mf_ref_re <= 0;
mf_ref_im <= 0;
mf_valid_in <= 1'b0;
mf_flush_count <= 0;
out_valid_reg <= 1'b0;
out_i_reg <= 0;
out_q_reg <= 0;
end else begin
// Defaults: every strobe/valid is a single-cycle pulse unless a state
// re-asserts it below.
fft_start <= 1'b0;
fft_din_valid <= 1'b0;
mf_valid_in <= 1'b0;
out_valid_reg <= 1'b0;
case (state)
// ================================================================
// IDLE: wait for the first ADC sample of a frame
// ================================================================
ST_IDLE: begin
collect_count <= 0;
feed_primed <= 1'b0;
mult_primed <= 1'b0;
out_primed <= 1'b0;
if (adc_valid) begin
// First sample written by sig/ref BRAM ports (they see
// state==ST_IDLE && adc_valid).
// This assignment overrides the collect_count <= 0 above.
collect_count <= 1;
state <= ST_COLLECT;
end
end
// ================================================================
// COLLECT: Gather FFT_SIZE ADC + reference samples
// Writes happen in sig/ref BRAM ports (they see state==ST_COLLECT)
// ================================================================
ST_COLLECT: begin
if (adc_valid && collect_count < FFT_SIZE) begin
collect_count <= collect_count + 1;
end
// The transition is taken on the cycle after the last sample was counted.
// An adc_valid sample arriving on that cycle is not stored (both this FSM
// and the BRAM ports gate on collect_count < FFT_SIZE).
if (collect_count == FFT_SIZE) begin
// All samples collected - start signal FFT
state <= ST_SIG_FFT;
fft_start <= 1'b1;
fft_inverse <= 1'b0; // Forward FFT
feed_count <= 0;
cap_count <= 0;
feed_primed <= 1'b0;
end
end
// ================================================================
// SIG_FFT: Feed signal buffer to FFT engine (forward)
// BRAM read has 1-cycle latency: address presented in BRAM port,
// data available in sig_rdata_i/q next cycle.
// ================================================================
ST_SIG_FFT: begin
// Feed phase: read sig_buf -> fft_din
if (feed_count < FFT_SIZE) begin
if (!feed_primed) begin
// Pre-read cycle: address presented to BRAM, wait 1 cycle
feed_primed <= 1'b1;
feed_count <= feed_count + 1;
// fft_din_valid stays 0 (default)
end else begin
// Primed: BRAM rdata is valid for previous address
fft_din_re <= sig_rdata_i;
fft_din_im <= sig_rdata_q;
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
end else if (feed_count == FFT_SIZE && feed_primed) begin
// Last sample: BRAM rdata has data for address FFT_SIZE-1
fft_din_re <= sig_rdata_i;
fft_din_im <= sig_rdata_q;
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1; // -> FFT_SIZE+1, stops feeding
end
// Capture FFT output (writes happen in BRAM port)
if (fft_dout_valid && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
if (fft_done) begin
state <= ST_SIG_CAP;
end
end
// ================================================================
// SIG_CAP: one-cycle hand-over from signal FFT to reference FFT
// NOTE(review): this state always leaves after a single cycle and the
// cap_count increment below is overridden by the later cap_count <= 0
// (last nonblocking assignment wins). It therefore does not wait for
// outstanding FFT outputs; it presumably relies on fft_engine having
// streamed every output before or with fft_done - confirm against
// fft_engine.v. The sig BRAM port still stores a result arriving in
// this cycle.
// ================================================================
ST_SIG_CAP: begin
if (fft_dout_valid && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
// Start reference FFT
state <= ST_REF_FFT;
fft_start <= 1'b1;
fft_inverse <= 1'b0; // Forward FFT
feed_count <= 0;
cap_count <= 0;
feed_primed <= 1'b0;
end
// ================================================================
// REF_FFT: Feed reference buffer to FFT engine (forward)
// Same feed/capture pattern as SIG_FFT, reading ref_rdata_i/q.
// ================================================================
ST_REF_FFT: begin
// Feed phase: read ref_buf -> fft_din
if (feed_count < FFT_SIZE) begin
if (!feed_primed) begin
// Pre-read cycle
feed_primed <= 1'b1;
feed_count <= feed_count + 1;
end else begin
fft_din_re <= ref_rdata_i;
fft_din_im <= ref_rdata_q;
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
end else if (feed_count == FFT_SIZE && feed_primed) begin
// Last sample
fft_din_re <= ref_rdata_i;
fft_din_im <= ref_rdata_q;
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
if (fft_dout_valid && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
if (fft_done) begin
state <= ST_REF_CAP;
end
end
// ================================================================
// REF_CAP: one-cycle hand-over from reference FFT to multiply
// NOTE(review): as in SIG_CAP, the cap_count increment is overridden by
// cap_count <= 0 below and the state leaves unconditionally.
// ================================================================
ST_REF_CAP: begin
if (fft_dout_valid && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
state <= ST_MULTIPLY;
mult_count <= 0;
cap_count <= 0;
mf_flush_count <= 0;
mult_primed <= 1'b0;
end
// ================================================================
// MULTIPLY: Stream sig FFT and ref FFT through freq_matched_filter
// Both sig_buf and ref_buf are read simultaneously (separate BRAM
// ports). Pipeline latency = 4 clocks. Feed FFT_SIZE pairs, then flush.
// ================================================================
ST_MULTIPLY: begin
if (mult_count < FFT_SIZE) begin
if (!mult_primed) begin
// Pre-read cycle
mult_primed <= 1'b1;
mult_count <= mult_count + 1;
end else begin
mf_sig_re <= sig_rdata_i;
mf_sig_im <= sig_rdata_q;
mf_ref_re <= ref_rdata_i;
mf_ref_im <= ref_rdata_q;
mf_valid_in <= 1'b1;
mult_count <= mult_count + 1;
end
end else if (mult_count == FFT_SIZE && mult_primed) begin
// Last sample
mf_sig_re <= sig_rdata_i;
mf_sig_im <= sig_rdata_q;
mf_ref_re <= ref_rdata_i;
mf_ref_im <= ref_rdata_q;
mf_valid_in <= 1'b1;
mult_count <= mult_count + 1;
end else begin
// Pipeline flush - wait for remaining outputs.
// NOTE(review): mf_flush_count is only written (never read) in the
// visible code and wraps at 3 bits; the exit below depends solely on
// cap_count.
mf_flush_count <= mf_flush_count + 1;
end
// Capture multiply outputs (writes happen in BRAM port)
if (mf_valid_out && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
// Done when all outputs captured. There is no timeout: if the filter
// produced fewer than FFT_SIZE valid outputs the FSM would stay here.
if (cap_count == FFT_SIZE) begin
state <= ST_INV_FFT;
fft_start <= 1'b1;
fft_inverse <= 1'b1; // Inverse FFT
feed_count <= 0;
cap_count <= 0;
feed_primed <= 1'b0;
end
end
// ================================================================
// INV_FFT: Feed product buffer to FFT engine (inverse)
// Same feed/capture pattern as SIG_FFT, reading prod_rdata_i/q.
// ================================================================
ST_INV_FFT: begin
if (feed_count < FFT_SIZE) begin
if (!feed_primed) begin
// Pre-read cycle
feed_primed <= 1'b1;
feed_count <= feed_count + 1;
end else begin
fft_din_re <= prod_rdata_i;
fft_din_im <= prod_rdata_q;
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
end else if (feed_count == FFT_SIZE && feed_primed) begin
// Last sample
fft_din_re <= prod_rdata_i;
fft_din_im <= prod_rdata_q;
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
if (fft_dout_valid && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
if (fft_done) begin
state <= ST_INV_CAP;
end
end
// ================================================================
// INV_CAP: one-cycle hand-over from inverse FFT to output streaming
// NOTE(review): cap_count is not cleared here, so the increment takes
// effect, but the state still leaves after one cycle regardless of
// cap_count.
// ================================================================
ST_INV_CAP: begin
if (fft_dout_valid && cap_count < FFT_SIZE) begin
cap_count <= cap_count + 1;
end
state <= ST_OUTPUT;
out_count <= 0;
out_primed <= 1'b0;
end
// ================================================================
// OUTPUT: Stream FFT_SIZE range profile samples
// BRAM read latency: present address, data valid next cycle.
// out_valid_reg is high for FFT_SIZE consecutive cycles.
// ================================================================
ST_OUTPUT: begin
if (out_count < FFT_SIZE) begin
if (!out_primed) begin
// Pre-read cycle
out_primed <= 1'b1;
out_count <= out_count + 1;
end else begin
out_i_reg <= prod_rdata_i;
out_q_reg <= prod_rdata_q;
out_valid_reg <= 1'b1;
out_count <= out_count + 1;
end
end else if (out_count == FFT_SIZE && out_primed) begin
// Last sample
out_i_reg <= prod_rdata_i;
out_q_reg <= prod_rdata_q;
out_valid_reg <= 1'b1;
out_count <= out_count + 1;
end else begin
// out_count has passed FFT_SIZE: every sample has been issued
state <= ST_DONE;
end
end
// ================================================================
// DONE: Return to idle
// ================================================================
ST_DONE: begin
state <= ST_IDLE;
end
// Unknown/unused encodings recover to IDLE
default: state <= ST_IDLE;
endcase
end
end
// ============================================================================
// MODULE OUTPUTS
// ============================================================================
// The range profile ports are driven directly from the FSM output registers;
// chain_state exposes the current FSM state for observation.
assign range_profile_i     = out_i_reg,
       range_profile_q     = out_q_reg,
       range_profile_valid = out_valid_reg,
       chain_state         = state;
// ============================================================================
// BUFFER ZERO-FILL
// ============================================================================
// Clears all six I/Q buffers at time 0 so simulation starts from known
// contents instead of X.
// NOTE(review): the original comment says Vivado ignores initial blocks on
// arrays; confirm against the Vivado synthesis guide whether this loop is
// instead taken as the RAM initial contents (all zeros either way).
integer init_idx;
initial begin
    for (init_idx = 0; init_idx < FFT_SIZE; init_idx = init_idx + 1) begin
        // in-phase halves
        sig_buf_i[init_idx]  = 0;
        ref_buf_i[init_idx]  = 0;
        prod_buf_i[init_idx] = 0;
        // quadrature halves
        sig_buf_q[init_idx]  = 0;
        ref_buf_q[init_idx]  = 0;
        prod_buf_q[init_idx] = 0;
    end
end
`endif `endif
+526
View File
@@ -0,0 +1,526 @@
`timescale 1ns / 1ps
/**
* tb_fft_engine.v
*
* Testbench for the synthesizable FFT engine.
* Tests with N=32 first (fast), then validates key properties.
*
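* Test Groups:
* 1. Impulse response: FFT of A*delta[0] should give A in every bin
* 2. DC input: FFT of a constant A should give A*N at bin 0, ~0 elsewhere
* 3. Single tone: FFT of cos(2*pi*k*n/N) should peak at bins k and N-k
* 4. Roundtrip: FFT then IFFT should recover the original
* 5. IFFT of impulse: IFFT of N*delta[0] should give 1 in every sample
* 6. Parseval: N * sum|x[n]|^2 should match sum|X[k]|^2
* 7. Pure imaginary tone: j*sin(2*pi*2*n/N) should peak at bin 2 or N-2
* 8. Zero input: every output bin must be 0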
*
* Convention: standard check task with pass/fail tracking.
*/
module tb_fft_engine;
// ============================================================================
// PARAMETERS - test with 32-pt for speed
// ============================================================================
// These are passed straight to the fft_engine instance below; the twiddle
// file named in the instantiation must match N.
localparam N = 32;
localparam LOG2N = 5;
localparam DATA_W = 16;
localparam INT_W = 32;
localparam TW_W = 16;
// Clock period in timescale units (1 ns), i.e. a 100 MHz clock.
localparam CLK_PERIOD = 10;
// ============================================================================
// SIGNALS
// ============================================================================
// Stimulus driven by the helper tasks
reg clk, reset_n;
reg start, inverse;
reg signed [DATA_W-1:0] din_re, din_im;
reg din_valid;
// DUT responses sampled by the helper tasks
wire signed [DATA_W-1:0] dout_re, dout_im;
wire dout_valid, busy, done_sig;
// ============================================================================
// DEVICE UNDER TEST
// ============================================================================
// 32-point configuration of the FFT engine, using the matching twiddle file.
fft_engine #(
    .N            (N),
    .LOG2N        (LOG2N),
    .DATA_W       (DATA_W),
    .INTERNAL_W   (INT_W),
    .TWIDDLE_W    (TW_W),
    .TWIDDLE_FILE ("fft_twiddle_32.mem")
) dut (
    // clocking
    .clk        (clk),
    .reset_n    (reset_n),
    // control
    .start      (start),
    .inverse    (inverse),
    // input stream
    .din_re     (din_re),
    .din_im     (din_im),
    .din_valid  (din_valid),
    // output stream
    .dout_re    (dout_re),
    .dout_im    (dout_im),
    .dout_valid (dout_valid),
    // status
    .busy       (busy),
    .done       (done_sig)
);
// ============================================================================
// CLOCK GENERATOR
// ============================================================================
// Starts low at time 0 and toggles every half period.
initial begin
    clk = 0;
    forever #(CLK_PERIOD/2) clk = ~clk;
end
// ============================================================================
// RESULT BOOKKEEPING
// ============================================================================
integer pass_count, fail_count;

// Records one test result.
//   cond  : 1 counts as a pass; anything else (0, X, Z) counts as a failure
//   label : text printed after the [PASS]/[FAIL] tag (up to 512 characters)
task check;
    input cond;
    input [512*8-1:0] label;
    begin
        if (cond === 1'b1) begin
            pass_count = pass_count + 1;
            $display(" [PASS] %0s", label);
        end else begin
            fail_count = fail_count + 1;
            $display(" [FAIL] %0s", label);
        end
    end
endtask
// ============================================================================
// STORAGE FOR CAPTURED OUTPUTS
// ============================================================================
// Filled by run_fft in the order dout_valid samples arrive.
reg signed [DATA_W-1:0] out_re [0:N-1];
reg signed [DATA_W-1:0] out_im [0:N-1];
// Number of outputs captured so far by the running helper task.
integer out_idx;
// Second set: capture target of run_fft_to_out2; the roundtrip test also
// uses it to hold the original time-domain pattern for comparison.
reg signed [DATA_W-1:0] out2_re [0:N-1];
reg signed [DATA_W-1:0] out2_im [0:N-1];
// Stimulus arrays: the helper tasks stream these N samples into the DUT.
reg signed [DATA_W-1:0] in_re [0:N-1];
reg signed [DATA_W-1:0] in_im [0:N-1];
// ============================================================================
// HELPER TASKS
// ============================================================================

// Drives every DUT input to its idle value, holds reset_n low for five rising
// clock edges, releases it, then waits two more edges. All changes are made
// 1 time unit after a clock edge.
task do_reset;
    begin
        // idle stimulus
        din_valid = 1'b0;
        din_re    = 0;
        din_im    = 0;
        start     = 1'b0;
        inverse   = 1'b0;
        // assert reset
        reset_n   = 1'b0;
        repeat (5) @(posedge clk);
        #1;
        // release reset and let the DUT settle
        reset_n   = 1'b1;
        repeat (2) @(posedge clk);
        #1;
    end
endtask
// Run one transform: stream the N samples in in_re/in_im into the DUT and
// capture the N results into out_re/out_im.
//   inv : 0 = forward FFT, 1 = inverse FFT (drives the DUT 'inverse' input)
//
// Sequence: set direction, pulse 'start' for one clock, present one sample
// per clock with din_valid high, then collect outputs while dout_valid is
// high. All stimulus changes happen 1 time unit after a rising clock edge.
//
// The capture wait is bounded: if fewer than N outputs arrive within
// 100*N*LOG2N clocks, the task reports a failure (fail_count is incremented)
// and returns instead of hanging the simulation. Entries of out_re/out_im
// that were not captured keep their previous contents in that case.
task run_fft;
    input inv;
    integer i;
    integer wait_cycles;
    begin
        inverse = inv;
        @(posedge clk); #1;
        start = 1;
        @(posedge clk); #1;
        start = 0;

        // Feed N samples, one per clock
        for (i = 0; i < N; i = i + 1) begin
            din_re = in_re[i];
            din_im = in_im[i];
            din_valid = 1;
            @(posedge clk); #1;
        end
        din_valid = 0;
        din_re = 0;
        din_im = 0;

        // Capture N outputs, giving up after a generous cycle budget
        out_idx = 0;
        wait_cycles = 0;
        while (out_idx < N && wait_cycles < 100 * N * LOG2N) begin
            @(posedge clk); #1;
            wait_cycles = wait_cycles + 1;
            if (dout_valid) begin
                out_re[out_idx] = dout_re;
                out_im[out_idx] = dout_im;
                out_idx = out_idx + 1;
            end
        end

        if (out_idx < N) begin
            $display(" [FAIL] run_fft: timeout after %0d cycles, captured %0d of %0d outputs",
                     wait_cycles, out_idx, N);
            fail_count = fail_count + 1;
        end

        // One extra clock after the last output before returning
        // (this does not sample the DUT 'done' flag).
        @(posedge clk); #1;
    end
endtask
// Same as run_fft, but the N results are captured into out2_re/out2_im.
//   inv : 0 = forward FFT, 1 = inverse FFT (drives the DUT 'inverse' input)
//
// The capture wait is bounded: if fewer than N outputs arrive within
// 100*N*LOG2N clocks, the task reports a failure (fail_count is incremented)
// and returns instead of hanging the simulation.
task run_fft_to_out2;
    input inv;
    integer i;
    integer wait_cycles;
    begin
        inverse = inv;
        @(posedge clk); #1;
        start = 1;
        @(posedge clk); #1;
        start = 0;

        // Feed N samples, one per clock
        for (i = 0; i < N; i = i + 1) begin
            din_re = in_re[i];
            din_im = in_im[i];
            din_valid = 1;
            @(posedge clk); #1;
        end
        din_valid = 0;
        din_re = 0;
        din_im = 0;

        // Capture N outputs, giving up after a generous cycle budget
        out_idx = 0;
        wait_cycles = 0;
        while (out_idx < N && wait_cycles < 100 * N * LOG2N) begin
            @(posedge clk); #1;
            wait_cycles = wait_cycles + 1;
            if (dout_valid) begin
                out2_re[out_idx] = dout_re;
                out2_im[out_idx] = dout_im;
                out_idx = out_idx + 1;
            end
        end

        if (out_idx < N) begin
            $display(" [FAIL] run_fft_to_out2: timeout after %0d cycles, captured %0d of %0d outputs",
                     wait_cycles, out_idx, N);
            fail_count = fail_count + 1;
        end

        // One extra clock after the last output before returning
        @(posedge clk); #1;
    end
endtask
// ============================================================================
// VCD WAVEFORM DUMP
// ============================================================================
// Records every signal below tb_fft_engine (depth 0 = all levels) into
// tb_fft_engine.vcd. No CSV output is produced by this testbench.
initial begin
$dumpfile("tb_fft_engine.vcd");
$dumpvars(0, tb_fft_engine);
end
// ============================================================================
// MAIN TEST
// ============================================================================
// Runs eight test groups against the 32-point engine and prints a pass/fail
// summary. Each group fills in_re/in_im, calls run_fft, and inspects
// out_re/out_im. Twelve checks are recorded in total.
integer i, j;
integer max_mag_bin;
reg signed [31:0] max_mag;
reg signed [31:0] mag;
reg signed [31:0] err;
integer max_err;
integer total_energy_in, total_energy_out;
// 64-bit scratch values for the Parseval comparison: the relative-error
// computation multiplies the error by 100, which does not fit in 32 bits once
// the error exceeds about 21.4 million.
reg signed [63:0] energy_ref64;
reg signed [63:0] energy_err64;
// For tone generation
real angle;
reg signed [DATA_W-1:0] cos_val;

initial begin
    pass_count = 0;
    fail_count = 0;
    $display("============================================================");
    $display(" FFT Engine Testbench N=%0d", N);
    $display("============================================================");
    do_reset;

    // ================================================================
    // TEST GROUP 1: Impulse Response
    // Input: amplitude-1000 impulse at sample 0, zeros elsewhere.
    // Expected forward FFT: every bin = 1000 + j0.
    // ================================================================
    $display("");
    $display("--- Test Group 1: Impulse Response ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i == 0) ? 16'sd1000 : 16'sd0;
        in_im[i] = 16'sd0;
    end
    run_fft(0); // Forward FFT

    // Largest deviation of any bin from 1000 + j0
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - 1000;
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" Impulse FFT max error from expected: %0d", max_err);
    check(max_err < 10, "Impulse FFT: all bins ~= input amplitude");
    check(out_re[0] >= 998 && out_re[0] <= 1002,
          "Impulse FFT: bin 0 real ~= 1000");

    // ================================================================
    // TEST GROUP 2: DC Input
    // Input: constant 100 on all N samples.
    // Expected forward FFT: bin 0 = 100*N = 3200, all other bins = 0.
    // ================================================================
    $display("");
    $display("--- Test Group 2: DC Input ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = 16'sd100;
        in_im[i] = 16'sd0;
    end
    run_fft(0);
    $display(" DC FFT bin[0] = %0d + j%0d (expect %0d + j0)", out_re[0], out_im[0], 100*N);
    // +/-50 counts on 3200 is roughly 1.5% tolerance for twiddle rounding
    check(out_re[0] >= (100*N - 50) && out_re[0] <= (100*N + 50),
          "DC FFT: bin 0 real ~= A*N (1.5% tol)");

    // Largest |re| or |im| over the non-DC bins
    max_err = 0;
    for (i = 1; i < N; i = i + 1) begin
        err = out_re[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" DC FFT max non-DC bin magnitude: %0d", max_err);
    check(max_err < 20, "DC FFT: non-DC bins ~= 0 (Q15 rounding tol)");

    // ================================================================
    // TEST GROUP 3: Single Tone (cosine at bin 4)
    // Input: 1000*cos(2*pi*4*n/32).
    // Expected: peaks at bins 4 and N-4 = 28, each 1000*N/2 = 16000.
    // ================================================================
    $display("");
    $display("--- Test Group 3: Single Tone (bin 4) ---");
    for (i = 0; i < N; i = i + 1) begin
        angle = 6.28318530718 * 4.0 * i / 32.0;
        cos_val = $rtoi($cos(angle) * 1000.0);
        in_re[i] = cos_val;
        in_im[i] = 16'sd0;
    end
    run_fft(0);

    // Find the bin with the largest squared magnitude
    max_mag = 0;
    max_mag_bin = 0;
    for (i = 0; i < N; i = i + 1) begin
        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
        if (mag > max_mag) begin
            max_mag = mag;
            max_mag_bin = i;
        end
    end
    $display(" Tone FFT peak bin: %0d (expect 4)", max_mag_bin);
    $display(" Tone FFT bin[4] = %0d + j%0d", out_re[4], out_im[4]);
    $display(" Tone FFT bin[28] = %0d + j%0d", out_re[28], out_im[28]);
    check(max_mag_bin == 4 || max_mag_bin == 28,
          "Tone FFT: peak at bin 4 or 28");
    // Squared magnitude of bin 4 must lie between 15000^2 and 17000^2
    mag = out_re[4] * out_re[4] + out_im[4] * out_im[4];
    check(mag > 15000*15000 && mag < 17000*17000,
          "Tone FFT: bin 4 magnitude ~= 16000");

    // ================================================================
    // TEST GROUP 4: Roundtrip (FFT then IFFT = identity)
    // A deterministic pseudo-random pattern is transformed forward and
    // back; the result must match the original within tolerance.
    // ================================================================
    $display("");
    $display("--- Test Group 4: Roundtrip (FFT->IFFT) ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i * 137 + 42) % 2001 - 1000; // [-1000, 1000]
        in_im[i] = (i * 251 + 17) % 2001 - 1000;
    end
    // Keep a copy of the original pattern; in_re/in_im are reused below
    for (i = 0; i < N; i = i + 1) begin
        out2_re[i] = in_re[i];
        out2_im[i] = in_im[i];
    end
    // Forward FFT
    run_fft(0);
    // Feed the spectrum back in as the IFFT input
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = out_re[i];
        in_im[i] = out_im[i];
    end
    // Inverse FFT
    run_fft(1);

    // out_re/out_im should match the saved original within tolerance
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - out2_re[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i] - out2_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" Roundtrip max error: %0d", max_err);
    check(max_err < 20, "Roundtrip: FFT->IFFT recovers original (err < 20)");
    check(max_err < 5, "Roundtrip: FFT->IFFT tight tolerance (err < 5)");
    // Print first few samples for debugging
    $display(" Sample comparison (idx: original vs recovered):");
    for (i = 0; i < 8; i = i + 1) begin
        $display(" [%0d] re: %0d vs %0d, im: %0d vs %0d",
                 i, out2_re[i], out_re[i], out2_im[i], out_im[i]);
    end

    // ================================================================
    // TEST GROUP 5: IFFT of impulse
    // Input: bin 0 = N (= 32), zeros elsewhere.
    // Expected inverse FFT: every sample = 1 + j0.
    // ================================================================
    $display("");
    $display("--- Test Group 5: IFFT of Impulse ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i == 0) ? N : 16'sd0;
        in_im[i] = 16'sd0;
    end
    run_fft(1); // Inverse FFT
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - 1;
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" IFFT impulse max error: %0d", max_err);
    check(max_err < 2, "IFFT impulse: all samples ~= 1");

    // ================================================================
    // TEST GROUP 6: Parseval's theorem (energy conservation)
    // Sum |x[n]|^2 = (1/N) * Sum |X[k]|^2, so N * sum_time is compared
    // with sum_freq. The comparison is done in 64 bits so that
    // "error * 100" cannot wrap and turn a large error into a pass.
    // ================================================================
    $display("");
    $display("--- Test Group 6: Parseval's Theorem ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i * 137 + 42) % 2001 - 1000;
        in_im[i] = (i * 251 + 17) % 2001 - 1000;
    end
    // Time-domain energy
    total_energy_in = 0;
    for (i = 0; i < N; i = i + 1) begin
        total_energy_in = total_energy_in + in_re[i] * in_re[i] + in_im[i] * in_im[i];
    end
    run_fft(0);
    // Frequency-domain energy
    total_energy_out = 0;
    for (i = 0; i < N; i = i + 1) begin
        total_energy_out = total_energy_out + out_re[i] * out_re[i] + out_im[i] * out_im[i];
    end

    // Widen before multiplying so the products are evaluated in 64 bits
    energy_ref64 = total_energy_in;
    energy_ref64 = energy_ref64 * N;
    energy_err64 = energy_ref64 - total_energy_out;
    if (energy_err64 < 0) energy_err64 = -energy_err64;

    $display(" Time energy * N = %0d", energy_ref64);
    $display(" Freq energy = %0d", total_energy_out);
    $display(" Parseval error = %0d", energy_err64);
    if (energy_ref64 > 0) begin
        $display(" Parseval rel error = %0d%%", (energy_err64 * 100) / energy_ref64);
        check((energy_err64 * 100) / energy_ref64 < 5,
              "Parseval: energy conserved within 5%");
    end else begin
        // A non-positive reference energy means the stimulus is broken;
        // record it as a failure instead of silently skipping the check.
        check(1'b0, "Parseval: energy conserved within 5%");
    end

    // ================================================================
    // TEST GROUP 7: Pure imaginary input
    // Input: j*1000*sin(2*pi*2*n/N). Expected peak at bin 2 or N-2.
    // ================================================================
    $display("");
    $display("--- Test Group 7: Pure Imaginary Tone (bin 2) ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = 16'sd0;
        angle = 6.28318530718 * 2.0 * i / 32.0;
        in_im[i] = $rtoi($sin(angle) * 1000.0);
    end
    run_fft(0);
    // Find the bin with the largest squared magnitude
    max_mag = 0;
    max_mag_bin = 0;
    for (i = 0; i < N; i = i + 1) begin
        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
        if (mag > max_mag) begin
            max_mag = mag;
            max_mag_bin = i;
        end
    end
    $display(" Imag tone peak bin: %0d (expect 2 or 30)", max_mag_bin);
    check(max_mag_bin == 2 || max_mag_bin == 30,
          "Imag tone: peak at bin 2 or 30");

    // ================================================================
    // TEST GROUP 8: Zero input
    // All-zero input must give all-zero output.
    // ================================================================
    $display("");
    $display("--- Test Group 8: Zero Input ---");
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = 16'sd0;
        in_im[i] = 16'sd0;
    end
    run_fft(0);
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    check(max_err == 0, "Zero input: all output bins = 0");

    // ================================================================
    // SUMMARY
    // ================================================================
    $display("");
    $display("============================================================");
    $display(" RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
    if (fail_count == 0)
        $display(" ALL TESTS PASSED");
    else
        $display(" SOME TESTS FAILED");
    $display("============================================================");
    $finish;
end
endmodule
+543
View File
@@ -0,0 +1,543 @@
`timescale 1ns / 1ps
/**
* tb_mf_chain_synth.v
*
* Testbench for the SYNTHESIS branch of matched_filter_processing_chain.v.
* This is compiled WITHOUT -DSIMULATION so the `else` branch (fft_engine-based)
* is activated.
*
* The synthesis branch uses an iterative fft_engine (1024-pt, single butterfly),
* so processing takes ~40K+ clock cycles per frame. Timeouts are set accordingly.
*/
module tb_mf_chain_synth;
// Parameters
// CLK_PERIOD is in timescale units (1 ns).
localparam CLK_PERIOD = 10.0; // 100 MHz
localparam FFT_SIZE = 1024;
// Timeout for full frame processing (in clock cycles):
// 3 FFTs × ~12K cycles each + multiply ~1K + overhead ≈ 40K
// Use 200K for safety margin
localparam FRAME_TIMEOUT = 200000;
// Signals: DUT inputs driven by the testbench
reg clk;
reg reset_n;
reg [15:0] adc_data_i;
reg [15:0] adc_data_q;
reg adc_valid;
reg [5:0] chirp_counter;
reg [15:0] long_chirp_real;
reg [15:0] long_chirp_imag;
reg [15:0] short_chirp_real;
reg [15:0] short_chirp_imag;
// DUT outputs
wire signed [15:0] range_profile_i;
wire signed [15:0] range_profile_q;
wire range_profile_valid;
wire [3:0] chain_state;
// Test bookkeeping
integer pass_count;
integer fail_count;
integer test_num;
integer i;
// Synthesis-branch states (mirror DUT); compared against chain_state.
// These must be kept in step with the DUT's own state encoding.
localparam [3:0] ST_IDLE = 4'd0,
ST_COLLECT = 4'd1,
ST_SIG_FFT = 4'd2,
ST_SIG_CAP = 4'd3,
ST_REF_FFT = 4'd4,
ST_REF_CAP = 4'd5,
ST_MULTIPLY = 4'd6,
ST_INV_FFT = 4'd7,
ST_INV_CAP = 4'd8,
ST_OUTPUT = 4'd9,
ST_DONE = 4'd10;
// Concurrent output capture
integer cap_count; // valid outputs seen since capture was (re)started
reg cap_enable; // 1 = capture block is recording
integer cap_max_abs; // largest |I|+|Q| seen so far
integer cap_peak_bin; // cap_count value at which cap_max_abs occurred (-1 = none)
integer cap_cur_abs; // |I|+|Q| of the sample being processed
// Output capture arrays (first FFT_SIZE valid outputs)
reg signed [15:0] cap_out_i [0:1023];
reg signed [15:0] cap_out_q [0:1023];
// Clock
always #(CLK_PERIOD/2) clk = ~clk;
// DUT
matched_filter_processing_chain uut (
.clk (clk),
.reset_n (reset_n),
.adc_data_i (adc_data_i),
.adc_data_q (adc_data_q),
.adc_valid (adc_valid),
.chirp_counter (chirp_counter),
.long_chirp_real (long_chirp_real),
.long_chirp_imag (long_chirp_imag),
.short_chirp_real (short_chirp_real),
.short_chirp_imag (short_chirp_imag),
.range_profile_i (range_profile_i),
.range_profile_q (range_profile_q),
.range_profile_valid (range_profile_valid),
.chain_state (chain_state)
);
// Output monitor. While cap_enable is set, every cycle on which
// range_profile_valid is high is counted in cap_count, the sample is stored
// (first FFT_SIZE samples only) and the running |I|+|Q| maximum and the index
// at which it occurred are updated.
always @(posedge clk) begin
    // Look at the DUT outputs 1 ns after the clock edge.
    #1;
    if (cap_enable && range_profile_valid) begin
        // Samples beyond FFT_SIZE are still counted below but not stored,
        // which keeps the write inside the 1024-entry capture arrays.
        if (cap_count < FFT_SIZE) begin
            cap_out_q[cap_count] = range_profile_q;
            cap_out_i[cap_count] = range_profile_i;
        end

        // |I| + |Q|, built up one component at a time in 32-bit arithmetic.
        cap_cur_abs = 0;
        if (range_profile_i[15])
            cap_cur_abs = cap_cur_abs - range_profile_i;
        else
            cap_cur_abs = cap_cur_abs + range_profile_i;
        if (range_profile_q[15])
            cap_cur_abs = cap_cur_abs - range_profile_q;
        else
            cap_cur_abs = cap_cur_abs + range_profile_q;

        // Strictly-greater comparison: the first occurrence of the maximum wins.
        if (cap_cur_abs > cap_max_abs) begin
            cap_peak_bin = cap_count;
            cap_max_abs  = cap_cur_abs;
        end

        cap_count = cap_count + 1;
    end
end
// Check task: records one test result and prints it with a running number.
//   cond  - 1 if the expectation holds; 0, X or Z is reported as a failure
//   label - description printed next to the verdict (string literal, up to
//           512 characters; the previous [511:0] declaration held only 64)
// Side effects: increments test_num and either pass_count or fail_count.
task check;
    input               cond;
    input [512*8-1:0]   label;
    begin
        test_num = test_num + 1;
        if (cond) begin
            $display("[PASS] Test %0d: %0s", test_num, label);
            pass_count = pass_count + 1;
        end else begin
            $display("[FAIL] Test %0d: %0s", test_num, label);
            fail_count = fail_count + 1;
        end
    end
endtask
// Helper: apply reset.
// Asserts reset_n, parks every DUT input at zero, clears the capture
// bookkeeping, holds reset for four rising clock edges, then releases it and
// returns 1 ns after the following rising edge.
task apply_reset;
    begin
        reset_n = 1'b0;

        // Park all DUT inputs.
        adc_valid        = 1'b0;
        chirp_counter    = 6'd0;
        {adc_data_i, adc_data_q}             = {2{16'd0}};
        {long_chirp_real, long_chirp_imag}   = {2{16'd0}};
        {short_chirp_real, short_chirp_imag} = {2{16'd0}};

        // Disarm the output monitor and forget earlier statistics.
        cap_enable   = 0;
        cap_peak_bin = -1;
        cap_max_abs  = 0;
        cap_count    = 0;

        // Hold reset for four edges, release, then let one more edge pass.
        repeat (4) @(posedge clk);
        reset_n = 1'b1;
        @(posedge clk);
        #1;
    end
endtask
// Helper: start capture.
// Clears the sample count and peak statistics, then arms the output monitor.
task start_capture;
    begin
        cap_peak_bin = -1;   // -1 means "no peak recorded yet"
        cap_max_abs  = 0;
        cap_count    = 0;
        cap_enable   = 1;    // armed last, after the statistics are clean
    end
endtask
// Helper: wait for IDLE with long timeout.
// Blocks until chain_state equals ST_IDLE or FRAME_TIMEOUT rising clock edges
// have passed, then advances 1 ns. A timeout only prints a warning; callers
// detect the failure through their own checks on chain_state / cap_count.
task wait_for_idle;
    integer wait_count;
    begin
        wait_count = 0;
        // Case inequality (!==) is deliberate: with "!=" an X or Z on
        // chain_state makes the condition unknown, which ends the loop
        // immediately as though the DUT were idle.
        while (chain_state !== ST_IDLE && wait_count < FRAME_TIMEOUT) begin
            @(posedge clk);
            wait_count = wait_count + 1;
        end
        #1;
        if (wait_count >= FRAME_TIMEOUT)
            $display(" WARNING: wait_for_idle timed out at %0d cycles", wait_count);
    end
endtask
// Helper: feed DC frame.
// Drives FFT_SIZE consecutive valid samples in which both the ADC input and
// the long-chirp reference are the constant +4096 + j0; the short-chirp
// reference is held at zero. adc_valid is dropped after the last sample.
task feed_dc_frame;
    begin
        // The levels never change during the frame, so they are set once.
        adc_data_i       = 16'sh1000; // +4096
        adc_data_q       = 16'sh0000;
        long_chirp_real  = 16'sh1000;
        long_chirp_imag  = 16'sh0000;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;

        // One sample per rising edge; the #1 keeps changes off the edge.
        repeat (FFT_SIZE) begin
            @(posedge clk);
            #1;
        end

        adc_valid = 1'b0;
    end
endtask
// Helper: feed tone frame (signal = reference = complex tone at tone_bin).
//   tone_bin - FFT bin index of the tone (tone_bin cycles per FFT_SIZE samples)
// Drives FFT_SIZE consecutive valid samples of amplitude 8000; the same
// sample is applied to the ADC input and to the long-chirp reference. The
// short-chirp reference is held at zero.
task feed_tone_frame;
    input integer tone_bin;
    integer n;
    integer tone_re;
    integer tone_im;
    real    phase;
    begin
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;

        n = 0;
        while (n < FFT_SIZE) begin
            // 2*pi * tone_bin * n / FFT_SIZE
            phase   = 6.28318530718 * tone_bin * n / (1.0 * FFT_SIZE);
            // $rtoi truncates toward zero before the 16-bit assignment.
            tone_re = $rtoi(8000.0 * $cos(phase));
            tone_im = $rtoi(8000.0 * $sin(phase));

            adc_data_i      = tone_re;
            adc_data_q      = tone_im;
            long_chirp_real = tone_re;
            long_chirp_imag = tone_im;

            @(posedge clk);
            #1;
            n = n + 1;
        end

        adc_valid = 1'b0;
    end
endtask
// Helper: feed impulse frame (delta at sample 0).
// Drives FFT_SIZE consecutive valid samples. Sample 0 carries 16'sh4000 on the
// real part of both the ADC input and the long-chirp reference; every other
// sample, all imaginary parts and the short-chirp reference are zero.
task feed_impulse_frame;
    integer idx;
    reg signed [15:0] level;
    begin
        // Components that stay at zero for the whole frame.
        adc_data_q       = 16'sh0000;
        long_chirp_imag  = 16'sh0000;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;

        for (idx = 0; idx < FFT_SIZE; idx = idx + 1) begin
            level = (idx == 0) ? 16'sh4000 : 16'sh0000; // 0.5 in Q15 at sample 0
            adc_data_i      = level;
            long_chirp_real = level;
            @(posedge clk);
            #1;
        end

        adc_valid = 1'b0;
    end
endtask
// Stimulus: runs test groups 1-11 in sequence against the DUT, prints a
// pass/fail summary and ends the simulation. Every frame-based group follows
// the same pattern: apply_reset, start_capture, feed FFT_SIZE samples,
// wait_for_idle, disarm the monitor, then check the captured statistics.
initial begin
    $dumpfile("tb_mf_chain_synth.vcd");
    $dumpvars(0, tb_mf_chain_synth);

    // Init
    clk          = 0;
    pass_count   = 0;
    fail_count   = 0;
    test_num     = 0;
    cap_enable   = 0;
    cap_count    = 0;
    cap_max_abs  = 0;
    cap_peak_bin = -1;

    // ------------------------------------------------------------
    // TEST GROUP 1: Reset behaviour
    // ------------------------------------------------------------
    $display("\n--- Test Group 1: Reset Behaviour ---");
    apply_reset;
    // Re-assert reset and observe the outputs while it is held.
    reset_n = 0;
    repeat (4) @(posedge clk); #1;
    check(range_profile_valid === 1'b0, "range_profile_valid=0 during reset");
    check(chain_state === ST_IDLE, "chain_state=IDLE during reset");
    reset_n = 1;
    @(posedge clk); #1;

    // ------------------------------------------------------------
    // TEST GROUP 2: No valid input -> stays IDLE
    // ------------------------------------------------------------
    $display("\n--- Test Group 2: No Valid Input Stays IDLE ---");
    apply_reset;
    repeat (100) @(posedge clk);
    #1;
    check(chain_state === ST_IDLE, "Stays in IDLE with no valid input");
    check(range_profile_valid === 1'b0, "No output when no input");

    // ------------------------------------------------------------
    // TEST GROUP 3: DC frame -> state transitions and output count
    // ------------------------------------------------------------
    $display("\n--- Test Group 3: DC Frame Full Processing ---");
    apply_reset;
    start_capture;
    feed_dc_frame;
    $display(" Waiting for processing (3 FFTs + multiply)...");
    wait_for_idle;
    cap_enable = 0;
    $display(" Output count: %0d (expected %0d)", cap_count, FFT_SIZE);
    $display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "DC: Outputs exactly 1024 range profile samples");
    check(chain_state === ST_IDLE, "DC: Returns to IDLE after frame");
    // DC autocorrelation: FFT of DC = energy at bin 0 only
    // conj multiply = |bin0|^2 at bin 0, zeros elsewhere
    // IFFT of single bin = constant => peak at bin 0 (or any bin since all equal)
    // With Q15 truncation, expect non-zero output
    check(cap_max_abs > 0, "DC: Non-zero output");

    // ------------------------------------------------------------
    // TEST GROUP 4: Zero input -> zero output
    // ------------------------------------------------------------
    $display("\n--- Test Group 4: Zero Input Zero Output ---");
    apply_reset;
    start_capture;
    for (i = 0; i < FFT_SIZE; i = i + 1) begin
        adc_data_i       = 16'd0;
        adc_data_q       = 16'd0;
        long_chirp_real  = 16'd0;
        long_chirp_imag  = 16'd0;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;
        @(posedge clk); #1;
    end
    adc_valid = 1'b0;
    wait_for_idle;
    cap_enable = 0;
    $display(" Output count: %0d", cap_count);
    $display(" Max magnitude: %0d", cap_max_abs);
    check(cap_count == FFT_SIZE, "Zero: Got 1024 output samples");
    // Allow small rounding noise (fft_engine Q15 rounding can produce ±1)
    check(cap_max_abs <= 2, "Zero: Output magnitude <= 2 (near zero)");

    // ------------------------------------------------------------
    // TEST GROUP 5: Tone autocorrelation (bin 5)
    // signal = reference = tone at bin 5
    // Autocorrelation peak at bin 0 (time lag 0)
    // ------------------------------------------------------------
    $display("\n--- Test Group 5: Tone Autocorrelation (bin 5) ---");
    apply_reset;
    start_capture;
    feed_tone_frame(5);
    $display(" Waiting for processing...");
    wait_for_idle;
    cap_enable = 0;
    $display(" Output count: %0d", cap_count);
    $display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "Tone: Got 1024 output samples");
    // Autocorrelation of a pure tone: peak at bin 0.
    // cap_peak_bin stays at -1 when no sample ever exceeded zero magnitude;
    // without the >= 0 guard that sentinel would satisfy "<= 5" and the
    // check would pass without any peak having been found.
    check(cap_peak_bin >= 0 &&
          (cap_peak_bin <= 5 || cap_peak_bin >= FFT_SIZE - 5),
          "Tone: Autocorrelation peak near bin 0");
    check(cap_max_abs > 0, "Tone: Peak magnitude > 0");

    // ------------------------------------------------------------
    // TEST GROUP 6: Impulse autocorrelation
    // ------------------------------------------------------------
    $display("\n--- Test Group 6: Impulse Autocorrelation ---");
    apply_reset;
    start_capture;
    feed_impulse_frame;
    $display(" Waiting for processing...");
    wait_for_idle;
    cap_enable = 0;
    $display(" Output count: %0d", cap_count);
    $display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "Impulse: Got 1024 output samples");
    check(cap_max_abs > 0, "Impulse: Non-zero output");
    check(chain_state === ST_IDLE, "Impulse: Returns to IDLE");

    // ------------------------------------------------------------
    // TEST GROUP 7: Reset mid-operation
    // ------------------------------------------------------------
    $display("\n--- Test Group 7: Reset Mid-Operation ---");
    apply_reset;
    // Feed ~512 samples (halfway through collection)
    for (i = 0; i < 512; i = i + 1) begin
        adc_data_i       = 16'sh1000;
        adc_data_q       = 16'sh0000;
        long_chirp_real  = 16'sh1000;
        long_chirp_imag  = 16'sh0000;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;
        @(posedge clk); #1;
    end
    adc_valid = 1'b0;
    // Assert reset
    reset_n = 0;
    repeat (4) @(posedge clk); #1;
    reset_n = 1;
    @(posedge clk); #1;
    check(chain_state === ST_IDLE, "Mid-op reset: Returns to IDLE");
    check(range_profile_valid === 1'b0, "Mid-op reset: No output");
    // Feed a complete frame after reset
    start_capture;
    feed_dc_frame;
    wait_for_idle;
    cap_enable = 0;
    $display(" Post-reset frame: %0d outputs", cap_count);
    check(cap_count == FFT_SIZE, "Mid-op reset: Post-reset frame gives 1024 outputs");

    // ------------------------------------------------------------
    // TEST GROUP 8: Back-to-back frames
    // ------------------------------------------------------------
    $display("\n--- Test Group 8: Back-to-Back Frames ---");
    apply_reset;
    // Frame 1
    start_capture;
    feed_dc_frame;
    wait_for_idle;
    cap_enable = 0;
    $display(" Frame 1: %0d outputs, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "B2B Frame 1: 1024 outputs");
    // Frame 2 (no reset in between)
    start_capture;
    feed_tone_frame(3);
    wait_for_idle;
    cap_enable = 0;
    $display(" Frame 2: %0d outputs, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "B2B Frame 2: 1024 outputs");

    // ------------------------------------------------------------
    // TEST GROUP 9: Mismatched signal vs reference
    // Signal at bin 5, reference at bin 10
    // ------------------------------------------------------------
    $display("\n--- Test Group 9: Mismatched Signal vs Reference ---");
    apply_reset;
    start_capture;
    for (i = 0; i < FFT_SIZE; i = i + 1) begin
        adc_data_i       = $rtoi(8000.0 * $cos(6.28318530718 * 5 * i / 1024.0));
        adc_data_q       = $rtoi(8000.0 * $sin(6.28318530718 * 5 * i / 1024.0));
        long_chirp_real  = $rtoi(8000.0 * $cos(6.28318530718 * 10 * i / 1024.0));
        long_chirp_imag  = $rtoi(8000.0 * $sin(6.28318530718 * 10 * i / 1024.0));
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;
        @(posedge clk); #1;
    end
    adc_valid = 1'b0;
    wait_for_idle;
    cap_enable = 0;
    $display(" Mismatched: peak bin=%0d, magnitude=%0d", cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "Mismatch: Got 1024 output samples");
    // Signal=bin5, ref=bin10: product has energy at bin(5-10)=bin(-5)=bin(1019)
    // IFFT of that gives a tone at sample spacing of 5
    // The key check is that it completes and produces output
    check(cap_max_abs > 0, "Mismatch: Non-zero output");
    check(chain_state === ST_IDLE, "Mismatch: Returns to IDLE");

    // ------------------------------------------------------------
    // TEST GROUP 10: Saturation -> max positive values
    // ------------------------------------------------------------
    $display("\n--- Test Group 10: Saturation Max Positive ---");
    apply_reset;
    start_capture;
    for (i = 0; i < FFT_SIZE; i = i + 1) begin
        adc_data_i       = 16'sh7FFF;
        adc_data_q       = 16'sh7FFF;
        long_chirp_real  = 16'sh7FFF;
        long_chirp_imag  = 16'sh7FFF;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;
        @(posedge clk); #1;
    end
    adc_valid = 1'b0;
    wait_for_idle;
    cap_enable = 0;
    $display(" Saturation: count=%0d, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "Saturation: Completes with 1024 outputs");
    check(chain_state === ST_IDLE, "Saturation: Returns to IDLE");

    // ------------------------------------------------------------
    // TEST GROUP 11: Valid-gap / stall test
    // ------------------------------------------------------------
    $display("\n--- Test Group 11: Valid-Gap Stall Test ---");
    apply_reset;
    start_capture;
    for (i = 0; i < FFT_SIZE; i = i + 1) begin
        adc_data_i       = 16'sh1000;
        adc_data_q       = 16'sh0000;
        long_chirp_real  = 16'sh1000;
        long_chirp_imag  = 16'sh0000;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;
        @(posedge clk); #1;
        // Every 100 samples, insert a 10-cycle gap
        // (never after the final sample, so the frame still ends cleanly)
        if ((i % 100) == 99 && i < FFT_SIZE - 1) begin : stall_block
            integer gap_j;
            adc_valid = 1'b0;
            for (gap_j = 0; gap_j < 10; gap_j = gap_j + 1) begin
                @(posedge clk); #1;
            end
        end
    end
    adc_valid = 1'b0;
    wait_for_idle;
    cap_enable = 0;
    $display(" Stall: count=%0d, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
    check(cap_count == FFT_SIZE, "Stall: 1024 outputs emitted");
    check(chain_state === ST_IDLE, "Stall: Returns to IDLE");

    // ------------------------------------------------------------
    // Summary
    // ------------------------------------------------------------
    $display("");
    $display("========================================");
    $display(" MATCHED FILTER PROCESSING CHAIN");
    $display(" (SYNTHESIS BRANCH fft_engine)");
    $display(" PASSED: %0d / %0d", pass_count, test_num);
    $display(" FAILED: %0d / %0d", fail_count, test_num);
    if (fail_count == 0)
        $display(" ** ALL TESTS PASSED **");
    else
        $display(" ** SOME TESTS FAILED **");
    $display("========================================");
    $display("");
    #100;
    $finish;
end
endmodule
+355
View File
@@ -0,0 +1,355 @@
`timescale 1ns / 1ps
/**
* tb_xfft_32.v
*
* Testbench for xfft_32 AXI-Stream FFT wrapper.
* Verifies the wrapper correctly interfaces with fft_engine via AXI-Stream.
*
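 * Test Groups:
 * 1. Impulse response (all output bins = input amplitude)
 * 2. DC input (bin 0 = A*N, rest ~= 0)
 * 3. Single tone detection
 * 4. Back-to-back transforms (no state leakage)
 * 5. Zero input (all output bins = 0)
 *
 * The AXI-Stream handshake (tvalid/tlast) is exercised by every test, but
 * m_axis_data_tready is held high throughout: there is no back-pressure test.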
*/
module tb_xfft_32;
// ============================================================================
// PARAMETERS
// ============================================================================
localparam N = 32;
localparam CLK_PERIOD = 10;
// ============================================================================
// SIGNALS
// ============================================================================
reg aclk, aresetn;
reg [7:0] cfg_tdata;
reg cfg_tvalid;
wire cfg_tready;
reg [31:0] din_tdata;
reg din_tvalid;
reg din_tlast;
wire [31:0] dout_tdata;
wire dout_tvalid;
wire dout_tlast;
reg dout_tready;
// ============================================================================
// DUT
// ============================================================================
xfft_32 dut (
.aclk(aclk),
.aresetn(aresetn),
.s_axis_config_tdata(cfg_tdata),
.s_axis_config_tvalid(cfg_tvalid),
.s_axis_config_tready(cfg_tready),
.s_axis_data_tdata(din_tdata),
.s_axis_data_tvalid(din_tvalid),
.s_axis_data_tlast(din_tlast),
.m_axis_data_tdata(dout_tdata),
.m_axis_data_tvalid(dout_tvalid),
.m_axis_data_tlast(dout_tlast),
.m_axis_data_tready(dout_tready)
);
// ============================================================================
// CLOCK
// ============================================================================
initial aclk = 0;
always #(CLK_PERIOD/2) aclk = ~aclk;
// ============================================================================
// PASS/FAIL TRACKING
// ============================================================================
integer pass_count, fail_count;
// Records one test result and prints it.
//   cond  - 1 if the expectation holds; any other value (0, X, Z) is a failure
//   label - description printed after the verdict (up to 512 characters)
// Side effects: increments pass_count or fail_count.
task check;
    input               cond;
    input [512*8-1:0]   label;
    begin
        case (cond)
            1'b1: begin
                $display(" [PASS] %0s", label);
                pass_count = pass_count + 1;
            end
            default: begin
                $display(" [FAIL] %0s", label);
                fail_count = fail_count + 1;
            end
        endcase
    end
endtask
// ============================================================================
// OUTPUT CAPTURE
// ============================================================================
reg signed [15:0] out_re [0:N-1];
reg signed [15:0] out_im [0:N-1];
integer out_idx;
reg got_tlast;
integer tlast_count;
// ============================================================================
// HELPER TASKS
// ============================================================================
// Resets the DUT: asserts aresetn with all stream inputs idle and
// dout_tready high, holds it for five rising clock edges, releases it and
// waits two more edges before returning.
task do_reset;
    begin
        aresetn = 1'b0;

        // Idle the config and data input channels.
        {cfg_tvalid, din_tvalid, din_tlast} = 3'b000;
        cfg_tdata   = 8'd0;
        din_tdata   = 32'd0;
        // The output side is always ready to accept data.
        dout_tready = 1'b1;

        repeat (5) @(posedge aclk);
        aresetn = 1'b1;
        repeat (2) @(posedge aclk);
    end
endtask
// Send one config word on the config channel (forward FFT: tdata[0]=1).
//   cfg - value driven on cfg_tdata for a single clock cycle
// Waits (up to 5000 rising edges) for cfg_tready before driving, holds
// cfg_tvalid for exactly one rising edge, then returns the channel to idle.
//
// The channel is driven with nonblocking assignments. This task resumes in the
// same time step as the DUT's posedge-clocked logic; a blocking write made at
// that point could be seen by the DUT either before or after it samples its
// inputs, depending on simulator scheduling. Nonblocking writes take effect
// after the DUT has sampled, so each value is seen on the next edge.
task send_config;
    input [7:0] cfg;
    integer wait_cnt;
    begin
        // Wait for the wrapper to accept configuration.
        wait_cnt = 0;
        while (!cfg_tready && wait_cnt < 5000) begin
            @(posedge aclk);
            wait_cnt = wait_cnt + 1;
        end
        if (wait_cnt >= 5000)
            $display(" WARNING: send_config timed out waiting for cfg_tready");

        cfg_tdata  <= cfg;
        cfg_tvalid <= 1'b1;
        @(posedge aclk);          // DUT samples the config word on this edge
        cfg_tvalid <= 1'b0;
        cfg_tdata  <= 8'd0;
    end
endtask
// Feed N samples: each sample is {im[15:0], re[15:0]}
// feed_re and feed_im must be pre-loaded before calling feed_data
reg signed [15:0] feed_re [0:N-1];
reg signed [15:0] feed_im [0:N-1];
// Streams the N samples held in feed_re / feed_im into the DUT data channel,
// one per rising clock edge, packed as {im, re}. din_tlast accompanies the
// final sample; afterwards the channel is returned to idle.
//
// The channel is driven with nonblocking assignments. This task resumes in the
// same time step as the DUT's posedge-clocked logic; a blocking write made at
// that point could be seen by the DUT either before or after it samples its
// inputs, depending on simulator scheduling. Nonblocking writes take effect
// after the DUT has sampled, so each sample is seen on exactly one edge.
task feed_data;
    integer i;
    begin
        for (i = 0; i < N; i = i + 1) begin
            din_tdata  <= {feed_im[i], feed_re[i]};
            din_tvalid <= 1'b1;
            din_tlast  <= (i == N - 1) ? 1'b1 : 1'b0;
            @(posedge aclk);      // DUT samples this beat on this edge
        end
        din_tvalid <= 1'b0;
        din_tlast  <= 1'b0;
        din_tdata  <= 32'd0;
    end
endtask
// Capture N output samples.
// Watches the output channel on each rising clock edge and stores every
// accepted beat (dout_tvalid && dout_tready) into out_re / out_im until N
// beats have been collected or 5000 edges have elapsed.
// Results: out_idx     - number of beats captured
//          got_tlast   - set if any captured beat carried dout_tlast
//          tlast_count - number of captured beats that carried dout_tlast
task capture_output;
    integer cycles;
    begin
        tlast_count = 0;
        got_tlast   = 0;
        out_idx     = 0;

        for (cycles = 0; out_idx < N && cycles < 5000; cycles = cycles + 1) begin
            @(posedge aclk);
            if (dout_tvalid && dout_tready) begin
                // Upper half-word is the imaginary part, lower the real part.
                out_im[out_idx] = dout_tdata[31:16];
                out_re[out_idx] = dout_tdata[15:0];
                if (dout_tlast) begin
                    tlast_count = tlast_count + 1;
                    got_tlast   = 1;
                end
                out_idx = out_idx + 1;
            end
        end
    end
endtask
// ============================================================================
// VCD
// ============================================================================
// Waveform dump: writes every signal at and below tb_xfft_32 (depth 0 = all
// levels of hierarchy) to tb_xfft_32.vcd for the whole simulation.
initial begin
$dumpfile("tb_xfft_32.vcd");
$dumpvars(0, tb_xfft_32);
end
// ============================================================================
// MAIN TEST
// ============================================================================
integer i;
reg signed [31:0] err;
integer max_err;
integer max_mag_bin;
reg signed [31:0] max_mag, mag;
real angle;
// Main test sequence. Resets the DUT once, then runs five tests. Each test
// pre-loads feed_re / feed_im, sends config word 8'h01, streams the N samples
// in and captures N samples out before checking them. Ends with a pass/fail
// summary and $finish.
// NOTE(review): capture_output also fills tlast_count, but nothing here checks
// it, so a tlast asserted on the wrong beat or on several beats would go
// unnoticed - confirm whether that check is wanted.
initial begin
pass_count = 0;
fail_count = 0;
$display("============================================================");
$display(" xfft_32 AXI-Stream Wrapper Testbench");
$display("============================================================");
do_reset;
// ================================================================
// TEST 1: Impulse Response
// ================================================================
$display("");
$display("--- Test 1: Impulse Response ---");
// Input: amplitude 1000 at sample 0, zero everywhere else.
for (i = 0; i < N; i = i + 1) begin
feed_re[i] = (i == 0) ? 16'sd1000 : 16'sd0;
feed_im[i] = 16'sd0;
end
send_config(8'h01); // Forward FFT
feed_data;
capture_output;
check(out_idx == N, "Received N output samples");
check(got_tlast == 1, "Got tlast on output");
// Largest deviation over all bins: real part from 1000, imaginary part from 0.
max_err = 0;
for (i = 0; i < N; i = i + 1) begin
err = out_re[i] - 1000;
if (err < 0) err = -err;
if (err > max_err) max_err = err;
err = out_im[i];
if (err < 0) err = -err;
if (err > max_err) max_err = err;
end
$display(" Impulse max error: %0d", max_err);
check(max_err < 10, "Impulse: all bins ~= 1000");
// ================================================================
// TEST 2: DC Input
// ================================================================
$display("");
$display("--- Test 2: DC Input ---");
// Input: constant 100 on the real part; expected bin 0 is 100 * N = 3200.
for (i = 0; i < N; i = i + 1) begin
feed_re[i] = 16'sd100;
feed_im[i] = 16'sd0;
end
send_config(8'h01);
feed_data;
capture_output;
$display(" DC bin[0] = %0d + j%0d (expect ~3200)", out_re[0], out_im[0]);
check(out_re[0] >= 3100 && out_re[0] <= 3300, "DC: bin 0 ~= 3200 (5% tol)");
// Largest |re| or |im| over bins 1..N-1 (everything except the DC bin).
max_err = 0;
for (i = 1; i < N; i = i + 1) begin
err = out_re[i]; if (err < 0) err = -err;
if (err > max_err) max_err = err;
err = out_im[i]; if (err < 0) err = -err;
if (err > max_err) max_err = err;
end
$display(" DC max non-DC: %0d", max_err);
check(max_err < 25, "DC: non-DC bins ~= 0");
// ================================================================
// TEST 3: Single Tone (bin 4)
// ================================================================
$display("");
$display("--- Test 3: Single Tone (bin 4) ---");
// Input: real-valued cosine, 4 cycles across the 32 samples, amplitude 1000.
for (i = 0; i < N; i = i + 1) begin
angle = 6.28318530718 * 4.0 * i / 32.0;
feed_re[i] = $rtoi($cos(angle) * 1000.0);
feed_im[i] = 16'sd0;
end
send_config(8'h01);
feed_data;
capture_output;
// Find the bin with the largest squared magnitude (first one wins on ties).
max_mag = 0;
max_mag_bin = 0;
for (i = 0; i < N; i = i + 1) begin
mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
if (mag > max_mag) begin
max_mag = mag;
max_mag_bin = i;
end
end
$display(" Tone peak bin: %0d (expect 4 or 28)", max_mag_bin);
// A real cosine has energy at bin 4 and its mirror bin N-4 = 28; either is accepted.
check(max_mag_bin == 4 || max_mag_bin == 28, "Tone: peak at bin 4 or 28");
// ================================================================
// TEST 4: Back-to-back transforms
// ================================================================
$display("");
$display("--- Test 4: Back-to-Back Transforms ---");
// First: impulse
for (i = 0; i < N; i = i + 1) begin
feed_re[i] = (i == 0) ? 16'sd500 : 16'sd0;
feed_im[i] = 16'sd0;
end
send_config(8'h01);
feed_data;
capture_output;
check(out_idx == N, "Back-to-back 1st: got N outputs");
// Second: DC immediately after
// (no reset in between; expected bin 0 is 50 * N = 1600)
for (i = 0; i < N; i = i + 1) begin
feed_re[i] = 16'sd50;
feed_im[i] = 16'sd0;
end
send_config(8'h01);
feed_data;
capture_output;
check(out_idx == N, "Back-to-back 2nd: got N outputs");
$display(" 2nd transform bin[0] = %0d (expect ~1600)", out_re[0]);
check(out_re[0] >= 1500 && out_re[0] <= 1700, "Back-to-back 2nd: bin 0 ~= 1600");
// ================================================================
// TEST 5: Zero input
// ================================================================
$display("");
$display("--- Test 5: Zero Input ---");
for (i = 0; i < N; i = i + 1) begin
feed_re[i] = 16'sd0;
feed_im[i] = 16'sd0;
end
send_config(8'h01);
feed_data;
capture_output;
// Largest |re| or |im| over all bins; must be exactly zero.
max_err = 0;
for (i = 0; i < N; i = i + 1) begin
err = out_re[i]; if (err < 0) err = -err;
if (err > max_err) max_err = err;
err = out_im[i]; if (err < 0) err = -err;
if (err > max_err) max_err = err;
end
check(max_err == 0, "Zero input: all outputs = 0");
// ================================================================
// SUMMARY
// ================================================================
$display("");
$display("============================================================");
$display(" RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
if (fail_count == 0)
$display(" ALL TESTS PASSED");
else
$display(" SOME TESTS FAILED");
$display("============================================================");
$finish;
end
endmodule
+240 -33
View File
@@ -1,18 +1,15 @@
`timescale 1ns / 1ps `timescale 1ns / 1ps
// ============================================================================ // ============================================================================
// xfft_32.v Synthesis stub for Xilinx 32-point FFT IP core // xfft_32.v 32-point FFT with AXI-Stream interface
// ============================================================================ // ============================================================================
// This is a PLACEHOLDER module that provides the port interface expected by // Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
// doppler_processor.v. It does NOT perform an actual FFT it simply passes // interface expected by doppler_processor.v.
// input data through with a one-cycle latency and generates proper AXI-Stream
// handshake signals.
//
// For real hardware, replace this stub with either:
// (a) A Xilinx FFT IP core generated via Vivado IP Catalog, or
// (b) A custom synthesizable radix-2 DIT 32-point FFT in Verilog.
// //
// Port interface matches the Xilinx LogiCORE IP Fast Fourier Transform // Port interface matches the Xilinx LogiCORE IP Fast Fourier Transform
// (AXI-Stream variant) as instantiated in doppler_processor.v. // (AXI-Stream variant) as instantiated in doppler_processor.v.
//
// Data format: {Q[15:0], I[15:0]} packed 32-bit.
// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
// ============================================================================ // ============================================================================
module xfft_32 ( module xfft_32 (
@@ -36,36 +33,246 @@ module xfft_32 (
input wire m_axis_data_tready input wire m_axis_data_tready
); );
// ---------------------------------------------------------------------------- // ============================================================================
// Synthesis stub: pass-through with one-cycle latency // PARAMETERS
// ---------------------------------------------------------------------------- // ============================================================================
// This gives Vivado a real module to synthesize so it can check port localparam N = 32;
// connectivity, infer timing paths, and estimate utilization. The actual localparam LOG2N = 5;
// FFT computation is deferred to IP integration or a custom RTL FFT.
// ----------------------------------------------------------------------------
// Always accept config // ============================================================================
assign s_axis_config_tready = 1'b1; // INTERNAL SIGNALS
// ============================================================================
// Pipeline registers for data pass-through // FSM states
reg [31:0] data_reg; localparam [2:0] S_IDLE = 3'd0,
reg valid_reg; S_CONFIG = 3'd1, // Latch config (fwd/inv)
reg last_reg; S_FEED = 3'd2, // Feed input to FFT engine
S_WAIT = 3'd3, // Wait for FFT to complete
S_OUTPUT = 3'd4; // Stream output
reg [2:0] state;
// Configuration
reg inverse_reg;
// Input buffering
reg signed [15:0] in_buf_re [0:N-1];
reg signed [15:0] in_buf_im [0:N-1];
reg [5:0] in_count; // 0..31 for loading, extra bit for overflow check
// Output buffering
reg signed [15:0] out_buf_re [0:N-1];
reg signed [15:0] out_buf_im [0:N-1];
reg [5:0] out_count;
reg [5:0] out_total; // counts how many outputs captured from engine
// FFT engine interface
reg fft_start;
reg fft_inverse;
reg signed [15:0] fft_din_re, fft_din_im;
reg fft_din_valid;
wire signed [15:0] fft_dout_re, fft_dout_im;
wire fft_dout_valid;
wire fft_busy;
wire fft_done;
// Feed counter for streaming into engine
reg [5:0] feed_count;
// ============================================================================
// FFT ENGINE INSTANCE
// ============================================================================
fft_engine #(
.N(N),
.LOG2N(LOG2N),
.DATA_W(16),
.INTERNAL_W(32),
.TWIDDLE_W(16),
.TWIDDLE_FILE("fft_twiddle_32.mem")
) fft_core (
.clk(aclk),
.reset_n(aresetn),
.start(fft_start),
.inverse(fft_inverse),
.din_re(fft_din_re),
.din_im(fft_din_im),
.din_valid(fft_din_valid),
.dout_re(fft_dout_re),
.dout_im(fft_dout_im),
.dout_valid(fft_dout_valid),
.busy(fft_busy),
.done(fft_done)
);
// ============================================================================
// AXI-STREAM OUTPUTS
// ============================================================================
// Config is accepted when idle
assign s_axis_config_tready = (state == S_IDLE);
// Output data: {Q, I} packed
assign m_axis_data_tdata = {out_buf_im[out_count[4:0]], out_buf_re[out_count[4:0]]};
assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
assign m_axis_data_tlast = (state == S_OUTPUT) && (out_count == N - 1);
// ============================================================================
// BUFFER WRITE LOGIC separate always block, NO async reset
// Allows Vivado to infer distributed RAM instead of dissolving into registers.
// ============================================================================
// Input buffer write enable
reg in_buf_we;
reg [4:0] in_buf_waddr;
reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
// Output buffer write enable
reg out_buf_we;
reg [4:0] out_buf_waddr;
reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
always @(posedge aclk) begin always @(posedge aclk) begin
if (!aresetn) begin if (in_buf_we) begin
data_reg <= 32'd0; in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
valid_reg <= 1'b0; in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
last_reg <= 1'b0; end
end else begin if (out_buf_we) begin
data_reg <= s_axis_data_tdata; out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
valid_reg <= s_axis_data_tvalid; out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
last_reg <= s_axis_data_tlast;
end end
end end
assign m_axis_data_tdata = data_reg; // ============================================================================
assign m_axis_data_tvalid = valid_reg; // MAIN FSM
assign m_axis_data_tlast = last_reg; // ============================================================================
always @(posedge aclk or negedge aresetn) begin
if (!aresetn) begin
state <= S_IDLE;
inverse_reg <= 1'b0;
in_count <= 0;
out_count <= 0;
out_total <= 0;
feed_count <= 0;
fft_start <= 1'b0;
fft_inverse <= 1'b0;
fft_din_re <= 0;
fft_din_im <= 0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
in_buf_waddr <= 0;
in_buf_wdata_re <= 0;
in_buf_wdata_im <= 0;
out_buf_we <= 1'b0;
out_buf_waddr <= 0;
out_buf_wdata_re <= 0;
out_buf_wdata_im <= 0;
end else begin
// Defaults
fft_start <= 1'b0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
out_buf_we <= 1'b0;
case (state)
// ================================================================
S_IDLE: begin
in_count <= 0;
if (s_axis_config_tvalid) begin
// Config tdata[0]: 1=forward, 0=inverse
// fft_engine: inverse=0 means forward, inverse=1 means inverse
inverse_reg <= ~s_axis_config_tdata[0];
state <= S_FEED;
in_count <= 0;
feed_count <= 0;
end
end
// ================================================================
// S_FEED: Buffer all N inputs first, then start engine.
// ================================================================
S_FEED: begin
if (in_count < N) begin
// Still accepting input data
if (s_axis_data_tvalid) begin
in_buf_we <= 1'b1;
in_buf_waddr <= in_count[4:0];
in_buf_wdata_re <= s_axis_data_tdata[15:0];
in_buf_wdata_im <= s_axis_data_tdata[31:16];
in_count <= in_count + 1;
end
end else if (feed_count == 0) begin
// All N inputs buffered, start the FFT engine
fft_start <= 1'b1;
fft_inverse <= inverse_reg;
feed_count <= 0;
state <= S_WAIT;
out_total <= 0;
end
end
// ================================================================
// S_WAIT: Feed buffered data to engine, then wait for output
// ================================================================
S_WAIT: begin
if (feed_count < N) begin
fft_din_re <= in_buf_re[feed_count[4:0]];
fft_din_im <= in_buf_im[feed_count[4:0]];
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
// Capture engine outputs
if (fft_dout_valid && out_total < N) begin
out_buf_we <= 1'b1;
out_buf_waddr <= out_total[4:0];
out_buf_wdata_re <= fft_dout_re;
out_buf_wdata_im <= fft_dout_im;
out_total <= out_total + 1;
end
// Engine done
if (fft_done) begin
state <= S_OUTPUT;
out_count <= 0;
end
end
// ================================================================
// S_OUTPUT: Stream buffered results via AXI-Stream master
// ================================================================
S_OUTPUT: begin
if (m_axis_data_tready || !m_axis_data_tvalid) begin
if (out_count < N) begin
// m_axis_data_tdata driven combinationally from out_buf
if (m_axis_data_tready) begin
out_count <= out_count + 1;
end
end
if (out_count >= N - 1 && m_axis_data_tready) begin
state <= S_IDLE;
end
end
end
default: state <= S_IDLE;
endcase
end
end
// ============================================================================
// MEMORY INIT (simulation only)
// ============================================================================
`ifdef SIMULATION
integer init_k;
initial begin
for (init_k = 0; init_k < N; init_k = init_k + 1) begin
in_buf_re[init_k] = 0;
in_buf_im[init_k] = 0;
out_buf_re[init_k] = 0;
out_buf_im[init_k] = 0;
end
end
`endif
endmodule endmodule