Replace FFT stubs with synthesizable radix-2 DIT engine, fix BRAM inference
Implement iterative single-butterfly FFT engine (fft_engine.v) supporting 1024-pt and 32-pt transforms with quarter-wave twiddle ROM, XPM_MEMORY_TDPRAM for guaranteed BRAM mapping in Vivado, and behavioral model for simulation. Add xfft_32.v AXI-Stream wrapper for doppler_processor integration and dual-branch matched_filter_processing_chain.v (behavioral + synthesis paths). Fix placement failure caused by 68K+ registers from dissolved memory arrays: - doppler_processor.v: extract mem writes to sync-only always block for BRAM - xfft_32.v: extract buffer writes to sync-only always block for LUTRAM Post-implementation: 37K regs (29%), 23K LUTs (37%), 10 BRAM (7%), fully routed. All testbenches pass: fft_engine 12/12, xfft_32 10/10, mf_chain 27/27.
This commit is contained in:
@@ -124,157 +124,188 @@ always @(posedge clk or negedge reset_n) begin
|
||||
end
|
||||
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
|
||||
|
||||
// ==============================================
|
||||
// Main State Machine - FIXED
|
||||
// ==============================================
|
||||
reg [5:0] fft_sample_counter;
|
||||
reg [9:0] processing_timeout;
|
||||
|
||||
always @(posedge clk or negedge reset_n) begin
|
||||
if (!reset_n) begin
|
||||
state <= S_IDLE;
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= 0;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
doppler_valid <= 0;
|
||||
fft_start <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 0;
|
||||
status <= 0;
|
||||
chirps_received <= 0;
|
||||
chirp_state <= 0;
|
||||
end else begin
|
||||
doppler_valid <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
|
||||
if (processing_timeout > 0) begin
|
||||
processing_timeout <= processing_timeout - 1;
|
||||
end
|
||||
|
||||
case (state)
|
||||
S_IDLE: begin
|
||||
if (frame_start_pulse) begin
|
||||
// Start new frame
|
||||
write_chirp_index <= 0;
|
||||
write_range_bin <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
chirps_received <= 0;
|
||||
//chirp_state <= 1; // Start accumulating
|
||||
end
|
||||
|
||||
if (data_valid && !frame_buffer_full) begin
|
||||
// ==============================================
|
||||
// Main State Machine - FIXED
|
||||
// ==============================================
|
||||
reg [5:0] fft_sample_counter;
|
||||
reg [9:0] processing_timeout;
|
||||
|
||||
// Memory write enable and data signals (extracted for BRAM inference)
|
||||
reg mem_we;
|
||||
reg [10:0] mem_waddr_r;
|
||||
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
|
||||
|
||||
// Memory read data (registered for BRAM read latency)
|
||||
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// Separate always block for memory writes — NO async reset
|
||||
// in sensitivity list, so Vivado can infer Block RAM.
|
||||
// ----------------------------------------------------------
|
||||
always @(posedge clk) begin
|
||||
if (mem_we) begin
|
||||
doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
|
||||
doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
|
||||
end
|
||||
// Registered read — address driven by mem_read_addr from FSM
|
||||
mem_rdata_i <= doppler_i_mem[mem_read_addr];
|
||||
mem_rdata_q <= doppler_q_mem[mem_read_addr];
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// Main FSM — async reset for control registers only.
|
||||
// Memory arrays are NOT touched here.
|
||||
// ----------------------------------------------------------
|
||||
always @(posedge clk or negedge reset_n) begin
|
||||
if (!reset_n) begin
|
||||
state <= S_IDLE;
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= 0;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
doppler_valid <= 0;
|
||||
fft_start <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 0;
|
||||
status <= 0;
|
||||
chirps_received <= 0;
|
||||
chirp_state <= 0;
|
||||
mem_we <= 0;
|
||||
mem_waddr_r <= 0;
|
||||
mem_wdata_i <= 0;
|
||||
mem_wdata_q <= 0;
|
||||
mult_i <= 0;
|
||||
mult_q <= 0;
|
||||
fft_input_i <= 0;
|
||||
fft_input_q <= 0;
|
||||
doppler_output <= 0;
|
||||
doppler_bin <= 0;
|
||||
end else begin
|
||||
doppler_valid <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
mem_we <= 0;
|
||||
|
||||
if (processing_timeout > 0) begin
|
||||
processing_timeout <= processing_timeout - 1;
|
||||
end
|
||||
|
||||
case (state)
|
||||
S_IDLE: begin
|
||||
if (frame_start_pulse) begin
|
||||
// Start new frame
|
||||
write_chirp_index <= 0;
|
||||
write_range_bin <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
chirps_received <= 0;
|
||||
end
|
||||
|
||||
if (data_valid && !frame_buffer_full) begin
|
||||
state <= S_ACCUMULATE;
|
||||
write_range_bin <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
S_ACCUMULATE: begin
|
||||
if (data_valid) begin
|
||||
// Store with proper addressing
|
||||
doppler_i_mem[mem_write_addr] <= range_data[15:0];
|
||||
doppler_q_mem[mem_write_addr] <= range_data[31:16];
|
||||
|
||||
// Debug output to see what's being written
|
||||
// $display("Time=%t: Write addr=%d (chirp=%d, range=%d), Data=%h",
|
||||
// $time, mem_write_addr, write_chirp_index, write_range_bin, range_data);
|
||||
|
||||
// Increment range bin
|
||||
if (write_range_bin < RANGE_BINS - 1) begin
|
||||
write_range_bin <= write_range_bin + 1;
|
||||
end else begin
|
||||
// Completed one chirp
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= write_chirp_index + 1;
|
||||
chirps_received <= chirps_received + 1;
|
||||
|
||||
// Check if frame is complete
|
||||
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
|
||||
frame_buffer_full <= 1;
|
||||
chirp_state <= 0; // Stop accumulating
|
||||
// Could automatically start processing here:
|
||||
state <= S_LOAD_FFT;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
fft_start <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// [Rest of S_LOAD_FFT, S_FFT_WAIT, S_OUTPUT states remain similar]
|
||||
// But with fixed addressing in S_LOAD_FFT:
|
||||
S_LOAD_FFT: begin
|
||||
fft_start <= 0;
|
||||
|
||||
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
|
||||
// Use correct addressing for reading
|
||||
mult_i <= $signed(doppler_i_mem[mem_read_addr]) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
mult_q <= $signed(doppler_q_mem[mem_read_addr]) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
write_range_bin <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
S_ACCUMULATE: begin
|
||||
if (data_valid) begin
|
||||
// Drive memory write signals (actual write in separate block)
|
||||
mem_we <= 1;
|
||||
mem_waddr_r <= mem_write_addr;
|
||||
mem_wdata_i <= range_data[15:0];
|
||||
mem_wdata_q <= range_data[31:16];
|
||||
|
||||
// Round instead of truncate
|
||||
fft_input_i <= (mult_i + (1 << 14)) >>> 15; // Round to nearest
|
||||
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
||||
// Increment range bin
|
||||
if (write_range_bin < RANGE_BINS - 1) begin
|
||||
write_range_bin <= write_range_bin + 1;
|
||||
end else begin
|
||||
// Completed one chirp
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= write_chirp_index + 1;
|
||||
chirps_received <= chirps_received + 1;
|
||||
|
||||
// Check if frame is complete
|
||||
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
|
||||
frame_buffer_full <= 1;
|
||||
chirp_state <= 0;
|
||||
state <= S_LOAD_FFT;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
fft_start <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
S_LOAD_FFT: begin
|
||||
fft_start <= 0;
|
||||
|
||||
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
|
||||
// Use registered read data (one cycle latency from BRAM)
|
||||
mult_i <= $signed(mem_rdata_i) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
mult_q <= $signed(mem_rdata_q) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
|
||||
fft_input_valid <= 1;
|
||||
|
||||
if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin
|
||||
fft_input_last <= 1;
|
||||
end
|
||||
|
||||
// Increment chirp index for next sample
|
||||
read_doppler_index <= read_doppler_index + 1;
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
end else begin
|
||||
state <= S_FFT_WAIT;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 100;
|
||||
end
|
||||
end
|
||||
|
||||
S_FFT_WAIT: begin
|
||||
if (fft_output_valid) begin
|
||||
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
|
||||
doppler_bin <= fft_sample_counter;
|
||||
range_bin <= read_range_bin;
|
||||
doppler_valid <= 1;
|
||||
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
|
||||
if (fft_output_last) begin
|
||||
state <= S_OUTPUT;
|
||||
fft_sample_counter <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (processing_timeout == 0) begin
|
||||
state <= S_OUTPUT;
|
||||
end
|
||||
end
|
||||
|
||||
S_OUTPUT: begin
|
||||
if (read_range_bin < RANGE_BINS - 1) begin
|
||||
read_range_bin <= read_range_bin + 1;
|
||||
read_doppler_index <= 0;
|
||||
state <= S_LOAD_FFT;
|
||||
fft_start <= 1;
|
||||
end else begin
|
||||
state <= S_IDLE;
|
||||
frame_buffer_full <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
endcase
|
||||
|
||||
status <= {state, frame_buffer_full};
|
||||
end
|
||||
// Round instead of truncate
|
||||
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
|
||||
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
||||
|
||||
fft_input_valid <= 1;
|
||||
|
||||
if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin
|
||||
fft_input_last <= 1;
|
||||
end
|
||||
|
||||
// Increment chirp index for next sample
|
||||
read_doppler_index <= read_doppler_index + 1;
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
end else begin
|
||||
state <= S_FFT_WAIT;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 100;
|
||||
end
|
||||
end
|
||||
|
||||
S_FFT_WAIT: begin
|
||||
if (fft_output_valid) begin
|
||||
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
|
||||
doppler_bin <= fft_sample_counter;
|
||||
range_bin <= read_range_bin;
|
||||
doppler_valid <= 1;
|
||||
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
|
||||
if (fft_output_last) begin
|
||||
state <= S_OUTPUT;
|
||||
fft_sample_counter <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (processing_timeout == 0) begin
|
||||
state <= S_OUTPUT;
|
||||
end
|
||||
end
|
||||
|
||||
S_OUTPUT: begin
|
||||
if (read_range_bin < RANGE_BINS - 1) begin
|
||||
read_range_bin <= read_range_bin + 1;
|
||||
read_doppler_index <= 0;
|
||||
state <= S_LOAD_FFT;
|
||||
fft_start <= 1;
|
||||
end else begin
|
||||
state <= S_IDLE;
|
||||
frame_buffer_full <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
endcase
|
||||
|
||||
status <= {state, frame_buffer_full};
|
||||
end
|
||||
end
|
||||
|
||||
// ==============================================
|
||||
|
||||
@@ -0,0 +1,606 @@
|
||||
`timescale 1ns / 1ps
|
||||
|
||||
/**
|
||||
* fft_engine.v
|
||||
*
|
||||
* Synthesizable parameterized radix-2 DIT FFT/IFFT engine.
|
||||
* Iterative single-butterfly architecture with quarter-wave twiddle ROM.
|
||||
*
|
||||
* Architecture:
|
||||
* - LOAD: Accept N input samples, store bit-reversed in BRAM
|
||||
* - COMPUTE: LOG2N stages x N/2 butterflies, 2-cycle pipeline:
|
||||
* BF_READ: Present BRAM addresses, capture twiddle
|
||||
* BF_CALC: BRAM data valid; butterfly compute + writeback
|
||||
* - OUTPUT: Stream N results (1/N scaling for IFFT)
|
||||
*
|
||||
* Data memory uses xpm_memory_tdpram (Xilinx Parameterized Macros) for
|
||||
* guaranteed BRAM mapping when FFT_XPM_BRAM is defined. By default, a
|
||||
* behavioral Verilog-2001 model replaces the XPM so the design compiles
|
||||
* with Icarus Verilog or any non-Xilinx simulator.
|
||||
*
|
||||
* Clock domain: single clock (clk), active-low async reset (reset_n).
|
||||
*/
|
||||
|
||||
module fft_engine #(
|
||||
parameter N = 1024,
|
||||
parameter LOG2N = 10,
|
||||
parameter DATA_W = 16,
|
||||
parameter INTERNAL_W = 32,
|
||||
parameter TWIDDLE_W = 16,
|
||||
parameter TWIDDLE_FILE = "fft_twiddle_1024.mem"
|
||||
)(
|
||||
input wire clk,
|
||||
input wire reset_n,
|
||||
|
||||
// Control
|
||||
input wire start,
|
||||
input wire inverse,
|
||||
|
||||
// Data input
|
||||
input wire signed [DATA_W-1:0] din_re,
|
||||
input wire signed [DATA_W-1:0] din_im,
|
||||
input wire din_valid,
|
||||
|
||||
// Data output
|
||||
output reg signed [DATA_W-1:0] dout_re,
|
||||
output reg signed [DATA_W-1:0] dout_im,
|
||||
output reg dout_valid,
|
||||
|
||||
// Status
|
||||
output wire busy,
|
||||
output reg done
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// SAFE WIDTH CONSTANTS
|
||||
// ============================================================================
|
||||
localparam [LOG2N:0] FFT_N = N;
|
||||
localparam [LOG2N:0] FFT_N_HALF = N / 2;
|
||||
localparam [LOG2N:0] FFT_N_QTR = N / 4;
|
||||
localparam [LOG2N:0] FFT_N_HALF_M1 = N / 2 - 1;
|
||||
localparam [LOG2N:0] FFT_N_M1 = N - 1;
|
||||
|
||||
// ============================================================================
|
||||
// STATES
|
||||
// ============================================================================
|
||||
localparam [2:0] ST_IDLE = 3'd0,
|
||||
ST_LOAD = 3'd1,
|
||||
ST_BF_READ = 3'd2,
|
||||
ST_BF_CALC = 3'd3,
|
||||
ST_OUTPUT = 3'd4,
|
||||
ST_DONE = 3'd5;
|
||||
|
||||
reg [2:0] state;
|
||||
assign busy = (state != ST_IDLE);
|
||||
|
||||
// ============================================================================
|
||||
// DATA MEMORY DECLARATIONS
|
||||
// ============================================================================
|
||||
|
||||
// BRAM read data (registered outputs from port blocks)
|
||||
reg signed [INTERNAL_W-1:0] mem_rdata_a_re, mem_rdata_a_im;
|
||||
reg signed [INTERNAL_W-1:0] mem_rdata_b_re, mem_rdata_b_im;
|
||||
|
||||
// ============================================================================
|
||||
// TWIDDLE ROM
|
||||
// ============================================================================
|
||||
localparam TW_QUARTER = N / 4;
|
||||
localparam TW_ADDR_W = LOG2N - 2;
|
||||
|
||||
(* rom_style = "block" *) reg signed [TWIDDLE_W-1:0] cos_rom [0:TW_QUARTER-1];
|
||||
|
||||
initial begin
|
||||
$readmemh(TWIDDLE_FILE, cos_rom);
|
||||
end
|
||||
|
||||
// ============================================================================
|
||||
// BIT-REVERSE
|
||||
// ============================================================================
|
||||
// Mirror a LOG2N-bit index: input bit b ends up at result bit LOG2N-1-b.
// Used to compute the write address of each sample during ST_LOAD.
function [LOG2N-1:0] bit_reverse;
    input [LOG2N-1:0] val;
    integer pos;
    begin
        bit_reverse = {LOG2N{1'b0}};
        // Walk the result from MSB to LSB, pulling the mirrored input bit.
        for (pos = LOG2N - 1; pos >= 0; pos = pos - 1)
            bit_reverse[pos] = val[LOG2N-1-pos];
    end
endfunction
|
||||
|
||||
// ============================================================================
|
||||
// COUNTERS AND PIPELINE REGISTERS
|
||||
// ============================================================================
|
||||
reg [LOG2N-1:0] load_count;
|
||||
reg [LOG2N:0] out_count;
|
||||
reg [LOG2N-1:0] bfly_count;
|
||||
reg [3:0] stage;
|
||||
|
||||
// Registered values (captured in BF_READ, used in BF_CALC)
|
||||
reg signed [TWIDDLE_W-1:0] rd_tw_cos, rd_tw_sin;
|
||||
reg [LOG2N-1:0] rd_addr_even, rd_addr_odd;
|
||||
reg rd_inverse;
|
||||
|
||||
// Half and twiddle stride
|
||||
reg [LOG2N-1:0] half_reg;
|
||||
reg [LOG2N-1:0] tw_stride_reg;
|
||||
|
||||
// ============================================================================
|
||||
// BUTTERFLY ADDRESS COMPUTATION (combinational)
|
||||
// ============================================================================
|
||||
// Addresses of the two butterfly operands and the twiddle index for the
// butterfly selected by bfly_count in the current stage.
reg [LOG2N-1:0] bf_addr_even;
reg [LOG2N-1:0] bf_addr_odd;
reg [LOG2N-1:0] bf_tw_idx;

always @(*) begin : bf_addr_calc
    reg [LOG2N-1:0] low_mask;   // half_reg - 1: selects the in-group position bits
    reg [LOG2N-1:0] offset;     // position of this butterfly inside its group
    reg [LOG2N-1:0] base;       // bfly_count with the in-group bits cleared

    low_mask = half_reg - 1;
    offset   = bfly_count & low_mask;
    base     = bfly_count & ~low_mask;   // same value as bfly_count - offset

    // Twiddle index steps by tw_stride_reg per in-group position.
    bf_tw_idx    = tw_stride_reg * offset;

    // Doubling the group base leaves room for the in-group offset in the
    // low bits; the partner operand sits half_reg entries above.
    bf_addr_even = (base << 1) | offset;
    bf_addr_odd  = ((base << 1) | offset) + half_reg;
end
|
||||
|
||||
// ============================================================================
|
||||
// TWIDDLE LOOKUP (combinational)
|
||||
// ============================================================================
|
||||
// Combinational twiddle lookup: derives cos/sin for twiddle index bf_tw_idx
// from the quarter-wave table cos_rom (entries 0 .. N/4-1).
//
//   k == 0        : cos = cos_rom[0],            sin = 0
//   k == N/4      : cos = 0,                     sin = cos_rom[0]
//   0 < k < N/4   : cos = cos_rom[k],            sin = cos_rom[N/4 - k]
//   k > N/4       : cos = -cos_rom[N/2 - k],     sin = cos_rom[k - N/4]
//
// NOTE(review): the last branch only indexes the table correctly for
// k < N/2; this looks guaranteed by bf_addr_calc/tw_stride_reg -- confirm.
reg signed [TWIDDLE_W-1:0] tw_cos_lookup;
reg signed [TWIDDLE_W-1:0] tw_sin_lookup;

always @(*) begin : tw_lookup
    reg [LOG2N-1:0] k;
    reg [LOG2N-1:0] rom_idx;

    k             = bf_tw_idx;
    // Defaults for every variable written in this block. rom_idx was
    // previously left unassigned on the k == 0 and k == N/4 paths, which is
    // an incomplete combinational assignment (latch inference / lint warning).
    rom_idx       = {LOG2N{1'b0}};
    tw_cos_lookup = 0;
    tw_sin_lookup = 0;

    if (k == 0) begin
        tw_cos_lookup = cos_rom[0];
        tw_sin_lookup = {TWIDDLE_W{1'b0}};
    end else if (k == FFT_N_QTR[LOG2N-1:0]) begin
        // Quarter turn: cosine is zero, sine takes the table's first entry.
        tw_cos_lookup = {TWIDDLE_W{1'b0}};
        tw_sin_lookup = cos_rom[0];
    end else if (k < FFT_N_QTR[LOG2N-1:0]) begin
        // First quadrant: direct cosine, sine from the mirrored index.
        tw_cos_lookup = cos_rom[k[TW_ADDR_W-1:0]];
        rom_idx       = FFT_N_QTR[LOG2N-1:0] - k;
        tw_sin_lookup = cos_rom[rom_idx[TW_ADDR_W-1:0]];
    end else begin
        // Second quadrant: sine from the shifted index, cosine negated.
        rom_idx       = k - FFT_N_QTR[LOG2N-1:0];
        tw_sin_lookup = cos_rom[rom_idx[TW_ADDR_W-1:0]];
        rom_idx       = FFT_N_HALF[LOG2N-1:0] - k;
        tw_cos_lookup = -cos_rom[rom_idx[TW_ADDR_W-1:0]];
    end
end
|
||||
|
||||
// ============================================================================
|
||||
// SATURATION
|
||||
// ============================================================================
|
||||
// Clamp a signed INTERNAL_W-bit value into the signed DATA_W-bit range
// [-(2^(DATA_W-1)), 2^(DATA_W-1)-1]; in-range values pass through unchanged.
function signed [DATA_W-1:0] saturate;
    input signed [INTERNAL_W-1:0] val;
    reg   signed [INTERNAL_W-1:0] hi_lim;
    reg   signed [INTERNAL_W-1:0] lo_lim;
    begin
        hi_lim = (1 << (DATA_W - 1)) - 1;
        lo_lim = ~hi_lim;                    // two's complement: ~x == -x - 1

        // Start from the pass-through case, then override when out of range.
        saturate = val[DATA_W-1:0];
        if (val > hi_lim)
            saturate = hi_lim[DATA_W-1:0];
        else if (val < lo_lim)
            saturate = lo_lim[DATA_W-1:0];
    end
endfunction
|
||||
|
||||
// ============================================================================
|
||||
// BUTTERFLY COMPUTATION (combinational, for BF_CALC write data)
|
||||
// ============================================================================
|
||||
// Butterfly datapath (combinational).
//
//   t   = B * twiddle   (twiddle conjugated when rd_inverse is set)
//   sum = A + t         -> written back through port A in ST_BF_CALC
//   dif = A - t         -> written back through port B in ST_BF_CALC
//
// A = mem_rdata_a_*, B = mem_rdata_b_*, twiddle = rd_tw_cos / rd_tw_sin.
// The twiddle product is scaled back by TWIDDLE_W-1 bits.
reg signed [INTERNAL_W-1:0] bf_t_re, bf_t_im;
reg signed [INTERNAL_W-1:0] bf_sum_re, bf_sum_im;
reg signed [INTERNAL_W-1:0] bf_dif_re, bf_dif_im;

always @(*) begin : bf_compute
    // Full-precision accumulators: INTERNAL_W x TWIDDLE_W product plus one
    // bit for the two-term sum. Without them the expression is sized by the
    // INTERNAL_W-bit assignment target, so the products are truncated to
    // INTERNAL_W bits *before* the right shift and wrap once the data
    // magnitude times the twiddle no longer fits in INTERNAL_W bits.
    reg signed [INTERNAL_W+TWIDDLE_W:0] acc_re;
    reg signed [INTERNAL_W+TWIDDLE_W:0] acc_im;

    if (!rd_inverse) begin
        // Forward: B * (cos - j*sin)
        acc_re = mem_rdata_b_re * rd_tw_cos + mem_rdata_b_im * rd_tw_sin;
        acc_im = mem_rdata_b_im * rd_tw_cos - mem_rdata_b_re * rd_tw_sin;
    end else begin
        // Inverse: B * (cos + j*sin)
        acc_re = mem_rdata_b_re * rd_tw_cos - mem_rdata_b_im * rd_tw_sin;
        acc_im = mem_rdata_b_im * rd_tw_cos + mem_rdata_b_re * rd_tw_sin;
    end

    // Arithmetic shift on the wide value, then narrow to INTERNAL_W.
    bf_t_re = acc_re >>> (TWIDDLE_W - 1);
    bf_t_im = acc_im >>> (TWIDDLE_W - 1);

    bf_sum_re = mem_rdata_a_re + bf_t_re;
    bf_sum_im = mem_rdata_a_im + bf_t_im;
    bf_dif_re = mem_rdata_a_re - bf_t_re;
    bf_dif_im = mem_rdata_a_im - bf_t_im;
end
|
||||
|
||||
// ============================================================================
|
||||
// BRAM PORT ADDRESS / WE / WDATA — combinational mux (registered signals)
|
||||
// ============================================================================
|
||||
// Drives port A and port B control signals from FSM state.
|
||||
// They are combinational (always @*) functions of registered FSM signals (and of the din_* inputs during ST_LOAD), so they settle before the next posedge
|
||||
// when the BRAM template blocks sample them. This avoids any NBA race.
|
||||
// ============================================================================
|
||||
// Memory port control signals, selected combinationally from the FSM state.
reg                         bram_we_a;
reg        [LOG2N-1:0]      bram_addr_a;
reg signed [INTERNAL_W-1:0] bram_wdata_a_re;
reg signed [INTERNAL_W-1:0] bram_wdata_a_im;

reg                         bram_we_b;
reg        [LOG2N-1:0]      bram_addr_b;
reg signed [INTERNAL_W-1:0] bram_wdata_b_re;
reg signed [INTERNAL_W-1:0] bram_wdata_b_im;

// Port A: input load, even-operand read, sum write-back, result read-out.
always @(*) begin : bram_port_a_mux
    // Idle values; any state not listed below leaves the port reading address 0.
    bram_we_a       = 1'b0;
    bram_addr_a     = 0;
    bram_wdata_a_re = 0;
    bram_wdata_a_im = 0;

    case (state)
        ST_LOAD: begin
            // Store each accepted sample at the bit-reversed load index.
            bram_addr_a     = bit_reverse(load_count);
            bram_we_a       = din_valid;
            // din_* are signed, so these assignments sign-extend to INTERNAL_W.
            bram_wdata_a_re = din_re;
            bram_wdata_a_im = din_im;
        end
        ST_BF_READ:
            bram_addr_a = bf_addr_even;
        ST_BF_CALC: begin
            bram_addr_a     = rd_addr_even;
            bram_we_a       = 1'b1;
            bram_wdata_a_re = bf_sum_re;
            bram_wdata_a_im = bf_sum_im;
        end
        ST_OUTPUT:
            bram_addr_a = out_count[LOG2N-1:0];
        default: begin
            // idle values already applied
        end
    endcase
end

// Port B: odd-operand read and difference write-back only.
always @(*) begin : bram_port_b_mux
    if (state == ST_BF_CALC) begin
        bram_we_b       = 1'b1;
        bram_addr_b     = rd_addr_odd;
        bram_wdata_b_re = bf_dif_re;
        bram_wdata_b_im = bf_dif_im;
    end else begin
        bram_we_b       = 1'b0;
        bram_wdata_b_re = 0;
        bram_wdata_b_im = 0;
        if (state == ST_BF_READ)
            bram_addr_b = bf_addr_odd;
        else
            bram_addr_b = 0;
    end
end
|
||||
|
||||
// ============================================================================
|
||||
// DATA MEMORY — True Dual-Port BRAM
|
||||
// ============================================================================
|
||||
// For synthesis: xpm_memory_tdpram (Xilinx Parameterized Macros)
|
||||
// For simulation: behavioral Verilog-2001 model (Icarus-compatible)
|
||||
// ============================================================================
|
||||
|
||||
// XPM read-data wires (directly assigned to rdata regs below)
|
||||
wire [INTERNAL_W-1:0] xpm_douta_re, xpm_doutb_re;
|
||||
wire [INTERNAL_W-1:0] xpm_douta_im, xpm_doutb_im;
|
||||
|
||||
always @(*) begin
|
||||
mem_rdata_a_re = $signed(xpm_douta_re);
|
||||
mem_rdata_a_im = $signed(xpm_douta_im);
|
||||
mem_rdata_b_re = $signed(xpm_doutb_re);
|
||||
mem_rdata_b_im = $signed(xpm_doutb_im);
|
||||
end
|
||||
|
||||
`ifndef FFT_XPM_BRAM
|
||||
// ----------------------------------------------------------------------------
|
||||
// Default: behavioral TDP model (works with Icarus Verilog -g2001)
|
||||
// For Vivado synthesis, define FFT_XPM_BRAM to use xpm_memory_tdpram.
|
||||
// ----------------------------------------------------------------------------
|
||||
reg [INTERNAL_W-1:0] sim_mem_re [0:N-1];
|
||||
reg [INTERNAL_W-1:0] sim_mem_im [0:N-1];
|
||||
|
||||
// Port A
|
||||
reg [INTERNAL_W-1:0] sim_douta_re, sim_douta_im;
|
||||
always @(posedge clk) begin
|
||||
if (bram_we_a) begin
|
||||
sim_mem_re[bram_addr_a] <= bram_wdata_a_re;
|
||||
sim_mem_im[bram_addr_a] <= bram_wdata_a_im;
|
||||
end
|
||||
sim_douta_re <= sim_mem_re[bram_addr_a];
|
||||
sim_douta_im <= sim_mem_im[bram_addr_a];
|
||||
end
|
||||
assign xpm_douta_re = sim_douta_re;
|
||||
assign xpm_douta_im = sim_douta_im;
|
||||
|
||||
// Port B
|
||||
reg [INTERNAL_W-1:0] sim_doutb_re, sim_doutb_im;
|
||||
always @(posedge clk) begin
|
||||
if (bram_we_b) begin
|
||||
sim_mem_re[bram_addr_b] <= bram_wdata_b_re;
|
||||
sim_mem_im[bram_addr_b] <= bram_wdata_b_im;
|
||||
end
|
||||
sim_doutb_re <= sim_mem_re[bram_addr_b];
|
||||
sim_doutb_im <= sim_mem_im[bram_addr_b];
|
||||
end
|
||||
assign xpm_doutb_re = sim_doutb_re;
|
||||
assign xpm_doutb_im = sim_doutb_im;
|
||||
|
||||
integer init_i;
|
||||
initial begin
|
||||
for (init_i = 0; init_i < N; init_i = init_i + 1) begin
|
||||
sim_mem_re[init_i] = 0;
|
||||
sim_mem_im[init_i] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
`else
|
||||
// ----------------------------------------------------------------------------
|
||||
// Synthesis: xpm_memory_tdpram — guaranteed BRAM mapping
|
||||
// Enabled when FFT_XPM_BRAM is defined (e.g. in Vivado TCL script).
|
||||
// ----------------------------------------------------------------------------
|
||||
// Note: Vivado auto-finds XPM library; no `include needed.
|
||||
// Two instances: one for real, one for imaginary.
|
||||
// WRITE_MODE = "read_first" matches the behavioral TDP model (a same-address read returns the old data).
|
||||
// READ_LATENCY = 1 (registered output).
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
xpm_memory_tdpram #(
|
||||
.ADDR_WIDTH_A (LOG2N),
|
||||
.ADDR_WIDTH_B (LOG2N),
|
||||
.AUTO_SLEEP_TIME (0),
|
||||
.BYTE_WRITE_WIDTH_A (INTERNAL_W),
|
||||
.BYTE_WRITE_WIDTH_B (INTERNAL_W),
|
||||
.CASCADE_HEIGHT (0),
|
||||
.CLOCKING_MODE ("common_clock"),
|
||||
.ECC_BIT_RANGE ("7:0"),
|
||||
.ECC_MODE ("no_ecc"),
|
||||
.ECC_TYPE ("none"),
|
||||
.IGNORE_INIT_SYNTH (0),
|
||||
.MEMORY_INIT_FILE ("none"),
|
||||
.MEMORY_INIT_PARAM ("0"),
|
||||
.MEMORY_OPTIMIZATION ("true"),
|
||||
.MEMORY_PRIMITIVE ("block"),
|
||||
.MEMORY_SIZE (N * INTERNAL_W),
|
||||
.MESSAGE_CONTROL (0),
|
||||
.RAM_DECOMP ("auto"),
|
||||
.READ_DATA_WIDTH_A (INTERNAL_W),
|
||||
.READ_DATA_WIDTH_B (INTERNAL_W),
|
||||
.READ_LATENCY_A (1),
|
||||
.READ_LATENCY_B (1),
|
||||
.READ_RESET_VALUE_A ("0"),
|
||||
.READ_RESET_VALUE_B ("0"),
|
||||
.RST_MODE_A ("SYNC"),
|
||||
.RST_MODE_B ("SYNC"),
|
||||
.SIM_ASSERT_CHK (0),
|
||||
.USE_EMBEDDED_CONSTRAINT (0),
|
||||
.USE_MEM_INIT (1),
|
||||
.USE_MEM_INIT_MMI (0),
|
||||
.WAKEUP_TIME ("disable_sleep"),
|
||||
.WRITE_DATA_WIDTH_A (INTERNAL_W),
|
||||
.WRITE_DATA_WIDTH_B (INTERNAL_W),
|
||||
.WRITE_MODE_A ("read_first"),
|
||||
.WRITE_MODE_B ("read_first"),
|
||||
.WRITE_PROTECT (1)
|
||||
) u_bram_re (
|
||||
.clka (clk),
|
||||
.clkb (clk),
|
||||
.rsta (1'b0),
|
||||
.rstb (1'b0),
|
||||
.ena (1'b1),
|
||||
.enb (1'b1),
|
||||
.regcea (1'b1),
|
||||
.regceb (1'b1),
|
||||
.addra (bram_addr_a),
|
||||
.addrb (bram_addr_b),
|
||||
.dina (bram_wdata_a_re),
|
||||
.dinb (bram_wdata_b_re),
|
||||
.wea (bram_we_a),
|
||||
.web (bram_we_b),
|
||||
.douta (xpm_douta_re),
|
||||
.doutb (xpm_doutb_re),
|
||||
.injectdbiterra (1'b0),
|
||||
.injectdbiterrb (1'b0),
|
||||
.injectsbiterra (1'b0),
|
||||
.injectsbiterrb (1'b0),
|
||||
.sbiterra (),
|
||||
.sbiterrb (),
|
||||
.dbiterra (),
|
||||
.dbiterrb (),
|
||||
.sleep (1'b0)
|
||||
);
|
||||
|
||||
xpm_memory_tdpram #(
|
||||
.ADDR_WIDTH_A (LOG2N),
|
||||
.ADDR_WIDTH_B (LOG2N),
|
||||
.AUTO_SLEEP_TIME (0),
|
||||
.BYTE_WRITE_WIDTH_A (INTERNAL_W),
|
||||
.BYTE_WRITE_WIDTH_B (INTERNAL_W),
|
||||
.CASCADE_HEIGHT (0),
|
||||
.CLOCKING_MODE ("common_clock"),
|
||||
.ECC_BIT_RANGE ("7:0"),
|
||||
.ECC_MODE ("no_ecc"),
|
||||
.ECC_TYPE ("none"),
|
||||
.IGNORE_INIT_SYNTH (0),
|
||||
.MEMORY_INIT_FILE ("none"),
|
||||
.MEMORY_INIT_PARAM ("0"),
|
||||
.MEMORY_OPTIMIZATION ("true"),
|
||||
.MEMORY_PRIMITIVE ("block"),
|
||||
.MEMORY_SIZE (N * INTERNAL_W),
|
||||
.MESSAGE_CONTROL (0),
|
||||
.RAM_DECOMP ("auto"),
|
||||
.READ_DATA_WIDTH_A (INTERNAL_W),
|
||||
.READ_DATA_WIDTH_B (INTERNAL_W),
|
||||
.READ_LATENCY_A (1),
|
||||
.READ_LATENCY_B (1),
|
||||
.READ_RESET_VALUE_A ("0"),
|
||||
.READ_RESET_VALUE_B ("0"),
|
||||
.RST_MODE_A ("SYNC"),
|
||||
.RST_MODE_B ("SYNC"),
|
||||
.SIM_ASSERT_CHK (0),
|
||||
.USE_EMBEDDED_CONSTRAINT (0),
|
||||
.USE_MEM_INIT (1),
|
||||
.USE_MEM_INIT_MMI (0),
|
||||
.WAKEUP_TIME ("disable_sleep"),
|
||||
.WRITE_DATA_WIDTH_A (INTERNAL_W),
|
||||
.WRITE_DATA_WIDTH_B (INTERNAL_W),
|
||||
.WRITE_MODE_A ("read_first"),
|
||||
.WRITE_MODE_B ("read_first"),
|
||||
.WRITE_PROTECT (1)
|
||||
) u_bram_im (
|
||||
.clka (clk),
|
||||
.clkb (clk),
|
||||
.rsta (1'b0),
|
||||
.rstb (1'b0),
|
||||
.ena (1'b1),
|
||||
.enb (1'b1),
|
||||
.regcea (1'b1),
|
||||
.regceb (1'b1),
|
||||
.addra (bram_addr_a),
|
||||
.addrb (bram_addr_b),
|
||||
.dina (bram_wdata_a_im),
|
||||
.dinb (bram_wdata_b_im),
|
||||
.wea (bram_we_a),
|
||||
.web (bram_we_b),
|
||||
.douta (xpm_douta_im),
|
||||
.doutb (xpm_doutb_im),
|
||||
.injectdbiterra (1'b0),
|
||||
.injectdbiterrb (1'b0),
|
||||
.injectsbiterra (1'b0),
|
||||
.injectsbiterrb (1'b0),
|
||||
.sbiterra (),
|
||||
.sbiterrb (),
|
||||
.dbiterra (),
|
||||
.dbiterrb (),
|
||||
.sleep (1'b0)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
// ============================================================================
|
||||
// OUTPUT PIPELINE
|
||||
// ============================================================================
|
||||
// One-cycle delayed copies of "a result address is being presented" and of
// the inverse input, lined up with the registered memory read data that the
// FSM consumes in ST_OUTPUT.
reg out_pipe_valid;
reg out_pipe_inverse;

always @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        out_pipe_inverse <= 1'b0;
        out_pipe_valid   <= 1'b0;
    end else begin
        out_pipe_inverse <= inverse;
        // Asserted while in ST_OUTPUT with out_count still inside 0 .. N-1.
        out_pipe_valid   <= !((state != ST_OUTPUT) ||
                              (out_count > FFT_N_M1[LOG2N-1:0]));
    end
end
|
||||
|
||||
// ============================================================================
|
||||
// MAIN FSM
|
||||
// ============================================================================
|
||||
// Decoded conditions used by the control FSM below.
wire fsm_load_last  = (load_count == FFT_N_M1[LOG2N-1:0]);       // last input sample index
wire fsm_bfly_last  = (bfly_count == FFT_N_HALF_M1[LOG2N-1:0]);  // last butterfly of a stage
wire fsm_stage_last = (stage == LOG2N - 1);                      // final stage
wire fsm_out_more   = (out_count <= FFT_N_M1[LOG2N-1:0]);        // result addresses still to issue

// Control FSM: IDLE -> LOAD -> (BF_READ <-> BF_CALC) x stages -> OUTPUT -> DONE.
// dout_valid and done default to 0 every cycle and are raised for single
// cycles by the states that produce them.
always @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        // Sequencing
        state         <= ST_IDLE;
        stage         <= 0;
        half_reg      <= 1;
        tw_stride_reg <= FFT_N_HALF[LOG2N-1:0];
        // Counters
        load_count    <= 0;
        bfly_count    <= 0;
        out_count     <= 0;
        // Values captured in ST_BF_READ
        rd_addr_even  <= 0;
        rd_addr_odd   <= 0;
        rd_tw_cos     <= 0;
        rd_tw_sin     <= 0;
        rd_inverse    <= 0;
        // Outputs
        dout_re       <= 0;
        dout_im       <= 0;
        dout_valid    <= 0;
        done          <= 0;
    end else begin
        done       <= 1'b0;
        dout_valid <= 1'b0;

        case (state)

            // Wait for a start request, then begin accepting samples.
            ST_IDLE:
                if (start) begin
                    load_count <= 0;
                    state      <= ST_LOAD;
                end

            // Count accepted samples; after the last one, set up stage 0.
            ST_LOAD:
                if (din_valid) begin
                    if (fsm_load_last) begin
                        stage         <= 0;
                        half_reg      <= 1;
                        tw_stride_reg <= FFT_N_HALF[LOG2N-1:0];
                        bfly_count    <= 0;
                        state         <= ST_BF_READ;
                    end else begin
                        load_count <= load_count + 1;
                    end
                end

            // Addresses are on the memory ports this cycle; remember them,
            // the twiddle and the direction for the write-back cycle.
            ST_BF_READ: begin
                rd_addr_even <= bf_addr_even;
                rd_addr_odd  <= bf_addr_odd;
                rd_tw_cos    <= tw_cos_lookup;
                rd_tw_sin    <= tw_sin_lookup;
                rd_inverse   <= inverse;
                state        <= ST_BF_CALC;
            end

            // Write-back cycle; then advance butterfly / stage counters.
            ST_BF_CALC: begin
                // Default successor; overridden below once the final
                // butterfly of the final stage has been written.
                state <= ST_BF_READ;

                if (fsm_bfly_last) begin
                    bfly_count <= 0;
                    if (fsm_stage_last) begin
                        out_count <= 0;
                        state     <= ST_OUTPUT;
                    end else begin
                        stage         <= stage + 1;
                        half_reg      <= half_reg << 1;
                        tw_stride_reg <= tw_stride_reg >> 1;
                    end
                end else begin
                    bfly_count <= bfly_count + 1;
                end
            end

            // Issue result addresses 0 .. N-1; each read value is emitted
            // one cycle later, qualified by out_pipe_valid.
            ST_OUTPUT: begin
                if (fsm_out_more) begin
                    out_count <= out_count + 1;
                end

                if (out_pipe_valid) begin
                    dout_valid <= 1'b1;
                    if (out_pipe_inverse) begin
                        // Inverse transform: apply the 1/N scaling.
                        dout_re <= saturate(mem_rdata_a_re >>> LOG2N);
                        dout_im <= saturate(mem_rdata_a_im >>> LOG2N);
                    end else begin
                        dout_re <= saturate(mem_rdata_a_re);
                        dout_im <= saturate(mem_rdata_a_im);
                    end
                end

                // All addresses issued and the pipeline has drained.
                if (!fsm_out_more && !out_pipe_valid) begin
                    state <= ST_DONE;
                end
            end

            // Single-cycle completion pulse.
            ST_DONE: begin
                done  <= 1'b1;
                state <= ST_IDLE;
            end

            default: state <= ST_IDLE;
        endcase
    end
end
|
||||
|
||||
endmodule
|
||||
@@ -0,0 +1,259 @@
|
||||
// Quarter-wave cosine ROM for 1024-point FFT
|
||||
// 256 entries, 16-bit signed Q15 ($readmemh format)
|
||||
// cos(2*pi*k/1024) for k = 0..255
|
||||
7FFF
|
||||
7FFE
|
||||
7FFD
|
||||
7FF9
|
||||
7FF5
|
||||
7FF0
|
||||
7FE9
|
||||
7FE1
|
||||
7FD8
|
||||
7FCD
|
||||
7FC1
|
||||
7FB4
|
||||
7FA6
|
||||
7F97
|
||||
7F86
|
||||
7F74
|
||||
7F61
|
||||
7F4D
|
||||
7F37
|
||||
7F21
|
||||
7F09
|
||||
7EEF
|
||||
7ED5
|
||||
7EB9
|
||||
7E9C
|
||||
7E7E
|
||||
7E5F
|
||||
7E3E
|
||||
7E1D
|
||||
7DFA
|
||||
7DD5
|
||||
7DB0
|
||||
7D89
|
||||
7D62
|
||||
7D39
|
||||
7D0E
|
||||
7CE3
|
||||
7CB6
|
||||
7C88
|
||||
7C59
|
||||
7C29
|
||||
7BF8
|
||||
7BC5
|
||||
7B91
|
||||
7B5C
|
||||
7B26
|
||||
7AEE
|
||||
7AB6
|
||||
7A7C
|
||||
7A41
|
||||
7A05
|
||||
79C8
|
||||
7989
|
||||
794A
|
||||
7909
|
||||
78C7
|
||||
7884
|
||||
783F
|
||||
77FA
|
||||
77B3
|
||||
776B
|
||||
7722
|
||||
76D8
|
||||
768D
|
||||
7641
|
||||
75F3
|
||||
75A5
|
||||
7555
|
||||
7504
|
||||
74B2
|
||||
745F
|
||||
740A
|
||||
73B5
|
||||
735E
|
||||
7307
|
||||
72AE
|
||||
7254
|
||||
71F9
|
||||
719D
|
||||
7140
|
||||
70E2
|
||||
7083
|
||||
7022
|
||||
6FC1
|
||||
6F5E
|
||||
6EFB
|
||||
6E96
|
||||
6E30
|
||||
6DC9
|
||||
6D61
|
||||
6CF8
|
||||
6C8E
|
||||
6C23
|
||||
6BB7
|
||||
6B4A
|
||||
6ADC
|
||||
6A6D
|
||||
69FD
|
||||
698B
|
||||
6919
|
||||
68A6
|
||||
6832
|
||||
67BC
|
||||
6746
|
||||
66CF
|
||||
6656
|
||||
65DD
|
||||
6563
|
||||
64E8
|
||||
646C
|
||||
63EE
|
||||
6370
|
||||
62F1
|
||||
6271
|
||||
61F0
|
||||
616E
|
||||
60EB
|
||||
6068
|
||||
5FE3
|
||||
5F5D
|
||||
5ED7
|
||||
5E4F
|
||||
5DC7
|
||||
5D3E
|
||||
5CB3
|
||||
5C28
|
||||
5B9C
|
||||
5B0F
|
||||
5A82
|
||||
59F3
|
||||
5964
|
||||
58D3
|
||||
5842
|
||||
57B0
|
||||
571D
|
||||
568A
|
||||
55F5
|
||||
5560
|
||||
54C9
|
||||
5432
|
||||
539B
|
||||
5302
|
||||
5268
|
||||
51CE
|
||||
5133
|
||||
5097
|
||||
4FFB
|
||||
4F5D
|
||||
4EBF
|
||||
4E20
|
||||
4D81
|
||||
4CE0
|
||||
4C3F
|
||||
4B9D
|
||||
4AFB
|
||||
4A58
|
||||
49B4
|
||||
490F
|
||||
4869
|
||||
47C3
|
||||
471C
|
||||
4675
|
||||
45CD
|
||||
4524
|
||||
447A
|
||||
43D0
|
||||
4325
|
||||
427A
|
||||
41CE
|
||||
4121
|
||||
4073
|
||||
3FC5
|
||||
3F17
|
||||
3E68
|
||||
3DB8
|
||||
3D07
|
||||
3C56
|
||||
3BA5
|
||||
3AF2
|
||||
3A40
|
||||
398C
|
||||
38D9
|
||||
3824
|
||||
376F
|
||||
36BA
|
||||
3604
|
||||
354D
|
||||
3496
|
||||
33DF
|
||||
3326
|
||||
326E
|
||||
31B5
|
||||
30FB
|
||||
3041
|
||||
2F87
|
||||
2ECC
|
||||
2E11
|
||||
2D55
|
||||
2C99
|
||||
2BDC
|
||||
2B1F
|
||||
2A61
|
||||
29A3
|
||||
28E5
|
||||
2826
|
||||
2767
|
||||
26A8
|
||||
25E8
|
||||
2528
|
||||
2467
|
||||
23A6
|
||||
22E5
|
||||
2223
|
||||
2161
|
||||
209F
|
||||
1FDD
|
||||
1F1A
|
||||
1E57
|
||||
1D93
|
||||
1CCF
|
||||
1C0B
|
||||
1B47
|
||||
1A82
|
||||
19BE
|
||||
18F9
|
||||
1833
|
||||
176E
|
||||
16A8
|
||||
15E2
|
||||
151C
|
||||
1455
|
||||
138F
|
||||
12C8
|
||||
1201
|
||||
113A
|
||||
1072
|
||||
0FAB
|
||||
0EE3
|
||||
0E1C
|
||||
0D54
|
||||
0C8C
|
||||
0BC4
|
||||
0AFB
|
||||
0A33
|
||||
096A
|
||||
08A2
|
||||
07D9
|
||||
0711
|
||||
0648
|
||||
057F
|
||||
04B6
|
||||
03ED
|
||||
0324
|
||||
025B
|
||||
0192
|
||||
00C9
|
||||
@@ -0,0 +1,11 @@
|
||||
// Quarter-wave cosine ROM for 32-point FFT
|
||||
// 8 entries, 16-bit signed Q15 ($readmemh format)
|
||||
// cos(2*pi*k/32) for k = 0..7
|
||||
7FFF
|
||||
7D89
|
||||
7641
|
||||
6A6D
|
||||
5A82
|
||||
471C
|
||||
30FB
|
||||
18F9
|
||||
@@ -529,18 +529,718 @@ end
|
||||
|
||||
`else
|
||||
// ============================================================================
|
||||
// SYNTHESIS IMPLEMENTATION — Radix-2 DIT FFT via fft_engine
// ============================================================================
// The behavioral FFT implementation above uses $cos/$sin/$rtoi (non-
// synthesizable), so this branch provides the synthesizable datapath that
// is used for real hardware.
|
||||
// Uses a single fft_engine instance (1024-pt) reused 3 times:
|
||||
// 1. Forward FFT of signal
|
||||
// 2. Forward FFT of reference
|
||||
// 3. Inverse FFT of conjugate product
|
||||
// Conjugate multiply done via frequency_matched_filter (4-stage pipeline).
|
||||
//
|
||||
// Buffer scheme (BRAM-inferrable):
|
||||
// sig_buf[1024]: ADC input -> signal FFT output
|
||||
// ref_buf[1024]: Reference input -> reference FFT output
|
||||
// prod_buf[1024]: Conjugate multiply output -> IFFT output
|
||||
//
|
||||
// Memory access is INSIDE always @(posedge clk) blocks (no async reset)
|
||||
// using local blocking variables. This eliminates NBA race conditions
|
||||
// and enables Vivado BRAM inference (same pattern as fft_engine.v).
|
||||
//
|
||||
// BRAM read latency (1 cycle) is handled by "primed" flags:
|
||||
// feed_primed — for FFT feed operations
|
||||
// mult_primed — for conjugate multiply feed
|
||||
// out_primed — for output streaming
|
||||
// ============================================================================
|
||||
|
||||
// NOTE: range_profile_i/q, range_profile_valid and chain_state are driven
// exactly once, by the OUTPUT ASSIGNMENTS section at the end of this branch.
// The former stub tie-offs (constant-zero continuous assigns) are removed
// here: a second continuous driver on the same net resolves to X in
// simulation and is a multiple-driver error in synthesis.

// Transform length and the buffer address width used to index it.
localparam FFT_SIZE = 1024;
localparam ADDR_BITS = 10;

// State encoding (also exported on chain_state)
localparam [3:0] ST_IDLE      = 4'd0,
                 ST_COLLECT   = 4'd1,   // Collect 1024 ADC + ref samples
                 ST_SIG_FFT   = 4'd2,   // Forward FFT of signal
                 ST_SIG_CAP   = 4'd3,   // Capture signal FFT output
                 ST_REF_FFT   = 4'd4,   // Forward FFT of reference
                 ST_REF_CAP   = 4'd5,   // Capture reference FFT output
                 ST_MULTIPLY  = 4'd6,   // Conjugate multiply (pipelined)
                 ST_INV_FFT   = 4'd7,   // Inverse FFT of product
                 ST_INV_CAP   = 4'd8,   // Capture IFFT output
                 ST_OUTPUT    = 4'd9,   // Stream 1024 results
                 ST_DONE      = 4'd10;
|
||||
|
||||
reg [3:0] state;
|
||||
|
||||
// ============================================================================
|
||||
// DATA BUFFERS (block RAM) — declared here, accessed in BRAM port blocks
|
||||
// ============================================================================
|
||||
(* ram_style = "block" *) reg signed [15:0] sig_buf_i [0:FFT_SIZE-1];
|
||||
(* ram_style = "block" *) reg signed [15:0] sig_buf_q [0:FFT_SIZE-1];
|
||||
(* ram_style = "block" *) reg signed [15:0] ref_buf_i [0:FFT_SIZE-1];
|
||||
(* ram_style = "block" *) reg signed [15:0] ref_buf_q [0:FFT_SIZE-1];
|
||||
(* ram_style = "block" *) reg signed [15:0] prod_buf_i [0:FFT_SIZE-1];
|
||||
(* ram_style = "block" *) reg signed [15:0] prod_buf_q [0:FFT_SIZE-1];
|
||||
|
||||
// BRAM read data (registered outputs from port blocks)
|
||||
reg signed [15:0] sig_rdata_i, sig_rdata_q;
|
||||
reg signed [15:0] ref_rdata_i, ref_rdata_q;
|
||||
reg signed [15:0] prod_rdata_i, prod_rdata_q;
|
||||
|
||||
// ============================================================================
|
||||
// COUNTERS
|
||||
// ============================================================================
|
||||
reg [ADDR_BITS:0] collect_count; // 0..1024 for sample collection
|
||||
reg [ADDR_BITS:0] feed_count; // 0..1024 for feeding FFT engine
|
||||
reg [ADDR_BITS:0] cap_count; // 0..1024 for capturing FFT output
|
||||
reg [ADDR_BITS:0] mult_count; // 0..1024 for multiply feeding
|
||||
reg [ADDR_BITS:0] out_count; // 0..1024 for output streaming
|
||||
|
||||
// BRAM read latency pipeline flags
|
||||
reg feed_primed; // 1 = BRAM rdata valid for feed operations
|
||||
reg mult_primed; // 1 = BRAM rdata valid for multiply reads
|
||||
reg out_primed; // 1 = BRAM rdata valid for output reads
|
||||
|
||||
// ============================================================================
|
||||
// FFT ENGINE INTERFACE (single instance, reused 3 times)
|
||||
// ============================================================================
|
||||
reg fft_start;
|
||||
reg fft_inverse;
|
||||
reg signed [15:0] fft_din_re, fft_din_im;
|
||||
reg fft_din_valid;
|
||||
wire signed [15:0] fft_dout_re, fft_dout_im;
|
||||
wire fft_dout_valid;
|
||||
wire fft_busy;
|
||||
wire fft_done;
|
||||
|
||||
fft_engine #(
|
||||
.N(FFT_SIZE),
|
||||
.LOG2N(ADDR_BITS),
|
||||
.DATA_W(16),
|
||||
.INTERNAL_W(32),
|
||||
.TWIDDLE_W(16),
|
||||
.TWIDDLE_FILE("fft_twiddle_1024.mem")
|
||||
) fft_inst (
|
||||
.clk(clk),
|
||||
.reset_n(reset_n),
|
||||
.start(fft_start),
|
||||
.inverse(fft_inverse),
|
||||
.din_re(fft_din_re),
|
||||
.din_im(fft_din_im),
|
||||
.din_valid(fft_din_valid),
|
||||
.dout_re(fft_dout_re),
|
||||
.dout_im(fft_dout_im),
|
||||
.dout_valid(fft_dout_valid),
|
||||
.busy(fft_busy),
|
||||
.done(fft_done)
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// CONJUGATE MULTIPLY INTERFACE (frequency_matched_filter)
|
||||
// ============================================================================
|
||||
reg signed [15:0] mf_sig_re, mf_sig_im;
|
||||
reg signed [15:0] mf_ref_re, mf_ref_im;
|
||||
reg mf_valid_in;
|
||||
wire signed [15:0] mf_out_re, mf_out_im;
|
||||
wire mf_valid_out;
|
||||
|
||||
frequency_matched_filter mf_inst (
|
||||
.clk(clk),
|
||||
.reset_n(reset_n),
|
||||
.fft_real_in(mf_sig_re),
|
||||
.fft_imag_in(mf_sig_im),
|
||||
.fft_valid_in(mf_valid_in),
|
||||
.ref_chirp_real(mf_ref_re),
|
||||
.ref_chirp_imag(mf_ref_im),
|
||||
.filtered_real(mf_out_re),
|
||||
.filtered_imag(mf_out_im),
|
||||
.filtered_valid(mf_valid_out),
|
||||
.state()
|
||||
);
|
||||
|
||||
// Pipeline flush counter for matched filter (4-stage pipeline)
|
||||
reg [2:0] mf_flush_count;
|
||||
|
||||
// ============================================================================
|
||||
// OUTPUT REGISTERS
|
||||
// ============================================================================
|
||||
reg out_valid_reg;
|
||||
reg signed [15:0] out_i_reg, out_q_reg;
|
||||
|
||||
// ============================================================================
|
||||
// BRAM PORT: sig_buf — all address/we/wdata computed inline (race-free)
|
||||
// ============================================================================
|
||||
// Handles: IDLE/COLLECT writes, SIG_FFT/SIG_CAP capture writes,
|
||||
// SIG_FFT feed reads, MULTIPLY signal reads
|
||||
// No async reset in sensitivity list — enables Vivado BRAM inference.
|
||||
// ============================================================================
|
||||
always @(posedge clk) begin : sig_bram_port
    // Per-clock port controls. They are written with blocking '=' and are
    // fully defaulted below, so they act as scratch values, not as state.
    reg                 wr_en;
    reg [ADDR_BITS-1:0] port_addr;
    reg signed [15:0]   wr_i, wr_q;

    wr_en     = 1'b0;
    port_addr = {ADDR_BITS{1'b0}};
    wr_i      = 16'sd0;
    wr_q      = 16'sd0;

    case (state)
        // First ADC sample of a frame: stored at address 0 (the default
        // address) while the FSM is still in IDLE.
        ST_IDLE: begin
            if (adc_valid) begin
                wr_en = 1'b1;
                wr_i  = $signed(adc_data_i);
                wr_q  = $signed(adc_data_q);
            end
        end

        // Remaining ADC samples: stored at the running collect index.
        ST_COLLECT: begin
            if (adc_valid && collect_count < FFT_SIZE) begin
                wr_en     = 1'b1;
                port_addr = collect_count[ADDR_BITS-1:0];
                wr_i      = $signed(adc_data_i);
                wr_q      = $signed(adc_data_q);
            end
        end

        // Signal FFT: stream the buffer out (read), then take the FFT result
        // back in (write). A pending write overrides the read address.
        ST_SIG_FFT, ST_SIG_CAP: begin
            // Read address is only meaningful while feeding; once
            // feed_count reaches FFT_SIZE the address falls back to 0.
            if (state == ST_SIG_FFT && feed_count < FFT_SIZE)
                port_addr = feed_count[ADDR_BITS-1:0];

            if (fft_dout_valid && cap_count < FFT_SIZE) begin
                wr_en     = 1'b1;
                port_addr = cap_count[ADDR_BITS-1:0];
                wr_i      = fft_dout_re;
                wr_q      = fft_dout_im;
            end
        end

        // Conjugate multiply: read-only, indexed by mult_count.
        ST_MULTIPLY: begin
            if (mult_count < FFT_SIZE)
                port_addr = mult_count[ADDR_BITS-1:0];
        end

        default: begin
            // idle port: address 0, no write
        end
    endcase

    // Synchronous write
    if (wr_en) begin
        sig_buf_i[port_addr] <= wr_i;
        sig_buf_q[port_addr] <= wr_q;
    end
    // Synchronous read: data shows up in sig_rdata_* one clock later
    sig_rdata_i <= sig_buf_i[port_addr];
    sig_rdata_q <= sig_buf_q[port_addr];
end
|
||||
|
||||
// ============================================================================
|
||||
// BRAM PORT: ref_buf — all address/we/wdata computed inline (race-free)
|
||||
// ============================================================================
|
||||
// Handles: IDLE/COLLECT writes, REF_FFT/REF_CAP capture writes,
|
||||
// REF_FFT feed reads, MULTIPLY reference reads
|
||||
// ============================================================================
|
||||
always @(posedge clk) begin : ref_bram_port
    // Per-clock port controls. They are written with blocking '=' and are
    // fully defaulted below, so they act as scratch values, not as state.
    reg                 wr_en;
    reg [ADDR_BITS-1:0] port_addr;
    reg signed [15:0]   wr_i, wr_q;

    wr_en     = 1'b0;
    port_addr = {ADDR_BITS{1'b0}};
    wr_i      = 16'sd0;
    wr_q      = 16'sd0;

    case (state)
        // First reference sample of a frame: stored at address 0 (the
        // default address), qualified by adc_valid like the signal path.
        ST_IDLE: begin
            if (adc_valid) begin
                wr_en = 1'b1;
                wr_i  = $signed(long_chirp_real);
                wr_q  = $signed(long_chirp_imag);
            end
        end

        // Remaining reference samples: stored at the running collect index.
        ST_COLLECT: begin
            if (adc_valid && collect_count < FFT_SIZE) begin
                wr_en     = 1'b1;
                port_addr = collect_count[ADDR_BITS-1:0];
                wr_i      = $signed(long_chirp_real);
                wr_q      = $signed(long_chirp_imag);
            end
        end

        // Reference FFT: stream the buffer out (read), then take the FFT
        // result back in (write). A pending write overrides the read address.
        ST_REF_FFT, ST_REF_CAP: begin
            // Read address is only meaningful while feeding; once
            // feed_count reaches FFT_SIZE the address falls back to 0.
            if (state == ST_REF_FFT && feed_count < FFT_SIZE)
                port_addr = feed_count[ADDR_BITS-1:0];

            if (fft_dout_valid && cap_count < FFT_SIZE) begin
                wr_en     = 1'b1;
                port_addr = cap_count[ADDR_BITS-1:0];
                wr_i      = fft_dout_re;
                wr_q      = fft_dout_im;
            end
        end

        // Conjugate multiply: read-only, indexed by mult_count.
        ST_MULTIPLY: begin
            if (mult_count < FFT_SIZE)
                port_addr = mult_count[ADDR_BITS-1:0];
        end

        default: begin
            // idle port: address 0, no write
        end
    endcase

    // Synchronous write
    if (wr_en) begin
        ref_buf_i[port_addr] <= wr_i;
        ref_buf_q[port_addr] <= wr_q;
    end
    // Synchronous read: data shows up in ref_rdata_* one clock later
    ref_rdata_i <= ref_buf_i[port_addr];
    ref_rdata_q <= ref_buf_q[port_addr];
end
|
||||
|
||||
// ============================================================================
|
||||
// BRAM PORT: prod_buf — all address/we/wdata computed inline (race-free)
|
||||
// ============================================================================
|
||||
// Handles: MULTIPLY capture writes, INV_FFT/INV_CAP capture writes,
|
||||
// INV_FFT feed reads, OUTPUT reads
|
||||
// ============================================================================
|
||||
always @(posedge clk) begin : prod_bram_port
    // Per-clock port controls. They are written with blocking '=' and are
    // fully defaulted below, so they act as scratch values, not as state.
    reg                 wr_en;
    reg [ADDR_BITS-1:0] port_addr;
    reg signed [15:0]   wr_i, wr_q;

    wr_en     = 1'b0;
    port_addr = {ADDR_BITS{1'b0}};
    wr_i      = 16'sd0;
    wr_q      = 16'sd0;

    case (state)
        // Store each valid conjugate-multiply result at the capture index.
        ST_MULTIPLY: begin
            if (mf_valid_out && cap_count < FFT_SIZE) begin
                wr_en     = 1'b1;
                port_addr = cap_count[ADDR_BITS-1:0];
                wr_i      = mf_out_re;
                wr_q      = mf_out_im;
            end
        end

        // Inverse FFT: stream the product out (read), then take the IFFT
        // result back in (write). A pending write overrides the read address.
        ST_INV_FFT, ST_INV_CAP: begin
            // Read address is only meaningful while feeding; once
            // feed_count reaches FFT_SIZE the address falls back to 0.
            if (state == ST_INV_FFT && feed_count < FFT_SIZE)
                port_addr = feed_count[ADDR_BITS-1:0];

            if (fft_dout_valid && cap_count < FFT_SIZE) begin
                wr_en     = 1'b1;
                port_addr = cap_count[ADDR_BITS-1:0];
                wr_i      = fft_dout_re;
                wr_q      = fft_dout_im;
            end
        end

        // Output streaming: read-only, indexed by out_count.
        ST_OUTPUT: begin
            if (out_count < FFT_SIZE)
                port_addr = out_count[ADDR_BITS-1:0];
        end

        default: begin
            // idle port: address 0, no write
        end
    endcase

    // Synchronous write
    if (wr_en) begin
        prod_buf_i[port_addr] <= wr_i;
        prod_buf_q[port_addr] <= wr_q;
    end
    // Synchronous read: data shows up in prod_rdata_* one clock later
    prod_rdata_i <= prod_buf_i[port_addr];
    prod_rdata_q <= prod_buf_q[port_addr];
end
|
||||
|
||||
// ============================================================================
|
||||
// MAIN FSM — no buffer array accesses here (all via BRAM ports above)
|
||||
// ============================================================================
|
||||
// Sequencer for the matched-filter chain. It never touches the buffer arrays
// directly: the BRAM port blocks decode `state` and the counters to form
// addresses, and this block consumes the registered read data one clock later.
//
// Streaming pattern used for every buffer read-out (feed / multiply / output):
//   cycle 0      : "prime" - counter 0 is on the address bus, nothing is
//                  forwarded, the counter advances to 1
//   cycle 1..N   : forward the read data (it belongs to counter-1) and advance
// so N samples are forwarded while the counter runs from 1 up to N.
always @(posedge clk or negedge reset_n) begin
    if (!reset_n) begin
        state          <= ST_IDLE;

        // counters
        collect_count  <= 0;
        feed_count     <= 0;
        cap_count      <= 0;
        mult_count     <= 0;
        out_count      <= 0;
        mf_flush_count <= 0;

        // read-latency flags
        feed_primed    <= 1'b0;
        mult_primed    <= 1'b0;
        out_primed     <= 1'b0;

        // FFT engine drive
        fft_start      <= 1'b0;
        fft_inverse    <= 1'b0;
        fft_din_valid  <= 1'b0;
        fft_din_re     <= 0;
        fft_din_im     <= 0;

        // conjugate-multiplier drive
        mf_valid_in    <= 1'b0;
        mf_sig_re      <= 0;
        mf_sig_im      <= 0;
        mf_ref_re      <= 0;
        mf_ref_im      <= 0;

        // output stage
        out_valid_reg  <= 1'b0;
        out_i_reg      <= 0;
        out_q_reg      <= 0;
    end else begin
        // Strobes are one clock wide unless a state re-asserts them below.
        fft_start     <= 1'b0;
        fft_din_valid <= 1'b0;
        mf_valid_in   <= 1'b0;
        out_valid_reg <= 1'b0;

        case (state)

            // ------------------------------------------------------------
            // IDLE: wait for the first ADC sample. The sig/ref BRAM ports
            // store that sample at address 0, so collection resumes at 1.
            // ------------------------------------------------------------
            ST_IDLE: begin
                feed_primed <= 1'b0;
                mult_primed <= 1'b0;
                out_primed  <= 1'b0;
                if (adc_valid) begin
                    collect_count <= 1;
                    state         <= ST_COLLECT;
                end else begin
                    collect_count <= 0;
                end
            end

            // ------------------------------------------------------------
            // COLLECT: count accepted samples (the BRAM ports do the
            // writes). Once the count hits FFT_SIZE, kick off the forward
            // FFT of the signal buffer.
            // ------------------------------------------------------------
            ST_COLLECT: begin
                if (collect_count == FFT_SIZE) begin
                    fft_start   <= 1'b1;
                    fft_inverse <= 1'b0;    // forward transform
                    feed_count  <= 0;
                    cap_count   <= 0;
                    feed_primed <= 1'b0;
                    state       <= ST_SIG_FFT;
                end else if (adc_valid && collect_count < FFT_SIZE) begin
                    collect_count <= collect_count + 1;
                end
            end

            // ------------------------------------------------------------
            // SIG_FFT: stream sig_buf into the FFT engine and count the
            // results it hands back (stored by the sig BRAM port).
            // ------------------------------------------------------------
            ST_SIG_FFT: begin
                if (feed_primed) begin
                    // Read data is valid for address feed_count-1; the
                    // count == FFT_SIZE case forwards the final sample.
                    if (feed_count <= FFT_SIZE) begin
                        fft_din_re    <= sig_rdata_i;
                        fft_din_im    <= sig_rdata_q;
                        fft_din_valid <= 1'b1;
                        feed_count    <= feed_count + 1;
                    end
                end else if (feed_count < FFT_SIZE) begin
                    // Prime cycle: address is out, data arrives next clock.
                    feed_primed <= 1'b1;
                    feed_count  <= feed_count + 1;
                end

                if (fft_dout_valid && cap_count < FFT_SIZE)
                    cap_count <= cap_count + 1;

                if (fft_done)
                    state <= ST_SIG_CAP;
            end

            // ------------------------------------------------------------
            // SIG_CAP: one-clock hand-over to the reference FFT. cap_count
            // is cleared here, which supersedes any capture increment in
            // this clock (the BRAM port still performs such a write).
            // ------------------------------------------------------------
            ST_SIG_CAP: begin
                fft_start   <= 1'b1;
                fft_inverse <= 1'b0;        // forward transform
                feed_count  <= 0;
                cap_count   <= 0;
                feed_primed <= 1'b0;
                state       <= ST_REF_FFT;
            end

            // ------------------------------------------------------------
            // REF_FFT: stream ref_buf into the FFT engine and count the
            // results it hands back (stored by the ref BRAM port).
            // ------------------------------------------------------------
            ST_REF_FFT: begin
                if (feed_primed) begin
                    if (feed_count <= FFT_SIZE) begin
                        fft_din_re    <= ref_rdata_i;
                        fft_din_im    <= ref_rdata_q;
                        fft_din_valid <= 1'b1;
                        feed_count    <= feed_count + 1;
                    end
                end else if (feed_count < FFT_SIZE) begin
                    feed_primed <= 1'b1;
                    feed_count  <= feed_count + 1;
                end

                if (fft_dout_valid && cap_count < FFT_SIZE)
                    cap_count <= cap_count + 1;

                if (fft_done)
                    state <= ST_REF_CAP;
            end

            // ------------------------------------------------------------
            // REF_CAP: one-clock hand-over to the multiply phase. As in
            // SIG_CAP, clearing cap_count supersedes any increment.
            // ------------------------------------------------------------
            ST_REF_CAP: begin
                mult_count     <= 0;
                cap_count      <= 0;
                mf_flush_count <= 0;
                mult_primed    <= 1'b0;
                state          <= ST_MULTIPLY;
            end

            // ------------------------------------------------------------
            // MULTIPLY: read sig_buf and ref_buf in lock-step and push the
            // pairs through frequency_matched_filter; results are stored by
            // the prod BRAM port. After the last pair has been issued the
            // state idles (counting flush cycles) until FFT_SIZE results
            // have been captured.
            // ------------------------------------------------------------
            ST_MULTIPLY: begin
                if (mult_primed && mult_count <= FFT_SIZE) begin
                    mf_sig_re   <= sig_rdata_i;
                    mf_sig_im   <= sig_rdata_q;
                    mf_ref_re   <= ref_rdata_i;
                    mf_ref_im   <= ref_rdata_q;
                    mf_valid_in <= 1'b1;
                    mult_count  <= mult_count + 1;
                end else if (!mult_primed && mult_count < FFT_SIZE) begin
                    mult_primed <= 1'b1;
                    mult_count  <= mult_count + 1;
                end else begin
                    // Draining the multiplier pipeline
                    mf_flush_count <= mf_flush_count + 1;
                end

                if (mf_valid_out && cap_count < FFT_SIZE)
                    cap_count <= cap_count + 1;

                // Must stay after the increment above: this clear wins.
                if (cap_count == FFT_SIZE) begin
                    fft_start   <= 1'b1;
                    fft_inverse <= 1'b1;    // inverse transform
                    feed_count  <= 0;
                    cap_count   <= 0;
                    feed_primed <= 1'b0;
                    state       <= ST_INV_FFT;
                end
            end

            // ------------------------------------------------------------
            // INV_FFT: stream prod_buf into the FFT engine (inverse mode)
            // and count the results it hands back (stored by the prod port).
            // ------------------------------------------------------------
            ST_INV_FFT: begin
                if (feed_primed) begin
                    if (feed_count <= FFT_SIZE) begin
                        fft_din_re    <= prod_rdata_i;
                        fft_din_im    <= prod_rdata_q;
                        fft_din_valid <= 1'b1;
                        feed_count    <= feed_count + 1;
                    end
                end else if (feed_count < FFT_SIZE) begin
                    feed_primed <= 1'b1;
                    feed_count  <= feed_count + 1;
                end

                if (fft_dout_valid && cap_count < FFT_SIZE)
                    cap_count <= cap_count + 1;

                if (fft_done)
                    state <= ST_INV_CAP;
            end

            // ------------------------------------------------------------
            // INV_CAP: one-clock hand-over to output streaming. cap_count
            // is not cleared here, so a capture in this clock still counts.
            // ------------------------------------------------------------
            ST_INV_CAP: begin
                if (fft_dout_valid && cap_count < FFT_SIZE)
                    cap_count <= cap_count + 1;

                out_count  <= 0;
                out_primed <= 1'b0;
                state      <= ST_OUTPUT;
            end

            // ------------------------------------------------------------
            // OUTPUT: stream prod_buf to the range-profile outputs, one
            // sample per clock after the prime cycle, then finish.
            // ------------------------------------------------------------
            ST_OUTPUT: begin
                if (out_primed && out_count <= FFT_SIZE) begin
                    out_i_reg     <= prod_rdata_i;
                    out_q_reg     <= prod_rdata_q;
                    out_valid_reg <= 1'b1;
                    out_count     <= out_count + 1;
                end else if (!out_primed && out_count < FFT_SIZE) begin
                    out_primed <= 1'b1;
                    out_count  <= out_count + 1;
                end else begin
                    state <= ST_DONE;
                end
            end

            // ------------------------------------------------------------
            // DONE: single-clock state on the way back to IDLE
            // ------------------------------------------------------------
            ST_DONE: begin
                state <= ST_IDLE;
            end

            default: state <= ST_IDLE;

        endcase
    end
end
|
||||
|
||||
// ============================================================================
|
||||
// OUTPUT ASSIGNMENTS
|
||||
// ============================================================================
|
||||
assign range_profile_i = out_i_reg;
|
||||
assign range_profile_q = out_q_reg;
|
||||
assign range_profile_valid = out_valid_reg;
|
||||
assign chain_state = state;
|
||||
|
||||
// ============================================================================
|
||||
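// BUFFER INIT (zero-fill so simulation never reads X from an unwritten entry)
// NOTE(review): Vivado synthesis can apply array initial blocks as RAM initial
// contents rather than ignoring them — confirm the intended synthesis behavior.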
|
||||
// ============================================================================
|
||||
integer init_idx;
|
||||
initial begin
|
||||
for (init_idx = 0; init_idx < FFT_SIZE; init_idx = init_idx + 1) begin
|
||||
sig_buf_i[init_idx] = 0;
|
||||
sig_buf_q[init_idx] = 0;
|
||||
ref_buf_i[init_idx] = 0;
|
||||
ref_buf_q[init_idx] = 0;
|
||||
prod_buf_i[init_idx] = 0;
|
||||
prod_buf_q[init_idx] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
@@ -0,0 +1,526 @@
|
||||
`timescale 1ns / 1ps
|
||||
|
||||
/**
|
||||
* tb_fft_engine.v
|
||||
*
|
||||
* Testbench for the synthesizable FFT engine.
|
||||
* Tests with N=32 first (fast), then validates key properties.
|
||||
*
|
||||
* Test Groups:
|
||||
* 1. Impulse response: FFT of delta[0] should be all 1s
|
||||
* 2. DC input: FFT of all-1s should be delta at bin 0
|
||||
* 3. Single tone: FFT of cos(2*pi*k/N) should peak at bin k
|
||||
* 4. Roundtrip: FFT then IFFT should recover original
|
||||
* 5. Linearity: FFT(a+b) ~= FFT(a) + FFT(b)
|
||||
*
|
||||
* Convention: standard check task with pass/fail tracking.
|
||||
*/
|
||||
|
||||
module tb_fft_engine;
|
||||
|
||||
// ============================================================================
|
||||
// PARAMETERS — test with 32-pt for speed
|
||||
// ============================================================================
|
||||
localparam N = 32;
|
||||
localparam LOG2N = 5;
|
||||
localparam DATA_W = 16;
|
||||
localparam INT_W = 32;
|
||||
localparam TW_W = 16;
|
||||
localparam CLK_PERIOD = 10;
|
||||
|
||||
// ============================================================================
|
||||
// SIGNALS
|
||||
// ============================================================================
|
||||
reg clk, reset_n;
|
||||
reg start, inverse;
|
||||
reg signed [DATA_W-1:0] din_re, din_im;
|
||||
reg din_valid;
|
||||
wire signed [DATA_W-1:0] dout_re, dout_im;
|
||||
wire dout_valid, busy, done_sig;
|
||||
|
||||
// ============================================================================
|
||||
// DUT
|
||||
// ============================================================================
|
||||
fft_engine #(
|
||||
.N(N),
|
||||
.LOG2N(LOG2N),
|
||||
.DATA_W(DATA_W),
|
||||
.INTERNAL_W(INT_W),
|
||||
.TWIDDLE_W(TW_W),
|
||||
.TWIDDLE_FILE("fft_twiddle_32.mem")
|
||||
) dut (
|
||||
.clk(clk),
|
||||
.reset_n(reset_n),
|
||||
.start(start),
|
||||
.inverse(inverse),
|
||||
.din_re(din_re),
|
||||
.din_im(din_im),
|
||||
.din_valid(din_valid),
|
||||
.dout_re(dout_re),
|
||||
.dout_im(dout_im),
|
||||
.dout_valid(dout_valid),
|
||||
.busy(busy),
|
||||
.done(done_sig)
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// CLOCK
|
||||
// ============================================================================
|
||||
initial clk = 0;
|
||||
always #(CLK_PERIOD/2) clk = ~clk;
|
||||
|
||||
// ============================================================================
|
||||
// PASS/FAIL TRACKING
|
||||
// ============================================================================
|
||||
integer pass_count, fail_count;
|
||||
|
||||
// Record one test result.
//   cond  : 1 counts as a pass; 0, X or Z count as a failure
//   label : message printed next to the verdict
// Updates the module-level pass_count / fail_count tallies.
task check;
    input cond;
    input [512*8-1:0] label;
    begin
        if (cond !== 1'b1) begin
            fail_count = fail_count + 1;
            $display("  [FAIL] %0s", label);
        end else begin
            pass_count = pass_count + 1;
            $display("  [PASS] %0s", label);
        end
    end
endtask
|
||||
|
||||
// ============================================================================
|
||||
// STORAGE FOR CAPTURED OUTPUTS
|
||||
// ============================================================================
|
||||
reg signed [DATA_W-1:0] out_re [0:N-1];
|
||||
reg signed [DATA_W-1:0] out_im [0:N-1];
|
||||
integer out_idx;
|
||||
|
||||
// Second set for roundtrip
|
||||
reg signed [DATA_W-1:0] out2_re [0:N-1];
|
||||
reg signed [DATA_W-1:0] out2_im [0:N-1];
|
||||
|
||||
// Input storage for roundtrip comparison
|
||||
reg signed [DATA_W-1:0] in_re [0:N-1];
|
||||
reg signed [DATA_W-1:0] in_im [0:N-1];
|
||||
|
||||
// ============================================================================
|
||||
// HELPER TASKS
|
||||
// ============================================================================
|
||||
|
||||
// Reset
|
||||
// Put the DUT into a known state: drive every input to zero, hold reset_n
// low across five rising clock edges, release it 1 time unit after the
// fifth edge, then let two more edges pass before returning.
task do_reset;
    begin
        // Idle values on all DUT inputs while reset is asserted.
        din_valid = 1'b0;
        din_im    = 0;
        din_re    = 0;
        inverse   = 1'b0;
        start     = 1'b0;
        reset_n   = 1'b0;

        repeat (5) @(posedge clk);
        #1 reset_n = 1'b1;          // release away from the clock edge

        repeat (2) @(posedge clk);
        #1;
    end
endtask
|
||||
|
||||
// Run FFT: load N samples from in_re/in_im arrays, capture output to out_re/out_im
|
||||
// Run one transform on the DUT and capture the result in out_re/out_im.
//   inv - value driven onto `inverse` for the run (the tests pass 0 for a
//         forward FFT and 1 for an inverse FFT)
//
// Sequence:
//   1. pulse `start` high for one clock,
//   2. stream the N samples held in in_re/in_im, one per clock, with
//      din_valid high,
//   3. store the first N words seen with dout_valid high.
//
// The capture phase is guarded by a watchdog: if the DUT has not produced N
// outputs within max_wait clocks, the task reports the problem, records a
// failed check and returns instead of hanging the simulation. In that case
// the entries of out_re/out_im at index >= out_idx keep their previous
// contents.
task run_fft;
    input inv;
    integer i;
    integer wait_cycles;    // clocks spent in the capture phase
    integer max_wait;       // watchdog limit for the capture phase
    begin
        inverse = inv;
        @(posedge clk); #1;
        start = 1;
        @(posedge clk); #1;
        start = 0;

        // Feed N samples, changing the inputs 1 time unit after each edge.
        for (i = 0; i < N; i = i + 1) begin
            din_re    = in_re[i];
            din_im    = in_im[i];
            din_valid = 1;
            @(posedge clk); #1;
        end
        din_valid = 0;
        din_re    = 0;
        din_im    = 0;

        // Wait for output and capture. The limit is deliberately generous
        // (1000 clocks per point) so it only trips on a genuinely stuck DUT.
        out_idx     = 0;
        wait_cycles = 0;
        max_wait    = 1000 * N;
        while (out_idx < N && wait_cycles < max_wait) begin
            @(posedge clk); #1;
            wait_cycles = wait_cycles + 1;
            if (dout_valid) begin
                out_re[out_idx] = dout_re;
                out_im[out_idx] = dout_im;
                out_idx = out_idx + 1;
            end
        end

        if (out_idx < N) begin
            $display(" ERROR: run_fft timed out after %0d cycles (%0d of %0d outputs captured)",
                     wait_cycles, out_idx, N);
            check(1'b0, "run_fft: DUT delivered all outputs before timeout");
        end

        // One extra clock of spacing before returning (the `done` output is
        // not sampled here).
        @(posedge clk); #1;
    end
endtask
|
||||
|
||||
// Run FFT and capture to out2 arrays
|
||||
// Same sequence as a normal FFT run, but the captured words go to the
// second result set (out2_re/out2_im) so an earlier result in out_re/out_im
// is left intact.
//   inv - value driven onto `inverse` for the run
//
// Pulses `start` for one clock, streams the N samples held in in_re/in_im
// with din_valid high, then stores the first N words seen with dout_valid
// high. A watchdog bounds the capture phase: if fewer than N outputs arrive
// within max_wait clocks the task reports it, records a failed check and
// returns; entries at index >= out_idx then keep their previous contents.
task run_fft_to_out2;
    input inv;
    integer i;
    integer wait_cycles;    // clocks spent in the capture phase
    integer max_wait;       // watchdog limit for the capture phase
    begin
        inverse = inv;
        @(posedge clk); #1;
        start = 1;
        @(posedge clk); #1;
        start = 0;

        // Feed N samples, changing the inputs 1 time unit after each edge.
        for (i = 0; i < N; i = i + 1) begin
            din_re    = in_re[i];
            din_im    = in_im[i];
            din_valid = 1;
            @(posedge clk); #1;
        end
        din_valid = 0;
        din_re    = 0;
        din_im    = 0;

        // Capture with a generous limit (1000 clocks per point) so only a
        // genuinely stuck DUT trips the watchdog.
        out_idx     = 0;
        wait_cycles = 0;
        max_wait    = 1000 * N;
        while (out_idx < N && wait_cycles < max_wait) begin
            @(posedge clk); #1;
            wait_cycles = wait_cycles + 1;
            if (dout_valid) begin
                out2_re[out_idx] = dout_re;
                out2_im[out_idx] = dout_im;
                out_idx = out_idx + 1;
            end
        end

        if (out_idx < N) begin
            $display(" ERROR: run_fft_to_out2 timed out after %0d cycles (%0d of %0d outputs captured)",
                     wait_cycles, out_idx, N);
            check(1'b0, "run_fft_to_out2: DUT delivered all outputs before timeout");
        end

        // One extra clock of spacing before returning.
        @(posedge clk); #1;
    end
endtask
|
||||
|
||||
// ============================================================================
|
||||
// VCD WAVEFORM DUMP
|
||||
// ============================================================================
|
||||
// Waveform dump: write every signal at and below tb_fft_engine (depth 0 =
// all levels) to tb_fft_engine.vcd.
initial
begin
    $dumpfile("tb_fft_engine.vcd");
    $dumpvars(0, tb_fft_engine);
end
|
||||
|
||||
// ============================================================================
|
||||
// MAIN TEST
|
||||
// ============================================================================
|
||||
integer i, j;
|
||||
integer max_mag_bin;
|
||||
reg signed [31:0] max_mag;
|
||||
reg signed [31:0] mag;
|
||||
reg signed [31:0] err;
|
||||
integer max_err;
|
||||
integer total_energy_in, total_energy_out;
|
||||
|
||||
// For tone generation
|
||||
real angle;
|
||||
reg signed [DATA_W-1:0] cos_val;
|
||||
|
||||
// Main stimulus. Resets the DUT, runs eight self-checking test groups and
// prints a summary before ending the simulation. Each group loads
// in_re/in_im, runs the transform through run_fft and inspects out_re/out_im.
initial begin : main_test
    // 64-bit scratch for the Parseval check (group 6). Forming
    // "error * 100" in 32-bit arithmetic wraps once the error exceeds about
    // 21.4e6, which could turn a large mismatch into a negative percentage
    // that passes the "< 5" test.
    reg signed [63:0] energy_time_scaled;   // N * sum |x[n]|^2
    reg signed [63:0] energy_diff;          // |energy_time_scaled - sum |X[k]|^2|
    reg signed [63:0] rel_err_pct;          // floor(100 * diff / energy_time_scaled)

    pass_count = 0;
    fail_count = 0;

    $display("============================================================");
    $display(" FFT Engine Testbench — N=%0d", N);
    $display("============================================================");

    do_reset;

    // ================================================================
    // TEST GROUP 1: Impulse Response
    // Input is an impulse of amplitude 1000 at n = 0. The checks expect
    // every output bin to be ~1000 + j0, i.e. an unscaled forward FFT.
    // ================================================================
    $display("");
    $display("--- Test Group 1: Impulse Response ---");

    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i == 0) ? 16'sd1000 : 16'sd0;
        in_im[i] = 16'sd0;
    end

    run_fft(0);  // Forward FFT

    // Largest deviation of any bin from 1000 + j0
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - 1000;
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" Impulse FFT max error from expected: %0d", max_err);
    check(max_err < 10, "Impulse FFT: all bins ~= input amplitude");
    check(out_re[0] >= 998 && out_re[0] <= 1002,
          "Impulse FFT: bin 0 real ~= 1000");

    // ================================================================
    // TEST GROUP 2: DC Input
    // Constant value A = 100 on all N samples:
    //   bin 0 = A*N, all other bins = 0
    // ================================================================
    $display("");
    $display("--- Test Group 2: DC Input ---");

    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = 16'sd100;
        in_im[i] = 16'sd0;
    end

    run_fft(0);

    $display(" DC FFT bin[0] = %0d + j%0d (expect %0d + j0)", out_re[0], out_im[0], 100*N);
    // Allow +/-50 counts around A*N for fixed-point twiddle rounding
    check(out_re[0] >= (100*N - 50) && out_re[0] <= (100*N + 50),
          "DC FFT: bin 0 real ~= A*N (1.5% tol)");

    // Largest |re| or |im| over the non-DC bins
    max_err = 0;
    for (i = 1; i < N; i = i + 1) begin
        err = out_re[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" DC FFT max non-DC bin magnitude: %0d", max_err);
    check(max_err < 20, "DC FFT: non-DC bins ~= 0 (Q15 rounding tol)");

    // ================================================================
    // TEST GROUP 3: Single Tone (cosine at bin 4)
    // 1000*cos(2*pi*4*n/32) -> peaks at bins 4 and 28,
    // each expected at about 1000*32/2 = 16000
    // ================================================================
    $display("");
    $display("--- Test Group 3: Single Tone (bin 4) ---");

    for (i = 0; i < N; i = i + 1) begin
        angle   = 6.28318530718 * 4.0 * i / 32.0;
        cos_val = $rtoi($cos(angle) * 1000.0);
        in_re[i] = cos_val;
        in_im[i] = 16'sd0;
    end

    run_fft(0);

    // Find the bin with the largest squared magnitude
    max_mag     = 0;
    max_mag_bin = 0;
    for (i = 0; i < N; i = i + 1) begin
        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
        if (mag > max_mag) begin
            max_mag     = mag;
            max_mag_bin = i;
        end
    end
    $display(" Tone FFT peak bin: %0d (expect 4)", max_mag_bin);
    $display(" Tone FFT bin[4] = %0d + j%0d", out_re[4], out_im[4]);
    $display(" Tone FFT bin[28] = %0d + j%0d", out_re[28], out_im[28]);
    check(max_mag_bin == 4 || max_mag_bin == 28,
          "Tone FFT: peak at bin 4 or 28");
    // Compare squared magnitudes so no square root is needed
    mag = out_re[4] * out_re[4] + out_im[4] * out_im[4];
    check(mag > 15000*15000 && mag < 17000*17000,
          "Tone FFT: bin 4 magnitude ~= 16000");

    // ================================================================
    // TEST GROUP 4: Roundtrip (FFT then IFFT = identity)
    // Deterministic pseudo-random pattern -> FFT -> IFFT -> compare
    // ================================================================
    $display("");
    $display("--- Test Group 4: Roundtrip (FFT->IFFT) ---");

    // Pattern values lie in [-1000, 1000]
    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i * 137 + 42) % 2001 - 1000;
        in_im[i] = (i * 251 + 17) % 2001 - 1000;
    end

    // Forward FFT
    run_fft(0);

    // The spectrum becomes the IFFT input. in_re/in_im are overwritten, so
    // the reference copy of the original pattern is regenerated into
    // out2_re/out2_im.
    for (i = 0; i < N; i = i + 1) begin
        in_re[i]   = out_re[i];
        in_im[i]   = out_im[i];
        out2_re[i] = (i * 137 + 42) % 2001 - 1000;
        out2_im[i] = (i * 251 + 17) % 2001 - 1000;
    end

    // Inverse FFT of the spectrum
    run_fft(1);

    // out_re/out_im should match the reference within tolerance
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - out2_re[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i] - out2_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" Roundtrip max error: %0d", max_err);
    check(max_err < 20, "Roundtrip: FFT->IFFT recovers original (err < 20)");
    check(max_err < 5, "Roundtrip: FFT->IFFT tight tolerance (err < 5)");

    // Print first few samples for debugging
    $display(" Sample comparison (idx: original vs recovered):");
    for (i = 0; i < 8; i = i + 1) begin
        $display(" [%0d] re: %0d vs %0d, im: %0d vs %0d",
                 i, out2_re[i], out_re[i], out2_im[i], out_im[i]);
    end

    // ================================================================
    // TEST GROUP 5: IFFT of impulse
    // Input: bin[0] = N, rest = 0. The check expects every output sample
    // to be ~1, i.e. an inverse transform that includes the 1/N scaling.
    // ================================================================
    $display("");
    $display("--- Test Group 5: IFFT of Impulse ---");

    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i == 0) ? N : 16'sd0;
        in_im[i] = 16'sd0;
    end

    run_fft(1);  // Inverse FFT

    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - 1;
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    $display(" IFFT impulse max error: %0d", max_err);
    check(max_err < 2, "IFFT impulse: all samples ~= 1");

    // ================================================================
    // TEST GROUP 6: Parseval's theorem (energy conservation)
    // Sum |x[n]|^2 = (1/N) * Sum |X[k]|^2, checked as
    // N * sum_time vs sum_freq
    // ================================================================
    $display("");
    $display("--- Test Group 6: Parseval's Theorem ---");

    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = (i * 137 + 42) % 2001 - 1000;
        in_im[i] = (i * 251 + 17) % 2001 - 1000;
    end

    // Time-domain energy
    total_energy_in = 0;
    for (i = 0; i < N; i = i + 1) begin
        total_energy_in = total_energy_in + in_re[i] * in_re[i] + in_im[i] * in_im[i];
    end

    run_fft(0);

    // Frequency-domain energy
    total_energy_out = 0;
    for (i = 0; i < N; i = i + 1) begin
        total_energy_out = total_energy_out + out_re[i] * out_re[i] + out_im[i] * out_im[i];
    end

    // Widen before scaling so neither the product by N nor the later
    // multiplication by 100 can wrap.
    energy_time_scaled = total_energy_in;
    energy_time_scaled = energy_time_scaled * N;
    energy_diff        = energy_time_scaled - total_energy_out;
    if (energy_diff < 0) energy_diff = -energy_diff;

    $display(" Time energy * N = %0d", energy_time_scaled);
    $display(" Freq energy = %0d", total_energy_out);
    $display(" Parseval error = %0d", energy_diff);
    // Relative error in whole percent (skipped when the input has no energy)
    if (energy_time_scaled > 0) begin
        rel_err_pct = (energy_diff * 100) / energy_time_scaled;
        $display(" Parseval rel error = %0d%%", rel_err_pct);
        check(rel_err_pct < 5,
              "Parseval: energy conserved within 5%");
    end

    // ================================================================
    // TEST GROUP 7: Pure imaginary input
    // j*1000*sin(2*pi*2*n/32) -> peaks at bins 2 and 30
    // ================================================================
    $display("");
    $display("--- Test Group 7: Pure Imaginary Tone (bin 2) ---");

    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = 16'sd0;
        angle    = 6.28318530718 * 2.0 * i / 32.0;
        in_im[i] = $rtoi($sin(angle) * 1000.0);
    end

    run_fft(0);

    // Find the bin with the largest squared magnitude
    max_mag     = 0;
    max_mag_bin = 0;
    for (i = 0; i < N; i = i + 1) begin
        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
        if (mag > max_mag) begin
            max_mag     = mag;
            max_mag_bin = i;
        end
    end
    $display(" Imag tone peak bin: %0d (expect 2 or 30)", max_mag_bin);
    check(max_mag_bin == 2 || max_mag_bin == 30,
          "Imag tone: peak at bin 2 or 30");

    // ================================================================
    // TEST GROUP 8: Zero input -> every output bin must be exactly zero
    // ================================================================
    $display("");
    $display("--- Test Group 8: Zero Input ---");

    for (i = 0; i < N; i = i + 1) begin
        in_re[i] = 16'sd0;
        in_im[i] = 16'sd0;
    end

    run_fft(0);

    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
        err = out_im[i];
        if (err < 0) err = -err;
        if (err > max_err) max_err = err;
    end
    check(max_err == 0, "Zero input: all output bins = 0");

    // ================================================================
    // SUMMARY
    // ================================================================
    $display("");
    $display("============================================================");
    $display(" RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
    if (fail_count == 0)
        $display(" ALL TESTS PASSED");
    else
        $display(" SOME TESTS FAILED");
    $display("============================================================");

    $finish;
end
|
||||
|
||||
endmodule
|
||||
@@ -0,0 +1,543 @@
|
||||
`timescale 1ns / 1ps
|
||||
|
||||
/**
|
||||
* tb_mf_chain_synth.v
|
||||
*
|
||||
* Testbench for the SYNTHESIS branch of matched_filter_processing_chain.v.
|
||||
* This is compiled WITHOUT -DSIMULATION so the `else` branch (fft_engine-based)
|
||||
* is activated.
|
||||
*
|
||||
* The synthesis branch uses an iterative fft_engine (1024-pt, single butterfly),
|
||||
* so processing takes ~40K+ clock cycles per frame. Timeouts are set accordingly.
|
||||
*/
|
||||
|
||||
module tb_mf_chain_synth;
|
||||
|
||||
// ── Parameters ─────────────────────────────────────────────
|
||||
localparam CLK_PERIOD = 10.0; // 100 MHz
|
||||
localparam FFT_SIZE = 1024;
|
||||
// Timeout for full frame processing:
|
||||
// 3 FFTs × ~12K cycles each + multiply ~1K + overhead ≈ 40K
|
||||
// Use 200K for safety margin
|
||||
localparam FRAME_TIMEOUT = 200000;
|
||||
|
||||
// ── Signals ────────────────────────────────────────────────
|
||||
reg clk;
|
||||
reg reset_n;
|
||||
reg [15:0] adc_data_i;
|
||||
reg [15:0] adc_data_q;
|
||||
reg adc_valid;
|
||||
reg [5:0] chirp_counter;
|
||||
reg [15:0] long_chirp_real;
|
||||
reg [15:0] long_chirp_imag;
|
||||
reg [15:0] short_chirp_real;
|
||||
reg [15:0] short_chirp_imag;
|
||||
wire signed [15:0] range_profile_i;
|
||||
wire signed [15:0] range_profile_q;
|
||||
wire range_profile_valid;
|
||||
wire [3:0] chain_state;
|
||||
|
||||
// ── Test bookkeeping ───────────────────────────────────────
|
||||
integer pass_count;
|
||||
integer fail_count;
|
||||
integer test_num;
|
||||
integer i;
|
||||
|
||||
// Synthesis-branch states (mirror DUT)
|
||||
localparam [3:0] ST_IDLE = 4'd0,
|
||||
ST_COLLECT = 4'd1,
|
||||
ST_SIG_FFT = 4'd2,
|
||||
ST_SIG_CAP = 4'd3,
|
||||
ST_REF_FFT = 4'd4,
|
||||
ST_REF_CAP = 4'd5,
|
||||
ST_MULTIPLY = 4'd6,
|
||||
ST_INV_FFT = 4'd7,
|
||||
ST_INV_CAP = 4'd8,
|
||||
ST_OUTPUT = 4'd9,
|
||||
ST_DONE = 4'd10;
|
||||
|
||||
// ── Concurrent output capture ──────────────────────────────
|
||||
integer cap_count;
|
||||
reg cap_enable;
|
||||
integer cap_max_abs;
|
||||
integer cap_peak_bin;
|
||||
integer cap_cur_abs;
|
||||
|
||||
// Output capture arrays
|
||||
reg signed [15:0] cap_out_i [0:1023];
|
||||
reg signed [15:0] cap_out_q [0:1023];
|
||||
|
||||
// ── Clock ──────────────────────────────────────────────────
|
||||
// Free-running clock: invert clk every half period.
always begin
    #(CLK_PERIOD/2) clk = ~clk;
end
|
||||
|
||||
// ── DUT ────────────────────────────────────────────────────
|
||||
matched_filter_processing_chain uut (
|
||||
.clk (clk),
|
||||
.reset_n (reset_n),
|
||||
.adc_data_i (adc_data_i),
|
||||
.adc_data_q (adc_data_q),
|
||||
.adc_valid (adc_valid),
|
||||
.chirp_counter (chirp_counter),
|
||||
.long_chirp_real (long_chirp_real),
|
||||
.long_chirp_imag (long_chirp_imag),
|
||||
.short_chirp_real (short_chirp_real),
|
||||
.short_chirp_imag (short_chirp_imag),
|
||||
.range_profile_i (range_profile_i),
|
||||
.range_profile_q (range_profile_q),
|
||||
.range_profile_valid (range_profile_valid),
|
||||
.chain_state (chain_state)
|
||||
);
|
||||
|
||||
// ── Concurrent output capture block ────────────────────────
|
||||
// Output monitor. One time unit after each rising clock edge, while
// cap_enable is set and the DUT asserts range_profile_valid:
//   - the sample is stored in cap_out_i/cap_out_q (first FFT_SIZE only),
//   - |I| + |Q| is compared against the running maximum, and the maximum
//     and its sample index are updated when exceeded,
//   - cap_count is incremented (also for samples beyond FFT_SIZE, so an
//     over-long frame shows up in the count).
always @(posedge clk) begin
    #1;
    if (cap_enable && range_profile_valid) begin
        if (cap_count < FFT_SIZE) begin
            cap_out_q[cap_count] = range_profile_q;
            cap_out_i[cap_count] = range_profile_i;
        end

        // |I| first, then add |Q|; the sign bit selects the negated value
        cap_cur_abs = range_profile_i[15] ? -range_profile_i : range_profile_i;
        cap_cur_abs = cap_cur_abs
                    + (range_profile_q[15] ? -range_profile_q : range_profile_q);

        if (cap_cur_abs > cap_max_abs) begin
            cap_peak_bin = cap_count;
            cap_max_abs  = cap_cur_abs;
        end

        cap_count = cap_count + 1;
    end
end
|
||||
|
||||
// ── Check task ─────────────────────────────────────────────
|
||||
// Record the verdict of one numbered self-check.
//   cond  - 1 when the check succeeded; 0, X or Z all count as a failure
//   label - packed ASCII description (up to 64 characters)
// Increments test_num, prints a [PASS]/[FAIL] line carrying that number and
// bumps pass_count or fail_count.
task check;
    input cond;
    input [511:0] label;
    begin
        test_num = test_num + 1;
        if (cond === 1'b1) begin
            pass_count = pass_count + 1;
            $display("[PASS] Test %0d: %0s", test_num, label);
        end else begin
            fail_count = fail_count + 1;
            $display("[FAIL] Test %0d: %0s", test_num, label);
        end
    end
endtask
|
||||
|
||||
// ── Helper: apply reset ────────────────────────────────────
|
||||
// Reset the DUT and the capture bookkeeping.
// Asserts reset_n, drives every DUT input to zero, clears the capture
// state (capture disabled, count 0, max 0, peak bin -1), holds reset across
// four rising clock edges, releases it and waits one more edge.
//
// reset_n is released 1 time unit after the fourth edge rather than in the
// same time step as the edge: changing it exactly on the edge races with
// logic clocked on that edge, and the other reset sequences in this
// testbench already offset their changes by #1.
task apply_reset;
    begin
        reset_n          = 0;
        adc_valid        = 0;
        adc_data_i       = 16'd0;
        adc_data_q       = 16'd0;
        chirp_counter    = 6'd0;
        long_chirp_real  = 16'd0;
        long_chirp_imag  = 16'd0;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        cap_enable       = 0;
        cap_count        = 0;
        cap_max_abs      = 0;
        cap_peak_bin     = -1;
        repeat (4) @(posedge clk);
        #1;                 // move the release off the active clock edge
        reset_n = 1;
        @(posedge clk);
        #1;
    end
endtask
|
||||
|
||||
// ── Helper: start capture ──────────────────────────────────
|
||||
// Arm the output monitor for a new frame: clear the running statistics
// (peak bin -1, max magnitude 0, sample count 0) and then enable capture.
task start_capture;
    begin
        cap_peak_bin = -1;
        cap_max_abs  = 0;
        cap_count    = 0;
        cap_enable   = 1'b1;
    end
endtask
|
||||
|
||||
// ── Helper: wait for IDLE with long timeout ────────────────
|
||||
// Block until chain_state reads ST_IDLE or FRAME_TIMEOUT rising clock edges
// have elapsed, whichever comes first, then advance 1 time unit. A timeout
// only prints a warning; the caller's subsequent checks decide pass/fail.
task wait_for_idle;
    integer cycles_waited;
    begin
        for (cycles_waited = 0;
             chain_state != ST_IDLE && cycles_waited < FRAME_TIMEOUT;
             cycles_waited = cycles_waited + 1) begin
            @(posedge clk);
        end
        #1;
        if (cycles_waited >= FRAME_TIMEOUT) begin
            $display(" WARNING: wait_for_idle timed out at %0d cycles", cycles_waited);
        end
    end
endtask
|
||||
|
||||
// ── Helper: feed DC frame ──────────────────────────────────
|
||||
// Stream one frame of FFT_SIZE constant samples into the DUT.
// Signal and long-chirp reference are both 0x1000 + j0, the short-chirp
// inputs are zero, and adc_valid stays high for FFT_SIZE consecutive clocks
// before being dropped.
task feed_dc_frame;
    begin
        // The sample values never change during the frame, so they are
        // driven once up front.
        adc_data_i       = 16'sh1000;   // +4096
        adc_data_q       = 16'sh0000;
        long_chirp_real  = 16'sh1000;
        long_chirp_imag  = 16'sh0000;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;

        // One sample is consumed per rising edge.
        repeat (FFT_SIZE) begin
            @(posedge clk);
            #1;
        end

        adc_valid = 1'b0;
    end
endtask
|
||||
|
||||
// ── Helper: feed tone frame (signal=reference=tone at bin) ─
|
||||
// Stream one frame of FFT_SIZE samples of a complex tone into the DUT, with
// the long-chirp reference carrying the identical tone.
//   tone_bin - number of tone cycles across the FFT_SIZE-sample frame
// Each sample is 8000*cos(phase) + j*8000*sin(phase), converted to an
// integer with $rtoi. The short-chirp inputs are held at zero and adc_valid
// stays high for the whole frame.
task feed_tone_frame;
    input integer tone_bin;
    integer n;
    real    phase;
    integer tone_re;
    integer tone_im;
    begin
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;

        for (n = 0; n < FFT_SIZE; n = n + 1) begin
            phase   = 6.28318530718 * tone_bin * n / (1.0 * FFT_SIZE);
            tone_re = $rtoi(8000.0 * $cos(phase));
            tone_im = $rtoi(8000.0 * $sin(phase));

            // Signal and reference receive the same sample.
            adc_data_i      = tone_re;
            adc_data_q      = tone_im;
            long_chirp_real = tone_re;
            long_chirp_imag = tone_im;

            @(posedge clk);
            #1;
        end

        adc_valid = 1'b0;
    end
endtask
|
||||
|
||||
// ── Helper: feed impulse frame (delta at sample 0) ─────────
|
||||
// Stream one frame of FFT_SIZE samples in which only sample 0 is non-zero.
// Signal and long-chirp reference both carry 0x4000 + j0 on the first
// sample and zero afterwards; the short-chirp inputs stay zero and
// adc_valid is high for the whole frame.
task feed_impulse_frame;
    begin
        // Inputs that are zero for the entire frame.
        adc_data_q       = 16'sh0000;
        long_chirp_imag  = 16'sh0000;
        short_chirp_real = 16'd0;
        short_chirp_imag = 16'd0;
        adc_valid        = 1'b1;

        // Sample 0: the impulse (0.5 in Q15).
        adc_data_i      = 16'sh4000;
        long_chirp_real = 16'sh4000;
        @(posedge clk);
        #1;

        // Samples 1 .. FFT_SIZE-1: all zero.
        adc_data_i      = 16'sh0000;
        long_chirp_real = 16'sh0000;
        repeat (FFT_SIZE - 1) begin
            @(posedge clk);
            #1;
        end

        adc_valid = 1'b0;
    end
endtask
|
||||
|
||||
// ── Stimulus ───────────────────────────────────────────────
|
||||
initial begin
|
||||
$dumpfile("tb_mf_chain_synth.vcd");
|
||||
$dumpvars(0, tb_mf_chain_synth);
|
||||
|
||||
// Init
|
||||
clk = 0;
|
||||
pass_count = 0;
|
||||
fail_count = 0;
|
||||
test_num = 0;
|
||||
cap_enable = 0;
|
||||
cap_count = 0;
|
||||
cap_max_abs = 0;
|
||||
cap_peak_bin = -1;
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 1: Reset behaviour
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 1: Reset Behaviour ---");
|
||||
apply_reset;
|
||||
|
||||
reset_n = 0;
|
||||
repeat (4) @(posedge clk); #1;
|
||||
check(range_profile_valid === 1'b0, "range_profile_valid=0 during reset");
|
||||
check(chain_state === ST_IDLE, "chain_state=IDLE during reset");
|
||||
reset_n = 1;
|
||||
@(posedge clk); #1;
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 2: No valid input stays IDLE
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 2: No Valid Input → Stays IDLE ---");
|
||||
apply_reset;
|
||||
|
||||
repeat (100) @(posedge clk);
|
||||
#1;
|
||||
check(chain_state === ST_IDLE, "Stays in IDLE with no valid input");
|
||||
check(range_profile_valid === 1'b0, "No output when no input");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 3: DC frame — state transitions and output count
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 3: DC Frame — Full Processing ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
feed_dc_frame;
|
||||
|
||||
$display(" Waiting for processing (3 FFTs + multiply)...");
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Output count: %0d (expected %0d)", cap_count, FFT_SIZE);
|
||||
$display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "DC: Outputs exactly 1024 range profile samples");
|
||||
check(chain_state === ST_IDLE, "DC: Returns to IDLE after frame");
|
||||
// DC autocorrelation: FFT of DC = energy at bin 0 only
|
||||
// conj multiply = |bin0|^2 at bin 0, zeros elsewhere
|
||||
// IFFT of single bin = constant => peak at bin 0 (or any bin since all equal)
|
||||
// With Q15 truncation, expect non-zero output
|
||||
check(cap_max_abs > 0, "DC: Non-zero output");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 4: Zero input → zero output
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 4: Zero Input → Zero Output ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
for (i = 0; i < FFT_SIZE; i = i + 1) begin
|
||||
adc_data_i = 16'd0;
|
||||
adc_data_q = 16'd0;
|
||||
long_chirp_real = 16'd0;
|
||||
long_chirp_imag = 16'd0;
|
||||
short_chirp_real = 16'd0;
|
||||
short_chirp_imag = 16'd0;
|
||||
adc_valid = 1'b1;
|
||||
@(posedge clk); #1;
|
||||
end
|
||||
adc_valid = 1'b0;
|
||||
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Output count: %0d", cap_count);
|
||||
$display(" Max magnitude: %0d", cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "Zero: Got 1024 output samples");
|
||||
// Allow small rounding noise (fft_engine Q15 rounding can produce ±1)
|
||||
check(cap_max_abs <= 2, "Zero: Output magnitude <= 2 (near zero)");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 5: Tone autocorrelation (bin 5)
|
||||
// signal = reference = tone at bin 5
|
||||
// Autocorrelation peak at bin 0 (time lag 0)
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 5: Tone Autocorrelation (bin 5) ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
feed_tone_frame(5);
|
||||
|
||||
$display(" Waiting for processing...");
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Output count: %0d", cap_count);
|
||||
$display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "Tone: Got 1024 output samples");
|
||||
// Autocorrelation of a pure tone: peak at bin 0
|
||||
check(cap_peak_bin <= 5 || cap_peak_bin >= FFT_SIZE - 5,
|
||||
"Tone: Autocorrelation peak near bin 0");
|
||||
check(cap_max_abs > 0, "Tone: Peak magnitude > 0");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 6: Impulse autocorrelation
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 6: Impulse Autocorrelation ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
feed_impulse_frame;
|
||||
|
||||
$display(" Waiting for processing...");
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Output count: %0d", cap_count);
|
||||
$display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "Impulse: Got 1024 output samples");
|
||||
check(cap_max_abs > 0, "Impulse: Non-zero output");
|
||||
check(chain_state === ST_IDLE, "Impulse: Returns to IDLE");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 7: Reset mid-operation
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 7: Reset Mid-Operation ---");
|
||||
apply_reset;
|
||||
|
||||
// Feed ~512 samples (halfway through collection)
|
||||
for (i = 0; i < 512; i = i + 1) begin
|
||||
adc_data_i = 16'sh1000;
|
||||
adc_data_q = 16'sh0000;
|
||||
long_chirp_real = 16'sh1000;
|
||||
long_chirp_imag = 16'sh0000;
|
||||
short_chirp_real = 16'd0;
|
||||
short_chirp_imag = 16'd0;
|
||||
adc_valid = 1'b1;
|
||||
@(posedge clk); #1;
|
||||
end
|
||||
adc_valid = 1'b0;
|
||||
|
||||
// Assert reset
|
||||
reset_n = 0;
|
||||
repeat (4) @(posedge clk); #1;
|
||||
reset_n = 1;
|
||||
@(posedge clk); #1;
|
||||
|
||||
check(chain_state === ST_IDLE, "Mid-op reset: Returns to IDLE");
|
||||
check(range_profile_valid === 1'b0, "Mid-op reset: No output");
|
||||
|
||||
// Feed a complete frame after reset
|
||||
start_capture;
|
||||
feed_dc_frame;
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Post-reset frame: %0d outputs", cap_count);
|
||||
check(cap_count == FFT_SIZE, "Mid-op reset: Post-reset frame gives 1024 outputs");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 8: Back-to-back frames
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 8: Back-to-Back Frames ---");
|
||||
apply_reset;
|
||||
|
||||
// Frame 1
|
||||
start_capture;
|
||||
feed_dc_frame;
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
$display(" Frame 1: %0d outputs, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "B2B Frame 1: 1024 outputs");
|
||||
|
||||
// Frame 2
|
||||
start_capture;
|
||||
feed_tone_frame(3);
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
$display(" Frame 2: %0d outputs, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "B2B Frame 2: 1024 outputs");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 9: Mismatched signal vs reference
|
||||
// Signal at bin 5, reference at bin 10
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 9: Mismatched Signal vs Reference ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
for (i = 0; i < FFT_SIZE; i = i + 1) begin
|
||||
adc_data_i = $rtoi(8000.0 * $cos(6.28318530718 * 5 * i / 1024.0));
|
||||
adc_data_q = $rtoi(8000.0 * $sin(6.28318530718 * 5 * i / 1024.0));
|
||||
long_chirp_real = $rtoi(8000.0 * $cos(6.28318530718 * 10 * i / 1024.0));
|
||||
long_chirp_imag = $rtoi(8000.0 * $sin(6.28318530718 * 10 * i / 1024.0));
|
||||
short_chirp_real = 16'd0;
|
||||
short_chirp_imag = 16'd0;
|
||||
adc_valid = 1'b1;
|
||||
@(posedge clk); #1;
|
||||
end
|
||||
adc_valid = 1'b0;
|
||||
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Mismatched: peak bin=%0d, magnitude=%0d", cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "Mismatch: Got 1024 output samples");
|
||||
// Signal=bin5, ref=bin10: product has energy at bin(5-10)=bin(-5)=bin(1019)
|
||||
// IFFT of that gives a tone at sample spacing of 5
|
||||
// The key check is that it completes and produces output
|
||||
check(cap_max_abs > 0, "Mismatch: Non-zero output");
|
||||
check(chain_state === ST_IDLE, "Mismatch: Returns to IDLE");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 10: Saturation — max positive values
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 10: Saturation — Max Positive ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
for (i = 0; i < FFT_SIZE; i = i + 1) begin
|
||||
adc_data_i = 16'sh7FFF;
|
||||
adc_data_q = 16'sh7FFF;
|
||||
long_chirp_real = 16'sh7FFF;
|
||||
long_chirp_imag = 16'sh7FFF;
|
||||
short_chirp_real = 16'd0;
|
||||
short_chirp_imag = 16'd0;
|
||||
adc_valid = 1'b1;
|
||||
@(posedge clk); #1;
|
||||
end
|
||||
adc_valid = 1'b0;
|
||||
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Saturation: count=%0d, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "Saturation: Completes with 1024 outputs");
|
||||
check(chain_state === ST_IDLE, "Saturation: Returns to IDLE");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// TEST GROUP 11: Valid-gap / stall test
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("\n--- Test Group 11: Valid-Gap Stall Test ---");
|
||||
apply_reset;
|
||||
|
||||
start_capture;
|
||||
for (i = 0; i < FFT_SIZE; i = i + 1) begin
|
||||
adc_data_i = 16'sh1000;
|
||||
adc_data_q = 16'sh0000;
|
||||
long_chirp_real = 16'sh1000;
|
||||
long_chirp_imag = 16'sh0000;
|
||||
short_chirp_real = 16'd0;
|
||||
short_chirp_imag = 16'd0;
|
||||
adc_valid = 1'b1;
|
||||
@(posedge clk); #1;
|
||||
|
||||
// Every 100 samples, insert a 10-cycle gap
|
||||
if ((i % 100) == 99 && i < FFT_SIZE - 1) begin : stall_block
|
||||
integer gap_j;
|
||||
adc_valid = 1'b0;
|
||||
for (gap_j = 0; gap_j < 10; gap_j = gap_j + 1) begin
|
||||
@(posedge clk); #1;
|
||||
end
|
||||
end
|
||||
end
|
||||
adc_valid = 1'b0;
|
||||
|
||||
wait_for_idle;
|
||||
cap_enable = 0;
|
||||
|
||||
$display(" Stall: count=%0d, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs);
|
||||
check(cap_count == FFT_SIZE, "Stall: 1024 outputs emitted");
|
||||
check(chain_state === ST_IDLE, "Stall: Returns to IDLE");
|
||||
|
||||
// ════════════════════════════════════════════════════════
|
||||
// Summary
|
||||
// ════════════════════════════════════════════════════════
|
||||
$display("");
|
||||
$display("========================================");
|
||||
$display(" MATCHED FILTER PROCESSING CHAIN");
|
||||
$display(" (SYNTHESIS BRANCH — fft_engine)");
|
||||
$display(" PASSED: %0d / %0d", pass_count, test_num);
|
||||
$display(" FAILED: %0d / %0d", fail_count, test_num);
|
||||
if (fail_count == 0)
|
||||
$display(" ** ALL TESTS PASSED **");
|
||||
else
|
||||
$display(" ** SOME TESTS FAILED **");
|
||||
$display("========================================");
|
||||
$display("");
|
||||
|
||||
#100;
|
||||
$finish;
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -0,0 +1,355 @@
|
||||
`timescale 1ns / 1ps
|
||||
|
||||
/**
|
||||
* tb_xfft_32.v
|
||||
*
|
||||
* Testbench for xfft_32 AXI-Stream FFT wrapper.
|
||||
* Verifies the wrapper correctly interfaces with fft_engine via AXI-Stream.
|
||||
*
|
||||
* Test Groups:
|
||||
* 1. Impulse response (all output bins = input amplitude)
|
||||
* 2. DC input (bin 0 = A*N, rest ~= 0)
|
||||
* 3. Single tone detection
|
||||
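* 4. Back-to-back transforms (no state leakage)
|
||||
* 5. Zero input (all output bins = 0)
|
||||
* AXI-Stream handshake (tvalid/tready/tlast) is observed by the output capture in every group; tlast is checked in group 1.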
|
||||
*/
|
||||
|
||||
module tb_xfft_32;
|
||||
|
||||
// ============================================================================
|
||||
// PARAMETERS
|
||||
// ============================================================================
|
||||
localparam N = 32;
|
||||
localparam CLK_PERIOD = 10;
|
||||
|
||||
// ============================================================================
|
||||
// SIGNALS
|
||||
// ============================================================================
|
||||
reg aclk, aresetn;
|
||||
reg [7:0] cfg_tdata;
|
||||
reg cfg_tvalid;
|
||||
wire cfg_tready;
|
||||
reg [31:0] din_tdata;
|
||||
reg din_tvalid;
|
||||
reg din_tlast;
|
||||
wire [31:0] dout_tdata;
|
||||
wire dout_tvalid;
|
||||
wire dout_tlast;
|
||||
reg dout_tready;
|
||||
|
||||
// ============================================================================
|
||||
// DUT
|
||||
// ============================================================================
|
||||
xfft_32 dut (
|
||||
.aclk(aclk),
|
||||
.aresetn(aresetn),
|
||||
.s_axis_config_tdata(cfg_tdata),
|
||||
.s_axis_config_tvalid(cfg_tvalid),
|
||||
.s_axis_config_tready(cfg_tready),
|
||||
.s_axis_data_tdata(din_tdata),
|
||||
.s_axis_data_tvalid(din_tvalid),
|
||||
.s_axis_data_tlast(din_tlast),
|
||||
.m_axis_data_tdata(dout_tdata),
|
||||
.m_axis_data_tvalid(dout_tvalid),
|
||||
.m_axis_data_tlast(dout_tlast),
|
||||
.m_axis_data_tready(dout_tready)
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// CLOCK
|
||||
// ============================================================================
|
||||
initial aclk = 0;
|
||||
always #(CLK_PERIOD/2) aclk = ~aclk;
|
||||
|
||||
// ============================================================================
|
||||
// PASS/FAIL TRACKING
|
||||
// ============================================================================
|
||||
integer pass_count, fail_count;
|
||||
|
||||
// ----------------------------------------------------------------------------
// check: record and report the outcome of one test condition.
//   cond  - 1-bit result of the expression under test (true = pass)
//   label - description printed next to the PASS/FAIL tag
// Updates the module-level pass_count / fail_count tallies.
// ----------------------------------------------------------------------------
task check;
    input                 cond;
    input [512*8-1:0]     label;
    begin
        if (cond) begin
            pass_count = pass_count + 1;
            $display(" [PASS] %0s", label);
        end else begin
            // Any value other than a true condition (including X/Z) is a failure.
            fail_count = fail_count + 1;
            $display(" [FAIL] %0s", label);
        end
    end
endtask
|
||||
|
||||
// ============================================================================
|
||||
// OUTPUT CAPTURE
|
||||
// ============================================================================
|
||||
reg signed [15:0] out_re [0:N-1];
|
||||
reg signed [15:0] out_im [0:N-1];
|
||||
integer out_idx;
|
||||
reg got_tlast;
|
||||
integer tlast_count;
|
||||
|
||||
// ============================================================================
|
||||
// HELPER TASKS
|
||||
// ============================================================================
|
||||
|
||||
// ----------------------------------------------------------------------------
// do_reset: assert the active-low reset for 5 clock edges with all stimulus
// inputs idle, then release it and wait 2 more edges before returning.
// The output-side tready is left asserted so the DUT can always stream out.
// ----------------------------------------------------------------------------
task do_reset;
    begin
        // Assert reset first, then park every driven input in its idle value.
        aresetn     = 1'b0;
        dout_tready = 1'b1;
        din_tlast   = 1'b0;
        din_tvalid  = 1'b0;
        din_tdata   = 32'd0;
        cfg_tvalid  = 1'b0;
        cfg_tdata   = 8'd0;

        repeat (5) @(posedge aclk);

        // Release reset and allow a short settling period.
        aresetn = 1'b1;
        repeat (2) @(posedge aclk);
    end
endtask
|
||||
|
||||
// Send config (forward FFT: tdata[0]=1)
|
||||
// Waits for cfg_tready (wrapper in S_IDLE) before sending
|
||||
// ----------------------------------------------------------------------------
// send_config: drive one configuration beat on the config channel.
//   cfg - value placed on cfg_tdata for one clock (the tests use 8'h01,
//         i.e. tdata[0]=1, for a forward FFT)
// Waits up to 5000 clock edges for cfg_tready before driving. If the wait
// ends without cfg_tready being a solid 1 (timeout, or X/Z on the wire), a
// warning is printed so a hung or unconnected DUT is visible in the log
// instead of surfacing later as an unexplained capture timeout. The config
// beat is still driven afterwards, exactly as before.
// ----------------------------------------------------------------------------
task send_config;
    input [7:0] cfg;
    integer wait_cnt;
    begin
        // Bounded wait for the wrapper to accept config.
        wait_cnt = 0;
        while (!cfg_tready && wait_cnt < 5000) begin
            @(posedge aclk);
            wait_cnt = wait_cnt + 1;
        end

        // Diagnose a wait that did not end on a clean ready.
        // !== also catches X/Z, which make the loop condition false immediately.
        if (cfg_tready !== 1'b1) begin
            $display(" [WARN] send_config: cfg_tready not asserted after %0d cycles (value=%b); driving config anyway",
                     wait_cnt, cfg_tready);
        end

        // Present the config word for exactly one clock edge.
        cfg_tdata  = cfg;
        cfg_tvalid = 1;
        @(posedge aclk);
        cfg_tvalid = 0;
        cfg_tdata  = 0;
    end
endtask
|
||||
|
||||
// Feed N samples: each sample is {im[15:0], re[15:0]}
|
||||
// feed_re and feed_im must be pre-loaded by the caller before feed_data runs
|
||||
reg signed [15:0] feed_re [0:N-1];
|
||||
reg signed [15:0] feed_im [0:N-1];
|
||||
|
||||
// ----------------------------------------------------------------------------
// feed_data: stream the N samples held in feed_re[] / feed_im[] into the DUT
// data input, one sample per clock edge, packed as {im, re}. tlast is
// asserted together with the final sample. The channel is returned to idle
// (tvalid/tlast/tdata all zero) afterwards.
// ----------------------------------------------------------------------------
task feed_data;
    integer k;
    begin
        for (k = 0; k < N; k = k + 1) begin
            din_tvalid = 1'b1;
            din_tlast  = (k == N - 1);          // high only on the last beat
            din_tdata  = {feed_im[k], feed_re[k]};
            @(posedge aclk);
        end

        // Idle the input channel once the frame has been sent.
        din_tdata  = 0;
        din_tlast  = 0;
        din_tvalid = 0;
    end
endtask
|
||||
|
||||
// Capture N output samples
|
||||
// ----------------------------------------------------------------------------
// capture_output: collect up to N output beats from the DUT master channel.
// A beat is taken on any clock edge where dout_tvalid and dout_tready are
// both high; the low half-word is stored in out_re[], the high half-word in
// out_im[]. got_tlast / tlast_count record whether (and how often) tlast was
// seen. Gives up after 5000 clock edges; callers detect that via out_idx < N.
// ----------------------------------------------------------------------------
task capture_output;
    integer cycles;
    begin
        tlast_count = 0;
        got_tlast   = 0;
        out_idx     = 0;

        for (cycles = 0; out_idx < N && cycles < 5000; cycles = cycles + 1) begin
            @(posedge aclk);
            if (dout_tvalid && dout_tready) begin
                // Unpack {im, re} into the capture arrays.
                out_im[out_idx] = dout_tdata[31:16];
                out_re[out_idx] = dout_tdata[15:0];

                if (dout_tlast) begin
                    tlast_count = tlast_count + 1;
                    got_tlast   = 1;
                end

                out_idx = out_idx + 1;
            end
        end
    end
endtask
|
||||
|
||||
// ============================================================================
|
||||
// VCD
|
||||
// ============================================================================
|
||||
// Waveform dump: record every signal under tb_xfft_32 (depth 0 = full
// hierarchy) into tb_xfft_32.vcd for post-simulation inspection.
initial begin : vcd_dump
    $dumpfile("tb_xfft_32.vcd");
    $dumpvars(0, tb_xfft_32);
end
|
||||
|
||||
// ============================================================================
|
||||
// MAIN TEST
|
||||
// ============================================================================
|
||||
integer i;
|
||||
reg signed [31:0] err;
|
||||
integer max_err;
|
||||
integer max_mag_bin;
|
||||
reg signed [31:0] max_mag, mag;
|
||||
real angle;
|
||||
|
||||
// ----------------------------------------------------------------------------
// Main stimulus: runs five transform scenarios through the DUT in sequence.
// Each scenario loads feed_re/feed_im, sends a forward-FFT config word,
// streams the frame in, captures N outputs and checks them with check().
// Prints a pass/fail summary and ends the simulation.
// ----------------------------------------------------------------------------
initial begin
    fail_count = 0;
    pass_count = 0;

    $display("============================================================");
    $display(" xfft_32 AXI-Stream Wrapper Testbench");
    $display("============================================================");

    do_reset;

    // ------------------------------------------------------------------
    // TEST 1: Impulse response
    // A single non-zero sample at index 0; every output bin is expected
    // to equal the impulse amplitude (1000) with zero imaginary part.
    // ------------------------------------------------------------------
    $display("");
    $display("--- Test 1: Impulse Response ---");

    for (i = 0; i < N; i = i + 1) begin
        feed_im[i] = 16'sd0;
        feed_re[i] = 16'sd0;
    end
    feed_re[0] = 16'sd1000;

    send_config(8'h01); // Forward FFT
    feed_data;
    capture_output;

    check(out_idx == N, "Received N output samples");
    check(got_tlast == 1, "Got tlast on output");

    // Worst-case deviation of any bin from (1000 + j0).
    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i] - 1000;
        err = (err < 0) ? -err : err;
        if (err > max_err) begin
            max_err = err;
        end

        err = out_im[i];
        err = (err < 0) ? -err : err;
        if (err > max_err) begin
            max_err = err;
        end
    end
    $display(" Impulse max error: %0d", max_err);
    check(max_err < 10, "Impulse: all bins ~= 1000");

    // ------------------------------------------------------------------
    // TEST 2: DC input
    // Constant 100 on the real part; bin 0 is expected near 100*N = 3200
    // and every other bin near zero.
    // ------------------------------------------------------------------
    $display("");
    $display("--- Test 2: DC Input ---");

    for (i = 0; i < N; i = i + 1) begin
        feed_im[i] = 16'sd0;
        feed_re[i] = 16'sd100;
    end

    send_config(8'h01);
    feed_data;
    capture_output;

    $display(" DC bin[0] = %0d + j%0d (expect ~3200)", out_re[0], out_im[0]);
    check(out_re[0] >= 3100 && out_re[0] <= 3300, "DC: bin 0 ~= 3200 (5% tol)");

    // Largest absolute component among bins 1..N-1.
    max_err = 0;
    for (i = 1; i < N; i = i + 1) begin
        err = out_re[i];
        err = (err < 0) ? -err : err;
        if (err > max_err) begin
            max_err = err;
        end

        err = out_im[i];
        err = (err < 0) ? -err : err;
        if (err > max_err) begin
            max_err = err;
        end
    end
    $display(" DC max non-DC: %0d", max_err);
    check(max_err < 25, "DC: non-DC bins ~= 0");

    // ------------------------------------------------------------------
    // TEST 3: Single tone (bin 4)
    // Real cosine with 4 cycles per 32 samples; the squared-magnitude
    // peak is accepted at bin 4 or its mirror bin 28.
    // ------------------------------------------------------------------
    $display("");
    $display("--- Test 3: Single Tone (bin 4) ---");

    for (i = 0; i < N; i = i + 1) begin
        angle      = 6.28318530718 * 4.0 * i / 32.0;
        feed_im[i] = 16'sd0;
        feed_re[i] = $rtoi($cos(angle) * 1000.0);
    end

    send_config(8'h01);
    feed_data;
    capture_output;

    // Strict '>' keeps the first (lowest-index) bin when magnitudes tie.
    max_mag_bin = 0;
    max_mag     = 0;
    for (i = 0; i < N; i = i + 1) begin
        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
        if (mag > max_mag) begin
            max_mag_bin = i;
            max_mag     = mag;
        end
    end
    $display(" Tone peak bin: %0d (expect 4 or 28)", max_mag_bin);
    check(max_mag_bin == 4 || max_mag_bin == 28, "Tone: peak at bin 4 or 28");

    // ------------------------------------------------------------------
    // TEST 4: Back-to-back transforms
    // An impulse frame followed immediately by a DC frame; the second
    // result must look like a clean DC transform (bin 0 near 50*N = 1600).
    // ------------------------------------------------------------------
    $display("");
    $display("--- Test 4: Back-to-Back Transforms ---");

    // First frame: impulse of amplitude 500.
    for (i = 0; i < N; i = i + 1) begin
        feed_im[i] = 16'sd0;
        feed_re[i] = 16'sd0;
    end
    feed_re[0] = 16'sd500;

    send_config(8'h01);
    feed_data;
    capture_output;
    check(out_idx == N, "Back-to-back 1st: got N outputs");

    // Second frame: DC of amplitude 50, issued right after the first.
    for (i = 0; i < N; i = i + 1) begin
        feed_im[i] = 16'sd0;
        feed_re[i] = 16'sd50;
    end

    send_config(8'h01);
    feed_data;
    capture_output;
    check(out_idx == N, "Back-to-back 2nd: got N outputs");
    $display(" 2nd transform bin[0] = %0d (expect ~1600)", out_re[0]);
    check(out_re[0] >= 1500 && out_re[0] <= 1700, "Back-to-back 2nd: bin 0 ~= 1600");

    // ------------------------------------------------------------------
    // TEST 5: Zero input
    // An all-zero frame must produce exactly zero in every output bin.
    // ------------------------------------------------------------------
    $display("");
    $display("--- Test 5: Zero Input ---");

    for (i = 0; i < N; i = i + 1) begin
        feed_im[i] = 16'sd0;
        feed_re[i] = 16'sd0;
    end

    send_config(8'h01);
    feed_data;
    capture_output;

    max_err = 0;
    for (i = 0; i < N; i = i + 1) begin
        err = out_re[i];
        err = (err < 0) ? -err : err;
        if (err > max_err) begin
            max_err = err;
        end

        err = out_im[i];
        err = (err < 0) ? -err : err;
        if (err > max_err) begin
            max_err = err;
        end
    end
    check(max_err == 0, "Zero input: all outputs = 0");

    // ------------------------------------------------------------------
    // SUMMARY
    // ------------------------------------------------------------------
    $display("");
    $display("============================================================");
    $display(" RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
    if (fail_count == 0) begin
        $display(" ALL TESTS PASSED");
    end else begin
        $display(" SOME TESTS FAILED");
    end
    $display("============================================================");

    $finish;
end
|
||||
|
||||
endmodule
|
||||
+240
-33
@@ -1,18 +1,15 @@
|
||||
`timescale 1ns / 1ps
|
||||
// ============================================================================
|
||||
// xfft_32.v — Synthesis stub for Xilinx 32-point FFT IP core
|
||||
// xfft_32.v — 32-point FFT with AXI-Stream interface
|
||||
// ============================================================================
|
||||
// This is a PLACEHOLDER module that provides the port interface expected by
|
||||
// doppler_processor.v. It does NOT perform an actual FFT — it simply passes
|
||||
// input data through with a one-cycle latency and generates proper AXI-Stream
|
||||
// handshake signals.
|
||||
//
|
||||
// For real hardware, replace this stub with either:
|
||||
// (a) A Xilinx FFT IP core generated via Vivado IP Catalog, or
|
||||
// (b) A custom synthesizable radix-2 DIT 32-point FFT in Verilog.
|
||||
// Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
|
||||
// interface expected by doppler_processor.v.
|
||||
//
|
||||
// Port interface matches the Xilinx LogiCORE IP Fast Fourier Transform
|
||||
// (AXI-Stream variant) as instantiated in doppler_processor.v.
|
||||
//
|
||||
// Data format: {Q[15:0], I[15:0]} packed 32-bit.
|
||||
// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
|
||||
// ============================================================================
|
||||
|
||||
module xfft_32 (
|
||||
@@ -36,36 +33,246 @@ module xfft_32 (
|
||||
input wire m_axis_data_tready
|
||||
);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Synthesis stub: pass-through with one-cycle latency
|
||||
// ----------------------------------------------------------------------------
|
||||
// This gives Vivado a real module to synthesize so it can check port
|
||||
// connectivity, infer timing paths, and estimate utilization. The actual
|
||||
// FFT computation is deferred to IP integration or a custom RTL FFT.
|
||||
// ----------------------------------------------------------------------------
|
||||
// ============================================================================
|
||||
// PARAMETERS
|
||||
// ============================================================================
|
||||
localparam N = 32;
|
||||
localparam LOG2N = 5;
|
||||
|
||||
// Always accept config
|
||||
assign s_axis_config_tready = 1'b1;
|
||||
// ============================================================================
|
||||
// INTERNAL SIGNALS
|
||||
// ============================================================================
|
||||
|
||||
// Pipeline registers for data pass-through
|
||||
reg [31:0] data_reg;
|
||||
reg valid_reg;
|
||||
reg last_reg;
|
||||
// FSM states
|
||||
localparam [2:0] S_IDLE = 3'd0,
|
||||
S_CONFIG = 3'd1, // Latch config (fwd/inv)
|
||||
S_FEED = 3'd2, // Feed input to FFT engine
|
||||
S_WAIT = 3'd3, // Wait for FFT to complete
|
||||
S_OUTPUT = 3'd4; // Stream output
|
||||
|
||||
reg [2:0] state;
|
||||
|
||||
// Configuration
|
||||
reg inverse_reg;
|
||||
|
||||
// Input buffering
|
||||
reg signed [15:0] in_buf_re [0:N-1];
|
||||
reg signed [15:0] in_buf_im [0:N-1];
|
||||
reg [5:0] in_count; // 0..31 for loading, extra bit for overflow check
|
||||
|
||||
// Output buffering
|
||||
reg signed [15:0] out_buf_re [0:N-1];
|
||||
reg signed [15:0] out_buf_im [0:N-1];
|
||||
reg [5:0] out_count;
|
||||
reg [5:0] out_total; // counts how many outputs captured from engine
|
||||
|
||||
// FFT engine interface
|
||||
reg fft_start;
|
||||
reg fft_inverse;
|
||||
reg signed [15:0] fft_din_re, fft_din_im;
|
||||
reg fft_din_valid;
|
||||
wire signed [15:0] fft_dout_re, fft_dout_im;
|
||||
wire fft_dout_valid;
|
||||
wire fft_busy;
|
||||
wire fft_done;
|
||||
|
||||
// Feed counter for streaming into engine
|
||||
reg [5:0] feed_count;
|
||||
|
||||
// ============================================================================
|
||||
// FFT ENGINE INSTANCE
|
||||
// ============================================================================
|
||||
fft_engine #(
|
||||
.N(N),
|
||||
.LOG2N(LOG2N),
|
||||
.DATA_W(16),
|
||||
.INTERNAL_W(32),
|
||||
.TWIDDLE_W(16),
|
||||
.TWIDDLE_FILE("fft_twiddle_32.mem")
|
||||
) fft_core (
|
||||
.clk(aclk),
|
||||
.reset_n(aresetn),
|
||||
.start(fft_start),
|
||||
.inverse(fft_inverse),
|
||||
.din_re(fft_din_re),
|
||||
.din_im(fft_din_im),
|
||||
.din_valid(fft_din_valid),
|
||||
.dout_re(fft_dout_re),
|
||||
.dout_im(fft_dout_im),
|
||||
.dout_valid(fft_dout_valid),
|
||||
.busy(fft_busy),
|
||||
.done(fft_done)
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// AXI-STREAM OUTPUTS
|
||||
// ============================================================================
|
||||
|
||||
// Config is accepted when idle
|
||||
assign s_axis_config_tready = (state == S_IDLE);
|
||||
|
||||
// Output data: {Q, I} packed
|
||||
assign m_axis_data_tdata = {out_buf_im[out_count[4:0]], out_buf_re[out_count[4:0]]};
|
||||
assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
|
||||
assign m_axis_data_tlast = (state == S_OUTPUT) && (out_count == N - 1);
|
||||
|
||||
// ============================================================================
|
||||
// BUFFER WRITE LOGIC — separate always block, NO async reset
|
||||
// Allows Vivado to infer distributed RAM instead of dissolving into registers.
|
||||
// ============================================================================
|
||||
// Input buffer write enable
|
||||
reg in_buf_we;
|
||||
reg [4:0] in_buf_waddr;
|
||||
reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
|
||||
|
||||
// Output buffer write enable
|
||||
reg out_buf_we;
|
||||
reg [4:0] out_buf_waddr;
|
||||
reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
|
||||
|
||||
always @(posedge aclk) begin
|
||||
if (!aresetn) begin
|
||||
data_reg <= 32'd0;
|
||||
valid_reg <= 1'b0;
|
||||
last_reg <= 1'b0;
|
||||
end else begin
|
||||
data_reg <= s_axis_data_tdata;
|
||||
valid_reg <= s_axis_data_tvalid;
|
||||
last_reg <= s_axis_data_tlast;
|
||||
if (in_buf_we) begin
|
||||
in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
|
||||
in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
|
||||
end
|
||||
if (out_buf_we) begin
|
||||
out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
|
||||
out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
|
||||
end
|
||||
end
|
||||
|
||||
assign m_axis_data_tdata = data_reg;
|
||||
assign m_axis_data_tvalid = valid_reg;
|
||||
assign m_axis_data_tlast = last_reg;
|
||||
// ============================================================================
|
||||
// MAIN FSM
|
||||
// ============================================================================
|
||||
// ----------------------------------------------------------------------------
// Main control FSM (async active-low reset).
//
//   S_IDLE   : wait for a config beat; latch transform direction.
//   S_FEED   : buffer N input samples, then pulse fft_start.
//   S_WAIT   : replay the buffered samples into the engine, capture the
//              engine's outputs, and move on when the engine reports done.
//   S_OUTPUT : advance the output read pointer on each accepted beat.
//
// Buffer contents are not written here: this block only produces the
// write-enable / address / data registers (in_buf_*, out_buf_*).
// S_CONFIG is declared but never entered by this block; it and any other
// unlisted encoding fall through to the default branch and return to S_IDLE.
// ----------------------------------------------------------------------------
always @(posedge aclk or negedge aresetn) begin
    if (!aresetn) begin
        // Control state
        state            <= S_IDLE;
        inverse_reg      <= 1'b0;

        // Counters
        in_count         <= 0;
        feed_count       <= 0;
        out_total        <= 0;
        out_count        <= 0;

        // Engine drive
        fft_start        <= 1'b0;
        fft_inverse      <= 1'b0;
        fft_din_valid    <= 1'b0;
        fft_din_re       <= 0;
        fft_din_im       <= 0;

        // Input-buffer write port
        in_buf_we        <= 1'b0;
        in_buf_waddr     <= 0;
        in_buf_wdata_re  <= 0;
        in_buf_wdata_im  <= 0;

        // Output-buffer write port
        out_buf_we       <= 1'b0;
        out_buf_waddr    <= 0;
        out_buf_wdata_re <= 0;
        out_buf_wdata_im <= 0;
    end else begin
        // Single-cycle strobes: low unless a state raises them this cycle.
        out_buf_we    <= 1'b0;
        in_buf_we     <= 1'b0;
        fft_din_valid <= 1'b0;
        fft_start     <= 1'b0;

        case (state)

            // ------------------------------------------------------------
            // S_IDLE: hold the input counter at zero until config arrives.
            // ------------------------------------------------------------
            S_IDLE: begin
                in_count <= 0;
                if (s_axis_config_tvalid) begin
                    // Config tdata[0]: 1 = forward, 0 = inverse.
                    // The engine's 'inverse' input has the opposite polarity.
                    inverse_reg <= ~s_axis_config_tdata[0];
                    feed_count  <= 0;
                    state       <= S_FEED;
                end
            end

            // ------------------------------------------------------------
            // S_FEED: collect all N input samples before starting the engine.
            // ------------------------------------------------------------
            S_FEED: begin
                if (in_count < N) begin
                    if (s_axis_data_tvalid) begin
                        // Queue a buffer write for this sample: {im, re} unpacked.
                        in_buf_wdata_im <= s_axis_data_tdata[31:16];
                        in_buf_wdata_re <= s_axis_data_tdata[15:0];
                        in_buf_waddr    <= in_count[4:0];
                        in_buf_we       <= 1'b1;
                        in_count        <= in_count + 1;
                    end
                end else if (feed_count == 0) begin
                    // Frame complete: kick off the engine with the latched direction.
                    fft_inverse <= inverse_reg;
                    fft_start   <= 1'b1;
                    out_total   <= 0;
                    state       <= S_WAIT;
                end
            end

            // ------------------------------------------------------------
            // S_WAIT: replay buffered samples, then collect engine results.
            // ------------------------------------------------------------
            S_WAIT: begin
                // One buffered sample per cycle until N have been sent.
                if (feed_count < N) begin
                    fft_din_valid <= 1'b1;
                    fft_din_im    <= in_buf_im[feed_count[4:0]];
                    fft_din_re    <= in_buf_re[feed_count[4:0]];
                    feed_count    <= feed_count + 1;
                end

                // Store each valid engine output, at most N of them.
                if (fft_dout_valid && out_total < N) begin
                    out_buf_wdata_im <= fft_dout_im;
                    out_buf_wdata_re <= fft_dout_re;
                    out_buf_waddr    <= out_total[4:0];
                    out_buf_we       <= 1'b1;
                    out_total        <= out_total + 1;
                end

                // Engine finished: start streaming from index 0.
                if (fft_done) begin
                    out_count <= 0;
                    state     <= S_OUTPUT;
                end
            end

            // ------------------------------------------------------------
            // S_OUTPUT: tdata/tvalid/tlast are driven combinationally from
            // out_count; here the pointer only advances on an accepted beat.
            // ------------------------------------------------------------
            S_OUTPUT: begin
                if (m_axis_data_tready) begin
                    if (out_count < N) begin
                        out_count <= out_count + 1;
                    end
                    // Last beat accepted: go back and wait for the next config.
                    if (out_count >= N - 1) begin
                        state <= S_IDLE;
                    end
                end
            end

            default: state <= S_IDLE;

        endcase
    end
end
|
||||
|
||||
// ============================================================================
|
||||
// MEMORY INIT (simulation only)
|
||||
// ============================================================================
|
||||
`ifdef SIMULATION
// Simulation-only: clear both sample buffers at time zero so reads that
// happen before the first write return 0 instead of X.
integer init_k;
initial begin
    for (init_k = 0; init_k < N; init_k = init_k + 1) begin
        out_buf_im[init_k] = 0;
        out_buf_re[init_k] = 0;
        in_buf_im[init_k]  = 0;
        in_buf_re[init_k]  = 0;
    end
end
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
Reference in New Issue
Block a user