From 692b6a3bfab92df58b12aa2a423d43f1660d24c5 Mon Sep 17 00:00:00 2001 From: Jason <83615043+JJassonn69@users.noreply.github.com> Date: Mon, 16 Mar 2026 10:25:07 +0200 Subject: [PATCH] Replace FFT stubs with synthesizable radix-2 DIT engine, fix BRAM inference Implement iterative single-butterfly FFT engine (fft_engine.v) supporting 1024-pt and 32-pt transforms with quarter-wave twiddle ROM, XPM_MEMORY_TDPRAM for guaranteed BRAM mapping in Vivado, and behavioral model for simulation. Add xfft_32.v AXI-Stream wrapper for doppler_processor integration and dual-branch matched_filter_processing_chain.v (behavioral + synthesis paths). Fix placement failure caused by 68K+ registers from dissolved memory arrays: - doppler_processor.v: extract mem writes to sync-only always block for BRAM - xfft_32.v: extract buffer writes to sync-only always block for LUTRAM Post-implementation: 37K regs (29%), 23K LUTs (37%), 10 BRAM (7%), fully routed. All testbenches pass: fft_engine 12/12, xfft_32 10/10, mf_chain 27/27. 
--- 9_Firmware/9_2_FPGA/doppler_processor.v | 327 ++++---- 9_Firmware/9_2_FPGA/fft_engine.v | 606 +++++++++++++++ 9_Firmware/9_2_FPGA/fft_twiddle_1024.mem | 259 +++++++ 9_Firmware/9_2_FPGA/fft_twiddle_32.mem | 11 + .../matched_filter_processing_chain.v | 718 +++++++++++++++++- 9_Firmware/9_2_FPGA/tb/tb_fft_engine.v | 526 +++++++++++++ 9_Firmware/9_2_FPGA/tb/tb_mf_chain_synth.v | 543 +++++++++++++ 9_Firmware/9_2_FPGA/tb/tb_xfft_32.v | 355 +++++++++ 9_Firmware/9_2_FPGA/xfft_32.v | 273 ++++++- 9 files changed, 3428 insertions(+), 190 deletions(-) create mode 100644 9_Firmware/9_2_FPGA/fft_engine.v create mode 100644 9_Firmware/9_2_FPGA/fft_twiddle_1024.mem create mode 100644 9_Firmware/9_2_FPGA/fft_twiddle_32.mem create mode 100644 9_Firmware/9_2_FPGA/tb/tb_fft_engine.v create mode 100644 9_Firmware/9_2_FPGA/tb/tb_mf_chain_synth.v create mode 100644 9_Firmware/9_2_FPGA/tb/tb_xfft_32.v diff --git a/9_Firmware/9_2_FPGA/doppler_processor.v b/9_Firmware/9_2_FPGA/doppler_processor.v index a1f2e18..d7b1499 100644 --- a/9_Firmware/9_2_FPGA/doppler_processor.v +++ b/9_Firmware/9_2_FPGA/doppler_processor.v @@ -124,157 +124,188 @@ always @(posedge clk or negedge reset_n) begin end wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1; -// ============================================== -// Main State Machine - FIXED -// ============================================== -reg [5:0] fft_sample_counter; -reg [9:0] processing_timeout; - -always @(posedge clk or negedge reset_n) begin - if (!reset_n) begin - state <= S_IDLE; - write_range_bin <= 0; - write_chirp_index <= 0; - read_range_bin <= 0; - read_doppler_index <= 0; - frame_buffer_full <= 0; - doppler_valid <= 0; - fft_start <= 0; - fft_input_valid <= 0; - fft_input_last <= 0; - fft_sample_counter <= 0; - processing_timeout <= 0; - status <= 0; - chirps_received <= 0; - chirp_state <= 0; - end else begin - doppler_valid <= 0; - fft_input_valid <= 0; - fft_input_last <= 0; - - if (processing_timeout > 0) begin - 
processing_timeout <= processing_timeout - 1; - end - - case (state) - S_IDLE: begin - if (frame_start_pulse) begin - // Start new frame - write_chirp_index <= 0; - write_range_bin <= 0; - frame_buffer_full <= 0; - chirps_received <= 0; - //chirp_state <= 1; // Start accumulating - end - - if (data_valid && !frame_buffer_full) begin +// ============================================== +// Main State Machine - FIXED +// ============================================== +reg [5:0] fft_sample_counter; +reg [9:0] processing_timeout; + +// Memory write enable and data signals (extracted for BRAM inference) +reg mem_we; +reg [10:0] mem_waddr_r; +reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q; + +// Memory read data (registered for BRAM read latency) +reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q; + +// ---------------------------------------------------------- +// Separate always block for memory writes — NO async reset +// in sensitivity list, so Vivado can infer Block RAM. +// ---------------------------------------------------------- +always @(posedge clk) begin + if (mem_we) begin + doppler_i_mem[mem_waddr_r] <= mem_wdata_i; + doppler_q_mem[mem_waddr_r] <= mem_wdata_q; + end + // Registered read — address driven by mem_read_addr from FSM + mem_rdata_i <= doppler_i_mem[mem_read_addr]; + mem_rdata_q <= doppler_q_mem[mem_read_addr]; +end + +// ---------------------------------------------------------- +// Main FSM — async reset for control registers only. +// Memory arrays are NOT touched here. 
+// ---------------------------------------------------------- +always @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + state <= S_IDLE; + write_range_bin <= 0; + write_chirp_index <= 0; + read_range_bin <= 0; + read_doppler_index <= 0; + frame_buffer_full <= 0; + doppler_valid <= 0; + fft_start <= 0; + fft_input_valid <= 0; + fft_input_last <= 0; + fft_sample_counter <= 0; + processing_timeout <= 0; + status <= 0; + chirps_received <= 0; + chirp_state <= 0; + mem_we <= 0; + mem_waddr_r <= 0; + mem_wdata_i <= 0; + mem_wdata_q <= 0; + mult_i <= 0; + mult_q <= 0; + fft_input_i <= 0; + fft_input_q <= 0; + doppler_output <= 0; + doppler_bin <= 0; + end else begin + doppler_valid <= 0; + fft_input_valid <= 0; + fft_input_last <= 0; + mem_we <= 0; + + if (processing_timeout > 0) begin + processing_timeout <= processing_timeout - 1; + end + + case (state) + S_IDLE: begin + if (frame_start_pulse) begin + // Start new frame + write_chirp_index <= 0; + write_range_bin <= 0; + frame_buffer_full <= 0; + chirps_received <= 0; + end + + if (data_valid && !frame_buffer_full) begin state <= S_ACCUMULATE; - write_range_bin <= 0; - end - end - - S_ACCUMULATE: begin - if (data_valid) begin - // Store with proper addressing - doppler_i_mem[mem_write_addr] <= range_data[15:0]; - doppler_q_mem[mem_write_addr] <= range_data[31:16]; - - // Debug output to see what's being written - // $display("Time=%t: Write addr=%d (chirp=%d, range=%d), Data=%h", - // $time, mem_write_addr, write_chirp_index, write_range_bin, range_data); - - // Increment range bin - if (write_range_bin < RANGE_BINS - 1) begin - write_range_bin <= write_range_bin + 1; - end else begin - // Completed one chirp - write_range_bin <= 0; - write_chirp_index <= write_chirp_index + 1; - chirps_received <= chirps_received + 1; - - // Check if frame is complete - if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin - frame_buffer_full <= 1; - chirp_state <= 0; // Stop accumulating - // Could automatically start 
processing here: - state <= S_LOAD_FFT; - read_range_bin <= 0; - read_doppler_index <= 0; - fft_sample_counter <= 0; - fft_start <= 1; - end - end - end - end - - // [Rest of S_LOAD_FFT, S_FFT_WAIT, S_OUTPUT states remain similar] - // But with fixed addressing in S_LOAD_FFT: - S_LOAD_FFT: begin - fft_start <= 0; - - if (fft_sample_counter < DOPPLER_FFT_SIZE) begin - // Use correct addressing for reading - mult_i <= $signed(doppler_i_mem[mem_read_addr]) * - $signed(window_coeff[read_doppler_index]); - mult_q <= $signed(doppler_q_mem[mem_read_addr]) * - $signed(window_coeff[read_doppler_index]); + write_range_bin <= 0; + end + end + + S_ACCUMULATE: begin + if (data_valid) begin + // Drive memory write signals (actual write in separate block) + mem_we <= 1; + mem_waddr_r <= mem_write_addr; + mem_wdata_i <= range_data[15:0]; + mem_wdata_q <= range_data[31:16]; - // Round instead of truncate - fft_input_i <= (mult_i + (1 << 14)) >>> 15; // Round to nearest - fft_input_q <= (mult_q + (1 << 14)) >>> 15; + // Increment range bin + if (write_range_bin < RANGE_BINS - 1) begin + write_range_bin <= write_range_bin + 1; + end else begin + // Completed one chirp + write_range_bin <= 0; + write_chirp_index <= write_chirp_index + 1; + chirps_received <= chirps_received + 1; + + // Check if frame is complete + if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin + frame_buffer_full <= 1; + chirp_state <= 0; + state <= S_LOAD_FFT; + read_range_bin <= 0; + read_doppler_index <= 0; + fft_sample_counter <= 0; + fft_start <= 1; + end + end + end + end + + S_LOAD_FFT: begin + fft_start <= 0; + + if (fft_sample_counter < DOPPLER_FFT_SIZE) begin + // Use registered read data (one cycle latency from BRAM) + mult_i <= $signed(mem_rdata_i) * + $signed(window_coeff[read_doppler_index]); + mult_q <= $signed(mem_rdata_q) * + $signed(window_coeff[read_doppler_index]); - fft_input_valid <= 1; - - if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin - fft_input_last <= 1; - end - - // Increment 
chirp index for next sample - read_doppler_index <= read_doppler_index + 1; - fft_sample_counter <= fft_sample_counter + 1; - end else begin - state <= S_FFT_WAIT; - fft_sample_counter <= 0; - processing_timeout <= 100; - end - end - - S_FFT_WAIT: begin - if (fft_output_valid) begin - doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]}; - doppler_bin <= fft_sample_counter; - range_bin <= read_range_bin; - doppler_valid <= 1; - - fft_sample_counter <= fft_sample_counter + 1; - - if (fft_output_last) begin - state <= S_OUTPUT; - fft_sample_counter <= 0; - end - end - - if (processing_timeout == 0) begin - state <= S_OUTPUT; - end - end - - S_OUTPUT: begin - if (read_range_bin < RANGE_BINS - 1) begin - read_range_bin <= read_range_bin + 1; - read_doppler_index <= 0; - state <= S_LOAD_FFT; - fft_start <= 1; - end else begin - state <= S_IDLE; - frame_buffer_full <= 0; - end - end - - endcase - - status <= {state, frame_buffer_full}; - end + // Round instead of truncate + fft_input_i <= (mult_i + (1 << 14)) >>> 15; + fft_input_q <= (mult_q + (1 << 14)) >>> 15; + + fft_input_valid <= 1; + + if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin + fft_input_last <= 1; + end + + // Increment chirp index for next sample + read_doppler_index <= read_doppler_index + 1; + fft_sample_counter <= fft_sample_counter + 1; + end else begin + state <= S_FFT_WAIT; + fft_sample_counter <= 0; + processing_timeout <= 100; + end + end + + S_FFT_WAIT: begin + if (fft_output_valid) begin + doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]}; + doppler_bin <= fft_sample_counter; + range_bin <= read_range_bin; + doppler_valid <= 1; + + fft_sample_counter <= fft_sample_counter + 1; + + if (fft_output_last) begin + state <= S_OUTPUT; + fft_sample_counter <= 0; + end + end + + if (processing_timeout == 0) begin + state <= S_OUTPUT; + end + end + + S_OUTPUT: begin + if (read_range_bin < RANGE_BINS - 1) begin + read_range_bin <= read_range_bin + 1; + read_doppler_index <= 0; + state 
<= S_LOAD_FFT; + fft_start <= 1; + end else begin + state <= S_IDLE; + frame_buffer_full <= 0; + end + end + + endcase + + status <= {state, frame_buffer_full}; + end end // ============================================== diff --git a/9_Firmware/9_2_FPGA/fft_engine.v b/9_Firmware/9_2_FPGA/fft_engine.v new file mode 100644 index 0000000..f02b676 --- /dev/null +++ b/9_Firmware/9_2_FPGA/fft_engine.v @@ -0,0 +1,606 @@ +`timescale 1ns / 1ps + +/** + * fft_engine.v + * + * Synthesizable parameterized radix-2 DIT FFT/IFFT engine. + * Iterative single-butterfly architecture with quarter-wave twiddle ROM. + * + * Architecture: + * - LOAD: Accept N input samples, store bit-reversed in BRAM + * - COMPUTE: LOG2N stages x N/2 butterflies, 2-cycle pipeline: + * BF_READ: Present BRAM addresses, capture twiddle + * BF_CALC: BRAM data valid; butterfly compute + writeback + * - OUTPUT: Stream N results (1/N scaling for IFFT) + * + * Data memory uses xpm_memory_tdpram (Xilinx Parameterized Macros) for + * guaranteed BRAM mapping when FFT_XPM_BRAM is defined. By default, a + * behavioral Verilog-2001 model replaces the XPM so the design compiles + * with Icarus Verilog or any non-Xilinx simulator. + * + * Clock domain: single clock (clk), active-low async reset (reset_n). 
+ */ + +module fft_engine #( + parameter N = 1024, + parameter LOG2N = 10, + parameter DATA_W = 16, + parameter INTERNAL_W = 32, + parameter TWIDDLE_W = 16, + parameter TWIDDLE_FILE = "fft_twiddle_1024.mem" +)( + input wire clk, + input wire reset_n, + + // Control + input wire start, + input wire inverse, + + // Data input + input wire signed [DATA_W-1:0] din_re, + input wire signed [DATA_W-1:0] din_im, + input wire din_valid, + + // Data output + output reg signed [DATA_W-1:0] dout_re, + output reg signed [DATA_W-1:0] dout_im, + output reg dout_valid, + + // Status + output wire busy, + output reg done +); + +// ============================================================================ +// SAFE WIDTH CONSTANTS +// ============================================================================ +localparam [LOG2N:0] FFT_N = N; +localparam [LOG2N:0] FFT_N_HALF = N / 2; +localparam [LOG2N:0] FFT_N_QTR = N / 4; +localparam [LOG2N:0] FFT_N_HALF_M1 = N / 2 - 1; +localparam [LOG2N:0] FFT_N_M1 = N - 1; + +// ============================================================================ +// STATES +// ============================================================================ +localparam [2:0] ST_IDLE = 3'd0, + ST_LOAD = 3'd1, + ST_BF_READ = 3'd2, + ST_BF_CALC = 3'd3, + ST_OUTPUT = 3'd4, + ST_DONE = 3'd5; + +reg [2:0] state; +assign busy = (state != ST_IDLE); + +// ============================================================================ +// DATA MEMORY DECLARATIONS +// ============================================================================ + +// BRAM read data (registered outputs from port blocks) +reg signed [INTERNAL_W-1:0] mem_rdata_a_re, mem_rdata_a_im; +reg signed [INTERNAL_W-1:0] mem_rdata_b_re, mem_rdata_b_im; + +// ============================================================================ +// TWIDDLE ROM +// ============================================================================ +localparam TW_QUARTER = N / 4; +localparam TW_ADDR_W = LOG2N - 2; + +(* 
rom_style = "block" *) reg signed [TWIDDLE_W-1:0] cos_rom [0:TW_QUARTER-1]; + +initial begin + $readmemh(TWIDDLE_FILE, cos_rom); +end + +// ============================================================================ +// BIT-REVERSE +// ============================================================================ +function [LOG2N-1:0] bit_reverse; + input [LOG2N-1:0] val; + integer b; + begin + bit_reverse = 0; + for (b = 0; b < LOG2N; b = b + 1) + bit_reverse[LOG2N-1-b] = val[b]; + end +endfunction + +// ============================================================================ +// COUNTERS AND PIPELINE REGISTERS +// ============================================================================ +reg [LOG2N-1:0] load_count; +reg [LOG2N:0] out_count; +reg [LOG2N-1:0] bfly_count; +reg [3:0] stage; + +// Registered values (captured in BF_READ, used in BF_CALC) +reg signed [TWIDDLE_W-1:0] rd_tw_cos, rd_tw_sin; +reg [LOG2N-1:0] rd_addr_even, rd_addr_odd; +reg rd_inverse; + +// Half and twiddle stride +reg [LOG2N-1:0] half_reg; +reg [LOG2N-1:0] tw_stride_reg; + +// ============================================================================ +// BUTTERFLY ADDRESS COMPUTATION (combinational) +// ============================================================================ +reg [LOG2N-1:0] bf_addr_even; +reg [LOG2N-1:0] bf_addr_odd; +reg [LOG2N-1:0] bf_tw_idx; + +always @(*) begin : bf_addr_calc + reg [LOG2N-1:0] half_val; + reg [LOG2N-1:0] idx_val; + reg [LOG2N-1:0] grp_val; + + half_val = half_reg; + idx_val = bfly_count & (half_val - 1); + grp_val = (bfly_count - idx_val); + + bf_addr_even = (grp_val << 1) | idx_val; + bf_addr_odd = bf_addr_even + half_val; + + bf_tw_idx = idx_val * tw_stride_reg; +end + +// ============================================================================ +// TWIDDLE LOOKUP (combinational) +// ============================================================================ +reg signed [TWIDDLE_W-1:0] tw_cos_lookup; +reg signed [TWIDDLE_W-1:0] 
tw_sin_lookup; + +always @(*) begin : tw_lookup + reg [LOG2N-1:0] k; + reg [LOG2N-1:0] rom_idx; + + k = bf_tw_idx; + tw_cos_lookup = 0; + tw_sin_lookup = 0; + + if (k == 0) begin + tw_cos_lookup = cos_rom[0]; + tw_sin_lookup = {TWIDDLE_W{1'b0}}; + end else if (k == FFT_N_QTR[LOG2N-1:0]) begin + tw_cos_lookup = {TWIDDLE_W{1'b0}}; + tw_sin_lookup = cos_rom[0]; + end else if (k < FFT_N_QTR[LOG2N-1:0]) begin + tw_cos_lookup = cos_rom[k[TW_ADDR_W-1:0]]; + rom_idx = FFT_N_QTR[LOG2N-1:0] - k; + tw_sin_lookup = cos_rom[rom_idx[TW_ADDR_W-1:0]]; + end else begin + rom_idx = k - FFT_N_QTR[LOG2N-1:0]; + tw_sin_lookup = cos_rom[rom_idx[TW_ADDR_W-1:0]]; + rom_idx = FFT_N_HALF[LOG2N-1:0] - k; + tw_cos_lookup = -cos_rom[rom_idx[TW_ADDR_W-1:0]]; + end +end + +// ============================================================================ +// SATURATION +// ============================================================================ +function signed [DATA_W-1:0] saturate; + input signed [INTERNAL_W-1:0] val; + reg signed [INTERNAL_W-1:0] max_pos; + reg signed [INTERNAL_W-1:0] max_neg; + begin + max_pos = (1 << (DATA_W - 1)) - 1; + max_neg = -(1 << (DATA_W - 1)); + if (val > max_pos) + saturate = max_pos[DATA_W-1:0]; + else if (val < max_neg) + saturate = max_neg[DATA_W-1:0]; + else + saturate = val[DATA_W-1:0]; + end +endfunction + +// ============================================================================ +// BUTTERFLY COMPUTATION (combinational, for BF_CALC write data) +// ============================================================================ +reg signed [INTERNAL_W-1:0] bf_t_re, bf_t_im; +reg signed [INTERNAL_W-1:0] bf_sum_re, bf_sum_im; +reg signed [INTERNAL_W-1:0] bf_dif_re, bf_dif_im; + +always @(*) begin : bf_compute + if (!rd_inverse) begin + bf_t_re = (mem_rdata_b_re * rd_tw_cos + mem_rdata_b_im * rd_tw_sin) >>> (TWIDDLE_W - 1); + bf_t_im = (mem_rdata_b_im * rd_tw_cos - mem_rdata_b_re * rd_tw_sin) >>> (TWIDDLE_W - 1); + end else begin + bf_t_re = (mem_rdata_b_re 
* rd_tw_cos - mem_rdata_b_im * rd_tw_sin) >>> (TWIDDLE_W - 1); + bf_t_im = (mem_rdata_b_im * rd_tw_cos + mem_rdata_b_re * rd_tw_sin) >>> (TWIDDLE_W - 1); + end + bf_sum_re = mem_rdata_a_re + bf_t_re; + bf_sum_im = mem_rdata_a_im + bf_t_im; + bf_dif_re = mem_rdata_a_re - bf_t_re; + bf_dif_im = mem_rdata_a_im - bf_t_im; +end + +// ============================================================================ +// BRAM PORT ADDRESS / WE / WDATA — combinational mux of registered signals +// ============================================================================ +// Drives port A and port B control signals combinationally from FSM state. +// The FSM inputs to this mux are registered (via NBA), so its outputs are +// stable at the next posedge when the BRAM ports sample them (no NBA race). +// ============================================================================ +reg bram_we_a; +reg [LOG2N-1:0] bram_addr_a; +reg signed [INTERNAL_W-1:0] bram_wdata_a_re; +reg signed [INTERNAL_W-1:0] bram_wdata_a_im; + +reg bram_we_b; +reg [LOG2N-1:0] bram_addr_b; +reg signed [INTERNAL_W-1:0] bram_wdata_b_re; +reg signed [INTERNAL_W-1:0] bram_wdata_b_im; + +always @(*) begin : bram_port_mux + // Port A defaults + bram_we_a = 1'b0; + bram_addr_a = 0; + bram_wdata_a_re = 0; + bram_wdata_a_im = 0; + + // Port B defaults + bram_we_b = 1'b0; + bram_addr_b = 0; + bram_wdata_b_re = 0; + bram_wdata_b_im = 0; + + case (state) + ST_LOAD: begin + bram_we_a = din_valid; + bram_addr_a = bit_reverse(load_count); + bram_wdata_a_re = {{(INTERNAL_W-DATA_W){din_re[DATA_W-1]}}, din_re}; + bram_wdata_a_im = {{(INTERNAL_W-DATA_W){din_im[DATA_W-1]}}, din_im}; + end + ST_BF_READ: begin + bram_addr_a = bf_addr_even; + bram_addr_b = bf_addr_odd; + end + ST_BF_CALC: begin + bram_we_a = 1'b1; + bram_addr_a = rd_addr_even; + bram_wdata_a_re = bf_sum_re; + bram_wdata_a_im = bf_sum_im; + + bram_we_b = 1'b1; + bram_addr_b = rd_addr_odd; + bram_wdata_b_re = bf_dif_re; + bram_wdata_b_im = bf_dif_im; + end + ST_OUTPUT: begin + bram_addr_a 
= out_count[LOG2N-1:0]; + end + default: begin + // keep defaults + end + endcase +end + +// ============================================================================ +// DATA MEMORY — True Dual-Port BRAM +// ============================================================================ +// For synthesis: xpm_memory_tdpram (Xilinx Parameterized Macros) +// For simulation: behavioral Verilog-2001 model (Icarus-compatible) +// ============================================================================ + +// XPM read-data wires (directly assigned to rdata regs below) +wire [INTERNAL_W-1:0] xpm_douta_re, xpm_doutb_re; +wire [INTERNAL_W-1:0] xpm_douta_im, xpm_doutb_im; + +always @(*) begin + mem_rdata_a_re = $signed(xpm_douta_re); + mem_rdata_a_im = $signed(xpm_douta_im); + mem_rdata_b_re = $signed(xpm_doutb_re); + mem_rdata_b_im = $signed(xpm_doutb_im); +end + +`ifndef FFT_XPM_BRAM +// ---------------------------------------------------------------------------- +// Default: behavioral TDP model (works with Icarus Verilog -g2001) +// For Vivado synthesis, define FFT_XPM_BRAM to use xpm_memory_tdpram. 
+// ---------------------------------------------------------------------------- +reg [INTERNAL_W-1:0] sim_mem_re [0:N-1]; +reg [INTERNAL_W-1:0] sim_mem_im [0:N-1]; + +// Port A +reg [INTERNAL_W-1:0] sim_douta_re, sim_douta_im; +always @(posedge clk) begin + if (bram_we_a) begin + sim_mem_re[bram_addr_a] <= bram_wdata_a_re; + sim_mem_im[bram_addr_a] <= bram_wdata_a_im; + end + sim_douta_re <= sim_mem_re[bram_addr_a]; + sim_douta_im <= sim_mem_im[bram_addr_a]; +end +assign xpm_douta_re = sim_douta_re; +assign xpm_douta_im = sim_douta_im; + +// Port B +reg [INTERNAL_W-1:0] sim_doutb_re, sim_doutb_im; +always @(posedge clk) begin + if (bram_we_b) begin + sim_mem_re[bram_addr_b] <= bram_wdata_b_re; + sim_mem_im[bram_addr_b] <= bram_wdata_b_im; + end + sim_doutb_re <= sim_mem_re[bram_addr_b]; + sim_doutb_im <= sim_mem_im[bram_addr_b]; +end +assign xpm_doutb_re = sim_doutb_re; +assign xpm_doutb_im = sim_doutb_im; + +integer init_i; +initial begin + for (init_i = 0; init_i < N; init_i = init_i + 1) begin + sim_mem_re[init_i] = 0; + sim_mem_im[init_i] = 0; + end +end + +`else +// ---------------------------------------------------------------------------- +// Synthesis: xpm_memory_tdpram — guaranteed BRAM mapping +// Enabled when FFT_XPM_BRAM is defined (e.g. in Vivado TCL script). +// ---------------------------------------------------------------------------- +// Note: Vivado auto-finds XPM library; no `include needed. +// Two instances: one for real, one for imaginary. +// WRITE_MODE = "read_first" matches the behavioral TDP template. +// READ_LATENCY = 1 (registered output). 
+// ---------------------------------------------------------------------------- + +xpm_memory_tdpram #( + .ADDR_WIDTH_A (LOG2N), + .ADDR_WIDTH_B (LOG2N), + .AUTO_SLEEP_TIME (0), + .BYTE_WRITE_WIDTH_A (INTERNAL_W), + .BYTE_WRITE_WIDTH_B (INTERNAL_W), + .CASCADE_HEIGHT (0), + .CLOCKING_MODE ("common_clock"), + .ECC_BIT_RANGE ("7:0"), + .ECC_MODE ("no_ecc"), + .ECC_TYPE ("none"), + .IGNORE_INIT_SYNTH (0), + .MEMORY_INIT_FILE ("none"), + .MEMORY_INIT_PARAM ("0"), + .MEMORY_OPTIMIZATION ("true"), + .MEMORY_PRIMITIVE ("block"), + .MEMORY_SIZE (N * INTERNAL_W), + .MESSAGE_CONTROL (0), + .RAM_DECOMP ("auto"), + .READ_DATA_WIDTH_A (INTERNAL_W), + .READ_DATA_WIDTH_B (INTERNAL_W), + .READ_LATENCY_A (1), + .READ_LATENCY_B (1), + .READ_RESET_VALUE_A ("0"), + .READ_RESET_VALUE_B ("0"), + .RST_MODE_A ("SYNC"), + .RST_MODE_B ("SYNC"), + .SIM_ASSERT_CHK (0), + .USE_EMBEDDED_CONSTRAINT (0), + .USE_MEM_INIT (1), + .USE_MEM_INIT_MMI (0), + .WAKEUP_TIME ("disable_sleep"), + .WRITE_DATA_WIDTH_A (INTERNAL_W), + .WRITE_DATA_WIDTH_B (INTERNAL_W), + .WRITE_MODE_A ("read_first"), + .WRITE_MODE_B ("read_first"), + .WRITE_PROTECT (1) +) u_bram_re ( + .clka (clk), + .clkb (clk), + .rsta (1'b0), + .rstb (1'b0), + .ena (1'b1), + .enb (1'b1), + .regcea (1'b1), + .regceb (1'b1), + .addra (bram_addr_a), + .addrb (bram_addr_b), + .dina (bram_wdata_a_re), + .dinb (bram_wdata_b_re), + .wea (bram_we_a), + .web (bram_we_b), + .douta (xpm_douta_re), + .doutb (xpm_doutb_re), + .injectdbiterra (1'b0), + .injectdbiterrb (1'b0), + .injectsbiterra (1'b0), + .injectsbiterrb (1'b0), + .sbiterra (), + .sbiterrb (), + .dbiterra (), + .dbiterrb (), + .sleep (1'b0) +); + +xpm_memory_tdpram #( + .ADDR_WIDTH_A (LOG2N), + .ADDR_WIDTH_B (LOG2N), + .AUTO_SLEEP_TIME (0), + .BYTE_WRITE_WIDTH_A (INTERNAL_W), + .BYTE_WRITE_WIDTH_B (INTERNAL_W), + .CASCADE_HEIGHT (0), + .CLOCKING_MODE ("common_clock"), + .ECC_BIT_RANGE ("7:0"), + .ECC_MODE ("no_ecc"), + .ECC_TYPE ("none"), + .IGNORE_INIT_SYNTH (0), + .MEMORY_INIT_FILE 
("none"), + .MEMORY_INIT_PARAM ("0"), + .MEMORY_OPTIMIZATION ("true"), + .MEMORY_PRIMITIVE ("block"), + .MEMORY_SIZE (N * INTERNAL_W), + .MESSAGE_CONTROL (0), + .RAM_DECOMP ("auto"), + .READ_DATA_WIDTH_A (INTERNAL_W), + .READ_DATA_WIDTH_B (INTERNAL_W), + .READ_LATENCY_A (1), + .READ_LATENCY_B (1), + .READ_RESET_VALUE_A ("0"), + .READ_RESET_VALUE_B ("0"), + .RST_MODE_A ("SYNC"), + .RST_MODE_B ("SYNC"), + .SIM_ASSERT_CHK (0), + .USE_EMBEDDED_CONSTRAINT (0), + .USE_MEM_INIT (1), + .USE_MEM_INIT_MMI (0), + .WAKEUP_TIME ("disable_sleep"), + .WRITE_DATA_WIDTH_A (INTERNAL_W), + .WRITE_DATA_WIDTH_B (INTERNAL_W), + .WRITE_MODE_A ("read_first"), + .WRITE_MODE_B ("read_first"), + .WRITE_PROTECT (1) +) u_bram_im ( + .clka (clk), + .clkb (clk), + .rsta (1'b0), + .rstb (1'b0), + .ena (1'b1), + .enb (1'b1), + .regcea (1'b1), + .regceb (1'b1), + .addra (bram_addr_a), + .addrb (bram_addr_b), + .dina (bram_wdata_a_im), + .dinb (bram_wdata_b_im), + .wea (bram_we_a), + .web (bram_we_b), + .douta (xpm_douta_im), + .doutb (xpm_doutb_im), + .injectdbiterra (1'b0), + .injectdbiterrb (1'b0), + .injectsbiterra (1'b0), + .injectsbiterrb (1'b0), + .sbiterra (), + .sbiterrb (), + .dbiterra (), + .dbiterrb (), + .sleep (1'b0) +); + +`endif + +// ============================================================================ +// OUTPUT PIPELINE +// ============================================================================ +reg out_pipe_valid; +reg out_pipe_inverse; + +always @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + out_pipe_valid <= 1'b0; + out_pipe_inverse <= 1'b0; + end else begin + out_pipe_valid <= (state == ST_OUTPUT) && (out_count <= FFT_N_M1[LOG2N-1:0]); + out_pipe_inverse <= inverse; + end +end + +// ============================================================================ +// MAIN FSM +// ============================================================================ +always @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + state <= ST_IDLE; + 
load_count <= 0; + out_count <= 0; + bfly_count <= 0; + stage <= 0; + half_reg <= 1; + tw_stride_reg <= FFT_N_HALF[LOG2N-1:0]; + dout_re <= 0; + dout_im <= 0; + dout_valid <= 0; + done <= 0; + rd_tw_cos <= 0; + rd_tw_sin <= 0; + rd_addr_even <= 0; + rd_addr_odd <= 0; + rd_inverse <= 0; + end else begin + dout_valid <= 1'b0; + done <= 1'b0; + + case (state) + + ST_IDLE: begin + if (start) begin + state <= ST_LOAD; + load_count <= 0; + end + end + + ST_LOAD: begin + if (din_valid) begin + if (load_count == FFT_N_M1[LOG2N-1:0]) begin + state <= ST_BF_READ; + stage <= 0; + bfly_count <= 0; + half_reg <= 1; + tw_stride_reg <= FFT_N_HALF[LOG2N-1:0]; + end else begin + load_count <= load_count + 1; + end + end + end + + ST_BF_READ: begin + rd_tw_cos <= tw_cos_lookup; + rd_tw_sin <= tw_sin_lookup; + rd_addr_even <= bf_addr_even; + rd_addr_odd <= bf_addr_odd; + rd_inverse <= inverse; + state <= ST_BF_CALC; + end + + ST_BF_CALC: begin + if (bfly_count == FFT_N_HALF_M1[LOG2N-1:0]) begin + bfly_count <= 0; + if (stage == LOG2N - 1) begin + state <= ST_OUTPUT; + out_count <= 0; + end else begin + stage <= stage + 1; + half_reg <= half_reg << 1; + tw_stride_reg <= tw_stride_reg >> 1; + state <= ST_BF_READ; + end + end else begin + bfly_count <= bfly_count + 1; + state <= ST_BF_READ; + end + end + + ST_OUTPUT: begin + if (out_count <= FFT_N_M1[LOG2N-1:0]) begin + out_count <= out_count + 1; + end + + if (out_pipe_valid) begin + if (out_pipe_inverse) begin + dout_re <= saturate(mem_rdata_a_re >>> LOG2N); + dout_im <= saturate(mem_rdata_a_im >>> LOG2N); + end else begin + dout_re <= saturate(mem_rdata_a_re); + dout_im <= saturate(mem_rdata_a_im); + end + dout_valid <= 1'b1; + end + + if (out_count > FFT_N_M1[LOG2N-1:0] && !out_pipe_valid) begin + state <= ST_DONE; + end + end + + ST_DONE: begin + done <= 1'b1; + state <= ST_IDLE; + end + + default: state <= ST_IDLE; + endcase + end +end + +endmodule diff --git a/9_Firmware/9_2_FPGA/fft_twiddle_1024.mem 
b/9_Firmware/9_2_FPGA/fft_twiddle_1024.mem new file mode 100644 index 0000000..dc4dd01 --- /dev/null +++ b/9_Firmware/9_2_FPGA/fft_twiddle_1024.mem @@ -0,0 +1,259 @@ +// Quarter-wave cosine ROM for 1024-point FFT +// 256 entries, 16-bit signed Q15 ($readmemh format) +// cos(2*pi*k/1024) for k = 0..255 +7FFF +7FFE +7FFD +7FF9 +7FF5 +7FF0 +7FE9 +7FE1 +7FD8 +7FCD +7FC1 +7FB4 +7FA6 +7F97 +7F86 +7F74 +7F61 +7F4D +7F37 +7F21 +7F09 +7EEF +7ED5 +7EB9 +7E9C +7E7E +7E5F +7E3E +7E1D +7DFA +7DD5 +7DB0 +7D89 +7D62 +7D39 +7D0E +7CE3 +7CB6 +7C88 +7C59 +7C29 +7BF8 +7BC5 +7B91 +7B5C +7B26 +7AEE +7AB6 +7A7C +7A41 +7A05 +79C8 +7989 +794A +7909 +78C7 +7884 +783F +77FA +77B3 +776B +7722 +76D8 +768D +7641 +75F3 +75A5 +7555 +7504 +74B2 +745F +740A +73B5 +735E +7307 +72AE +7254 +71F9 +719D +7140 +70E2 +7083 +7022 +6FC1 +6F5E +6EFB +6E96 +6E30 +6DC9 +6D61 +6CF8 +6C8E +6C23 +6BB7 +6B4A +6ADC +6A6D +69FD +698B +6919 +68A6 +6832 +67BC +6746 +66CF +6656 +65DD +6563 +64E8 +646C +63EE +6370 +62F1 +6271 +61F0 +616E +60EB +6068 +5FE3 +5F5D +5ED7 +5E4F +5DC7 +5D3E +5CB3 +5C28 +5B9C +5B0F +5A82 +59F3 +5964 +58D3 +5842 +57B0 +571D +568A +55F5 +5560 +54C9 +5432 +539B +5302 +5268 +51CE +5133 +5097 +4FFB +4F5D +4EBF +4E20 +4D81 +4CE0 +4C3F +4B9D +4AFB +4A58 +49B4 +490F +4869 +47C3 +471C +4675 +45CD +4524 +447A +43D0 +4325 +427A +41CE +4121 +4073 +3FC5 +3F17 +3E68 +3DB8 +3D07 +3C56 +3BA5 +3AF2 +3A40 +398C +38D9 +3824 +376F +36BA +3604 +354D +3496 +33DF +3326 +326E +31B5 +30FB +3041 +2F87 +2ECC +2E11 +2D55 +2C99 +2BDC +2B1F +2A61 +29A3 +28E5 +2826 +2767 +26A8 +25E8 +2528 +2467 +23A6 +22E5 +2223 +2161 +209F +1FDD +1F1A +1E57 +1D93 +1CCF +1C0B +1B47 +1A82 +19BE +18F9 +1833 +176E +16A8 +15E2 +151C +1455 +138F +12C8 +1201 +113A +1072 +0FAB +0EE3 +0E1C +0D54 +0C8C +0BC4 +0AFB +0A33 +096A +08A2 +07D9 +0711 +0648 +057F +04B6 +03ED +0324 +025B +0192 +00C9 diff --git a/9_Firmware/9_2_FPGA/fft_twiddle_32.mem b/9_Firmware/9_2_FPGA/fft_twiddle_32.mem new file mode 100644 index 0000000..5e49ff4 --- /dev/null +++ 
b/9_Firmware/9_2_FPGA/fft_twiddle_32.mem @@ -0,0 +1,11 @@ +// Quarter-wave cosine ROM for 32-point FFT +// 8 entries, 16-bit signed Q15 ($readmemh format) +// cos(2*pi*k/32) for k = 0..7 +7FFF +7D89 +7641 +6A6D +5A82 +471C +30FB +18F9 diff --git a/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v b/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v index acbf58c..287a72e 100644 --- a/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v +++ b/9_Firmware/9_2_FPGA/matched_filter_processing_chain.v @@ -529,18 +529,718 @@ end `else // ============================================================================ -// SYNTHESIS STUB +// SYNTHESIS IMPLEMENTATION — Radix-2 DIT FFT via fft_engine // ============================================================================ -// The behavioral FFT implementation above uses $cos/$sin/$rtoi (non- -// synthesizable). For real hardware, replace this stub with Xilinx xfft -// IP cores or a synthesizable pipelined FFT. The stub ties outputs to -// safe defaults so the rest of the design can be synthesized and verified. +// Uses a single fft_engine instance (1024-pt) reused 3 times: +// 1. Forward FFT of signal +// 2. Forward FFT of reference +// 3. Inverse FFT of conjugate product +// Conjugate multiply done via frequency_matched_filter (4-stage pipeline). +// +// Buffer scheme (BRAM-inferrable): +// sig_buf[1024]: ADC input -> signal FFT output +// ref_buf[1024]: Reference input -> reference FFT output +// prod_buf[1024]: Conjugate multiply output -> IFFT output +// +// Memory access is INSIDE always @(posedge clk) blocks (no async reset) +// using local blocking variables. This eliminates NBA race conditions +// and enables Vivado BRAM inference (same pattern as fft_engine.v). 
+//
+// BRAM read latency (1 cycle) is handled by "primed" flags:
+//   feed_primed — for FFT feed operations
+//   mult_primed — for conjugate multiply feed
+//   out_primed  — for output streaming
 // ============================================================================
-assign range_profile_i = 16'd0;
-assign range_profile_q = 16'd0;
-assign range_profile_valid = 1'b0;
-assign chain_state = 4'd0; // permanently IDLE
+localparam FFT_SIZE = 1024;
+localparam ADDR_BITS = 10;
+
+// State encoding (also driven out on chain_state)
+localparam [3:0] ST_IDLE     = 4'd0,
+                 ST_COLLECT  = 4'd1,  // Collect 1024 ADC + ref samples
+                 ST_SIG_FFT  = 4'd2,  // Forward FFT of signal
+                 ST_SIG_CAP  = 4'd3,  // Capture signal FFT output
+                 ST_REF_FFT  = 4'd4,  // Forward FFT of reference
+                 ST_REF_CAP  = 4'd5,  // Capture reference FFT output
+                 ST_MULTIPLY = 4'd6,  // Conjugate multiply (pipelined)
+                 ST_INV_FFT  = 4'd7,  // Inverse FFT of product
+                 ST_INV_CAP  = 4'd8,  // Capture IFFT output
+                 ST_OUTPUT   = 4'd9,  // Stream 1024 results
+                 ST_DONE     = 4'd10;
+
+reg [3:0] state;
+
+// ============================================================================
+// DATA BUFFERS (block RAM) — declared here, accessed in BRAM port blocks
+// ============================================================================
+(* ram_style = "block" *) reg signed [15:0] sig_buf_i [0:FFT_SIZE-1];
+(* ram_style = "block" *) reg signed [15:0] sig_buf_q [0:FFT_SIZE-1];
+(* ram_style = "block" *) reg signed [15:0] ref_buf_i [0:FFT_SIZE-1];
+(* ram_style = "block" *) reg signed [15:0] ref_buf_q [0:FFT_SIZE-1];
+(* ram_style = "block" *) reg signed [15:0] prod_buf_i [0:FFT_SIZE-1];
+(* ram_style = "block" *) reg signed [15:0] prod_buf_q [0:FFT_SIZE-1];
+
+// BRAM read data (registered outputs from port blocks)
+reg signed [15:0] sig_rdata_i, sig_rdata_q;
+reg signed [15:0] ref_rdata_i, ref_rdata_q;
+reg signed [15:0] prod_rdata_i, prod_rdata_q;
+
+// ============================================================================
+// COUNTERS
+// ============================================================================
+// All counters are ADDR_BITS+1 wide so they can hold FFT_SIZE itself; the
+// feed/mult/out counters additionally step to FFT_SIZE+1 as their "finished"
+// value. Only the low ADDR_BITS bits are ever used as a buffer address.
+reg [ADDR_BITS:0] collect_count;  // 0..1024 for sample collection
+reg [ADDR_BITS:0] feed_count;     // 0..1024 for feeding FFT engine
+reg [ADDR_BITS:0] cap_count;      // 0..1024 for capturing FFT output
+reg [ADDR_BITS:0] mult_count;     // 0..1024 for multiply feeding
+reg [ADDR_BITS:0] out_count;      // 0..1024 for output streaming
+
+// BRAM read latency pipeline flags
+reg feed_primed;  // 1 = BRAM rdata valid for feed operations
+reg mult_primed;  // 1 = BRAM rdata valid for multiply reads
+reg out_primed;   // 1 = BRAM rdata valid for output reads
+
+// ============================================================================
+// FFT ENGINE INTERFACE (single instance, reused 3 times)
+// ============================================================================
+reg fft_start;
+reg fft_inverse;
+reg signed [15:0] fft_din_re, fft_din_im;
+reg fft_din_valid;
+wire signed [15:0] fft_dout_re, fft_dout_im;
+wire fft_dout_valid;
+wire fft_busy;   // not read by any logic in this section
+wire fft_done;
+
+fft_engine #(
+    .N(FFT_SIZE),
+    .LOG2N(ADDR_BITS),
+    .DATA_W(16),
+    .INTERNAL_W(32),
+    .TWIDDLE_W(16),
+    .TWIDDLE_FILE("fft_twiddle_1024.mem")
+) fft_inst (
+    .clk(clk),
+    .reset_n(reset_n),
+    .start(fft_start),
+    .inverse(fft_inverse),
+    .din_re(fft_din_re),
+    .din_im(fft_din_im),
+    .din_valid(fft_din_valid),
+    .dout_re(fft_dout_re),
+    .dout_im(fft_dout_im),
+    .dout_valid(fft_dout_valid),
+    .busy(fft_busy),
+    .done(fft_done)
+);
+
+// ============================================================================
+// CONJUGATE MULTIPLY INTERFACE (frequency_matched_filter)
+// ============================================================================
+reg signed [15:0] mf_sig_re, mf_sig_im;
+reg signed [15:0] mf_ref_re, mf_ref_im;
+reg mf_valid_in;
+wire signed [15:0] mf_out_re, mf_out_im;
+wire mf_valid_out;
+
+frequency_matched_filter mf_inst (
+    .clk(clk),
+    .reset_n(reset_n),
+    .fft_real_in(mf_sig_re),
+    .fft_imag_in(mf_sig_im),
+    .fft_valid_in(mf_valid_in),
+    .ref_chirp_real(mf_ref_re),
+    .ref_chirp_imag(mf_ref_im),
+    .filtered_real(mf_out_re),
+    .filtered_imag(mf_out_im),
+    .filtered_valid(mf_valid_out),
+    .state()
+);
+
+// Pipeline flush counter for matched filter (4-stage pipeline)
+// NOTE(review): this counter is only incremented (ST_MULTIPLY) and cleared
+// (reset, ST_REF_CAP); nothing in this section reads it. ST_MULTIPLY exits on
+// cap_count == FFT_SIZE, not on this counter, and being 3 bits wide it wraps.
+reg [2:0] mf_flush_count;
+
+// ============================================================================
+// OUTPUT REGISTERS
+// ============================================================================
+reg out_valid_reg;
+reg signed [15:0] out_i_reg, out_q_reg;
+
+// ============================================================================
+// BRAM PORT: sig_buf — all address/we/wdata computed inline (race-free)
+// ============================================================================
+// Handles: IDLE/COLLECT writes, SIG_FFT/SIG_CAP capture writes,
+//          SIG_FFT feed reads, MULTIPLY signal reads
+// No async reset in sensitivity list — enables Vivado BRAM inference.
+//
+// we/addr/wdata_* are block-local and assigned with blocking '=' from the
+// current state/counters, so they act as a combinational decode. One address
+// is shared by the read and the write: when a capture write is decoded it
+// overrides the feed-read address chosen earlier in the same case arm.
+// The array read below samples the array before the nonblocking write lands,
+// so sig_rdata_* returns the pre-write contents of the addressed word.
+// ============================================================================
+always @(posedge clk) begin : sig_bram_port
+    reg we;
+    reg [ADDR_BITS-1:0] addr;
+    reg signed [15:0] wdata_i, wdata_q;
+
+    // Defaults
+    we = 1'b0;
+    addr = 0;
+    wdata_i = 0;
+    wdata_q = 0;
+
+    case (state)
+        ST_IDLE: begin
+            // First sample of a frame always lands at address 0; the FSM
+            // sets collect_count to 1 on the same edge.
+            if (adc_valid) begin
+                we = 1'b1;
+                addr = 0;
+                wdata_i = $signed(adc_data_i);
+                wdata_q = $signed(adc_data_q);
+            end
+        end
+        ST_COLLECT: begin
+            if (adc_valid && collect_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = collect_count[ADDR_BITS-1:0];
+                wdata_i = $signed(adc_data_i);
+                wdata_q = $signed(adc_data_q);
+            end
+        end
+        ST_SIG_FFT: begin
+            if (feed_count < FFT_SIZE && !feed_primed) begin
+                // Pre-read cycle: present address, no write
+                addr = feed_count[ADDR_BITS-1:0];
+            end else if (feed_count <= FFT_SIZE && feed_primed) begin
+                // Primed: read address for NEXT sample (or hold last)
+                if (feed_count < FFT_SIZE)
+                    addr = feed_count[ADDR_BITS-1:0];
+                else
+                    addr = 0; // don't care, past last sample
+            end
+            // Capture FFT output (write) — happens after feeding is done
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = fft_dout_re;
+                wdata_q = fft_dout_im;
+            end
+        end
+        ST_SIG_CAP: begin
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = fft_dout_re;
+                wdata_q = fft_dout_im;
+            end
+        end
+        ST_MULTIPLY: begin
+            // Read signal FFT results for conjugate multiply
+            if (mult_count < FFT_SIZE && !mult_primed) begin
+                addr = mult_count[ADDR_BITS-1:0];
+            end else if (mult_count <= FFT_SIZE && mult_primed) begin
+                if (mult_count < FFT_SIZE)
+                    addr = mult_count[ADDR_BITS-1:0];
+                else
+                    addr = 0;
+            end
+        end
+        default: begin
+            // keep defaults
+        end
+    endcase
+
+    // BRAM write
+    if (we) begin
+        sig_buf_i[addr] <= wdata_i;
+        sig_buf_q[addr] <= wdata_q;
+    end
+    // BRAM read (1-cycle latency)
+    sig_rdata_i <= sig_buf_i[addr];
+    sig_rdata_q <= sig_buf_q[addr];
+end
+
+// ============================================================================
+// BRAM PORT: ref_buf — all address/we/wdata computed inline (race-free)
+// ============================================================================
+// Handles: IDLE/COLLECT writes, REF_FFT/REF_CAP capture writes,
+//          REF_FFT feed reads, MULTIPLY reference reads
+// Same structure as sig_bram_port; the collected data is long_chirp_real/imag,
+// written on the same adc_valid strobes as the ADC samples.
+// ============================================================================
+always @(posedge clk) begin : ref_bram_port
+    reg we;
+    reg [ADDR_BITS-1:0] addr;
+    reg signed [15:0] wdata_i, wdata_q;
+
+    // Defaults
+    we = 1'b0;
+    addr = 0;
+    wdata_i = 0;
+    wdata_q = 0;
+
+    case (state)
+        ST_IDLE: begin
+            if (adc_valid) begin
+                we = 1'b1;
+                addr = 0;
+                wdata_i = $signed(long_chirp_real);
+                wdata_q = $signed(long_chirp_imag);
+            end
+        end
+        ST_COLLECT: begin
+            if (adc_valid && collect_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = collect_count[ADDR_BITS-1:0];
+                wdata_i = $signed(long_chirp_real);
+                wdata_q = $signed(long_chirp_imag);
+            end
+        end
+        ST_REF_FFT: begin
+            if (feed_count < FFT_SIZE && !feed_primed) begin
+                addr = feed_count[ADDR_BITS-1:0];
+            end else if (feed_count <= FFT_SIZE && feed_primed) begin
+                if (feed_count < FFT_SIZE)
+                    addr = feed_count[ADDR_BITS-1:0];
+                else
+                    addr = 0;
+            end
+            // Capture FFT output
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = fft_dout_re;
+                wdata_q = fft_dout_im;
+            end
+        end
+        ST_REF_CAP: begin
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = fft_dout_re;
+                wdata_q = fft_dout_im;
+            end
+        end
+        ST_MULTIPLY: begin
+            // Read reference FFT results for conjugate multiply
+            if (mult_count < FFT_SIZE && !mult_primed) begin
+                addr = mult_count[ADDR_BITS-1:0];
+            end else if (mult_count <= FFT_SIZE && mult_primed) begin
+                if (mult_count < FFT_SIZE)
+                    addr = mult_count[ADDR_BITS-1:0];
+                else
+                    addr = 0;
+            end
+        end
+        default: begin
+            // keep defaults
+        end
+    endcase
+
+    // BRAM write
+    if (we) begin
+        ref_buf_i[addr] <= wdata_i;
+        ref_buf_q[addr] <= wdata_q;
+    end
+    // BRAM read (1-cycle latency)
+    ref_rdata_i <= ref_buf_i[addr];
+    ref_rdata_q <= ref_buf_q[addr];
+end
+
+// ============================================================================
+// BRAM PORT: prod_buf — all address/we/wdata computed inline (race-free)
+// ============================================================================
+// Handles: MULTIPLY capture writes, INV_FFT/INV_CAP capture writes,
+//          INV_FFT feed reads, OUTPUT reads
+// The IFFT result overwrites the product in place, so ST_OUTPUT streams the
+// range profile from this same buffer.
+// ============================================================================
+always @(posedge clk) begin : prod_bram_port
+    reg we;
+    reg [ADDR_BITS-1:0] addr;
+    reg signed [15:0] wdata_i, wdata_q;
+
+    // Defaults
+    we = 1'b0;
+    addr = 0;
+    wdata_i = 0;
+    wdata_q = 0;
+
+    case (state)
+        ST_MULTIPLY: begin
+            // Capture conjugate multiply output
+            if (mf_valid_out && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = mf_out_re;
+                wdata_q = mf_out_im;
+            end
+        end
+        ST_INV_FFT: begin
+            if (feed_count < FFT_SIZE && !feed_primed) begin
+                addr = feed_count[ADDR_BITS-1:0];
+            end else if (feed_count <= FFT_SIZE && feed_primed) begin
+                if (feed_count < FFT_SIZE)
+                    addr = feed_count[ADDR_BITS-1:0];
+                else
+                    addr = 0;
+            end
+            // Capture IFFT output
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = fft_dout_re;
+                wdata_q = fft_dout_im;
+            end
+        end
+        ST_INV_CAP: begin
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                we = 1'b1;
+                addr = cap_count[ADDR_BITS-1:0];
+                wdata_i = fft_dout_re;
+                wdata_q = fft_dout_im;
+            end
+        end
+        ST_OUTPUT: begin
+            // Read product buffer for output streaming
+            if (out_count < FFT_SIZE && !out_primed) begin
+                addr = out_count[ADDR_BITS-1:0];
+            end else if (out_count <= FFT_SIZE && out_primed) begin
+                if (out_count < FFT_SIZE)
+                    addr = out_count[ADDR_BITS-1:0];
+                else
+                    addr = 0;
+            end
+        end
+        default: begin
+            // keep defaults
+        end
+    endcase
+
+    // BRAM write
+    if (we) begin
+        prod_buf_i[addr] <= wdata_i;
+        prod_buf_q[addr] <= wdata_q;
+    end
+    // BRAM read (1-cycle latency)
+    prod_rdata_i <= prod_buf_i[addr];
+    prod_rdata_q <= prod_buf_q[addr];
+end
+
+// ============================================================================
+// MAIN FSM — no buffer array accesses here (all via BRAM ports above)
+// ============================================================================
+// Sequence: IDLE -> COLLECT -> SIG_FFT -> SIG_CAP -> REF_FFT -> REF_CAP ->
+//           MULTIPLY -> INV_FFT -> INV_CAP -> OUTPUT -> DONE -> IDLE.
+// fft_start, fft_din_valid, mf_valid_in and out_valid_reg default to 0 every
+// clock and are therefore single-cycle strobes unless re-asserted.
+// adc_valid is only examined in IDLE and COLLECT; samples arriving in any
+// other state are not stored.
+// ============================================================================
+always @(posedge clk or negedge reset_n) begin
+    if (!reset_n) begin
+        state <= ST_IDLE;
+        collect_count <= 0;
+        feed_count <= 0;
+        cap_count <= 0;
+        mult_count <= 0;
+        out_count <= 0;
+        feed_primed <= 1'b0;
+        mult_primed <= 1'b0;
+        out_primed <= 1'b0;
+        fft_start <= 1'b0;
+        fft_inverse <= 1'b0;
+        fft_din_re <= 0;
+        fft_din_im <= 0;
+        fft_din_valid <= 1'b0;
+        mf_sig_re <= 0;
+        mf_sig_im <= 0;
+        mf_ref_re <= 0;
+        mf_ref_im <= 0;
+        mf_valid_in <= 1'b0;
+        mf_flush_count <= 0;
+        out_valid_reg <= 1'b0;
+        out_i_reg <= 0;
+        out_q_reg <= 0;
+    end else begin
+        // Defaults
+        fft_start <= 1'b0;
+        fft_din_valid <= 1'b0;
+        mf_valid_in <= 1'b0;
+        out_valid_reg <= 1'b0;
+
+        case (state)
+
+        // ================================================================
+        ST_IDLE: begin
+            collect_count <= 0;
+            feed_primed <= 1'b0;
+            mult_primed <= 1'b0;
+            out_primed <= 1'b0;
+            if (adc_valid) begin
+                // First sample written by sig/ref BRAM ports (they see
+                // state==ST_IDLE && adc_valid)
+                collect_count <= 1;
+                state <= ST_COLLECT;
+            end
+        end
+
+        // ================================================================
+        // COLLECT: Gather 1024 ADC + reference samples
+        // Writes happen in sig/ref BRAM ports (they see state==ST_COLLECT)
+        // ================================================================
+        ST_COLLECT: begin
+            if (adc_valid && collect_count < FFT_SIZE) begin
+                collect_count <= collect_count + 1;
+            end
+
+            // The transition is taken one clock after the 1024th sample is
+            // counted; an adc_valid seen on that clock is not stored.
+            if (collect_count == FFT_SIZE) begin
+                // All 1024 samples collected — start signal FFT
+                state <= ST_SIG_FFT;
+                fft_start <= 1'b1;
+                fft_inverse <= 1'b0; // Forward FFT
+                feed_count <= 0;
+                cap_count <= 0;
+                feed_primed <= 1'b0;
+            end
+        end
+
+        // ================================================================
+        // SIG_FFT: Feed signal buffer to FFT engine (forward)
+        // BRAM read has 1-cycle latency: address presented in BRAM port,
+        // data available in sig_rdata_i/q next cycle.
+        // ================================================================
+        ST_SIG_FFT: begin
+            // Feed phase: read sig_buf -> fft_din
+            if (feed_count < FFT_SIZE) begin
+                if (!feed_primed) begin
+                    // Pre-read cycle: address presented to BRAM, wait 1 cycle
+                    feed_primed <= 1'b1;
+                    feed_count <= feed_count + 1;
+                    // fft_din_valid stays 0 (default)
+                end else begin
+                    // Primed: BRAM rdata is valid for previous address
+                    fft_din_re <= sig_rdata_i;
+                    fft_din_im <= sig_rdata_q;
+                    fft_din_valid <= 1'b1;
+                    feed_count <= feed_count + 1;
+                end
+            end else if (feed_count == FFT_SIZE && feed_primed) begin
+                // Last sample: BRAM rdata has data for address 1023
+                fft_din_re <= sig_rdata_i;
+                fft_din_im <= sig_rdata_q;
+                fft_din_valid <= 1'b1;
+                feed_count <= feed_count + 1; // -> 1025, stops feeding
+            end
+
+            // Capture FFT output (writes happen in BRAM port)
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            if (fft_done) begin
+                state <= ST_SIG_CAP;
+            end
+        end
+
+        // ================================================================
+        // SIG_CAP: Ensure all signal FFT outputs captured
+        // NOTE(review): this state lasts exactly one clock. The increment
+        // below is overridden by the later `cap_count <= 0` (last
+        // nonblocking assignment wins), so it has no effect. Complete
+        // capture therefore assumes fft_done is not asserted before the
+        // final fft_dout_valid — confirm against fft_engine.v.
+        // ================================================================
+        ST_SIG_CAP: begin
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            // Start reference FFT
+            state <= ST_REF_FFT;
+            fft_start <= 1'b1;
+            fft_inverse <= 1'b0; // Forward FFT
+            feed_count <= 0;
+            cap_count <= 0;
+            feed_primed <= 1'b0;
+        end
+
+        // ================================================================
+        // REF_FFT: Feed reference buffer to FFT engine (forward)
+        // ================================================================
+        ST_REF_FFT: begin
+            // Feed phase: read ref_buf -> fft_din
+            if (feed_count < FFT_SIZE) begin
+                if (!feed_primed) begin
+                    feed_primed <= 1'b1;
+                    feed_count <= feed_count + 1;
+                end else begin
+                    fft_din_re <= ref_rdata_i;
+                    fft_din_im <= ref_rdata_q;
+                    fft_din_valid <= 1'b1;
+                    feed_count <= feed_count + 1;
+                end
+            end else if (feed_count == FFT_SIZE && feed_primed) begin
+                fft_din_re <= ref_rdata_i;
+                fft_din_im <= ref_rdata_q;
+                fft_din_valid <= 1'b1;
+                feed_count <= feed_count + 1;
+            end
+
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            if (fft_done) begin
+                state <= ST_REF_CAP;
+            end
+        end
+
+        // ================================================================
+        // REF_CAP: Ensure all ref FFT outputs captured
+        // NOTE(review): one-clock state; as in SIG_CAP the increment below
+        // is overridden by `cap_count <= 0`.
+        // ================================================================
+        ST_REF_CAP: begin
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            state <= ST_MULTIPLY;
+            mult_count <= 0;
+            cap_count <= 0;
+            mf_flush_count <= 0;
+            mult_primed <= 1'b0;
+        end
+
+        // ================================================================
+        // MULTIPLY: Stream sig FFT and ref FFT through freq_matched_filter
+        // Both sig_buf and ref_buf are read simultaneously (separate BRAM
+        // ports). Pipeline latency = 4 clocks. Feed 1024 pairs, then flush.
+        // Exit condition is cap_count == FFT_SIZE, i.e. it relies on
+        // mf_valid_out being asserted 1024 times.
+        // ================================================================
+        ST_MULTIPLY: begin
+            if (mult_count < FFT_SIZE) begin
+                if (!mult_primed) begin
+                    // Pre-read cycle
+                    mult_primed <= 1'b1;
+                    mult_count <= mult_count + 1;
+                end else begin
+                    mf_sig_re <= sig_rdata_i;
+                    mf_sig_im <= sig_rdata_q;
+                    mf_ref_re <= ref_rdata_i;
+                    mf_ref_im <= ref_rdata_q;
+                    mf_valid_in <= 1'b1;
+                    mult_count <= mult_count + 1;
+                end
+            end else if (mult_count == FFT_SIZE && mult_primed) begin
+                // Last sample
+                mf_sig_re <= sig_rdata_i;
+                mf_sig_im <= sig_rdata_q;
+                mf_ref_re <= ref_rdata_i;
+                mf_ref_im <= ref_rdata_q;
+                mf_valid_in <= 1'b1;
+                mult_count <= mult_count + 1;
+            end else begin
+                // Pipeline flush — wait for remaining outputs
+                mf_flush_count <= mf_flush_count + 1;
+            end
+
+            // Capture multiply outputs (writes happen in BRAM port)
+            if (mf_valid_out && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            // Done when all outputs captured
+            if (cap_count == FFT_SIZE) begin
+                state <= ST_INV_FFT;
+                fft_start <= 1'b1;
+                fft_inverse <= 1'b1; // Inverse FFT
+                feed_count <= 0;
+                cap_count <= 0;
+                feed_primed <= 1'b0;
+            end
+        end
+
+        // ================================================================
+        // INV_FFT: Feed product buffer to FFT engine (inverse)
+        // ================================================================
+        ST_INV_FFT: begin
+            if (feed_count < FFT_SIZE) begin
+                if (!feed_primed) begin
+                    feed_primed <= 1'b1;
+                    feed_count <= feed_count + 1;
+                end else begin
+                    fft_din_re <= prod_rdata_i;
+                    fft_din_im <= prod_rdata_q;
+                    fft_din_valid <= 1'b1;
+                    feed_count <= feed_count + 1;
+                end
+            end else if (feed_count == FFT_SIZE && feed_primed) begin
+                fft_din_re <= prod_rdata_i;
+                fft_din_im <= prod_rdata_q;
+                fft_din_valid <= 1'b1;
+                feed_count <= feed_count + 1;
+            end
+
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            if (fft_done) begin
+                state <= ST_INV_CAP;
+            end
+        end
+
+        // ================================================================
+        // INV_CAP: Ensure all IFFT outputs captured
+        // One-clock state. Unlike SIG_CAP/REF_CAP, cap_count is not
+        // cleared here, so the increment below does take effect.
+        // ================================================================
+        ST_INV_CAP: begin
+            if (fft_dout_valid && cap_count < FFT_SIZE) begin
+                cap_count <= cap_count + 1;
+            end
+
+            state <= ST_OUTPUT;
+            out_count <= 0;
+            out_primed <= 1'b0;
+        end
+
+        // ================================================================
+        // OUTPUT: Stream 1024 range profile samples
+        // BRAM read latency: present address, data valid next cycle.
+        // ================================================================
+        ST_OUTPUT: begin
+            if (out_count < FFT_SIZE) begin
+                if (!out_primed) begin
+                    // Pre-read cycle
+                    out_primed <= 1'b1;
+                    out_count <= out_count + 1;
+                end else begin
+                    out_i_reg <= prod_rdata_i;
+                    out_q_reg <= prod_rdata_q;
+                    out_valid_reg <= 1'b1;
+                    out_count <= out_count + 1;
+                end
+            end else if (out_count == FFT_SIZE && out_primed) begin
+                // Last sample
+                out_i_reg <= prod_rdata_i;
+                out_q_reg <= prod_rdata_q;
+                out_valid_reg <= 1'b1;
+                out_count <= out_count + 1;
+            end else begin
+                // out_count == FFT_SIZE+1: all 1024 samples have been issued
+                state <= ST_DONE;
+            end
+        end
+
+        // ================================================================
+        // DONE: Return to idle
+        // ================================================================
+        ST_DONE: begin
+            state <= ST_IDLE;
+        end
+
+        default: state <= ST_IDLE;
+
+        endcase
+    end
+end
+
+// ============================================================================
+// OUTPUT ASSIGNMENTS
+// ============================================================================
+assign range_profile_i = out_i_reg;
+assign range_profile_q = out_q_reg;
+assign range_profile_valid = out_valid_reg;
+assign chain_state = state;
+
+// ============================================================================
+// BUFFER INIT — zero-fills all six buffers at time 0 so that simulation never
+// reads X from a location that has not been written yet.
+// NOTE(review): the earlier wording claimed Vivado ignores initial blocks on
+// arrays. Vivado synthesis does accept initial-block RAM initialisation (see
+// UG901, "Initializing RAM Contents"); an all-zero fill should match the
+// default BRAM contents, so it is expected to be harmless — confirm.
+// ============================================================================
+integer init_idx;
+// Clear every word of the signal, reference and product buffers (I and Q).
+initial begin
+    for (init_idx = 0; init_idx < FFT_SIZE; init_idx = init_idx + 1) begin
+        sig_buf_i[init_idx] = 0;
+        sig_buf_q[init_idx] = 0;
+        ref_buf_i[init_idx] = 0;
+        ref_buf_q[init_idx] = 0;
+        prod_buf_i[init_idx] = 0;
+        prod_buf_q[init_idx] = 0;
+    end
+end
 `endif
diff --git a/9_Firmware/9_2_FPGA/tb/tb_fft_engine.v b/9_Firmware/9_2_FPGA/tb/tb_fft_engine.v
new file mode 100644
index 0000000..6c45921
--- /dev/null
+++ b/9_Firmware/9_2_FPGA/tb/tb_fft_engine.v
@@ -0,0 +1,526 @@
+`timescale 1ns / 1ps
+
+/**
+ * tb_fft_engine.v
+ *
+ * Testbench for the synthesizable FFT engine.
+ * Runs the engine at N=32 (fast) and validates key transform properties.
+ *
+ * Test Groups:
+ *   1. Impulse response: FFT of A*delta[0] should give A in every bin
+ *   2. DC input: FFT of constant A should give A*N in bin 0, 0 elsewhere
+ *   3. Single tone: FFT of cos(2*pi*4*n/N) should peak at bins 4 and N-4
+ *   4. Roundtrip: FFT then IFFT should recover the original samples
+ *   5. IFFT of impulse: bin 0 = N, rest 0, should give 1 in every sample
+ *   6. Parseval: N * time-domain energy ~= frequency-domain energy
+ *   7. Pure imaginary tone: j*sin(2*pi*2*n/N) should peak at bin 2 or N-2
+ *   8. Zero input: every output bin should be exactly 0
+ *
+ * Convention: standard check task with pass/fail tracking.
+ */
+
+module tb_fft_engine;
+
+// ============================================================================
+// PARAMETERS — test with 32-pt for speed
+// ============================================================================
+localparam N = 32;
+localparam LOG2N = 5;
+localparam DATA_W = 16;
+localparam INT_W = 32;
+localparam TW_W = 16;
+localparam CLK_PERIOD = 10;
+// Watchdog: maximum clocks run_fft waits for the N output samples before it
+// gives up and records a failure (prevents a broken DUT hanging simulation).
+localparam FFT_TIMEOUT = 1000 * N;
+
+// ============================================================================
+// SIGNALS
+// ============================================================================
+reg clk, reset_n;
+reg start, inverse;
+reg signed [DATA_W-1:0] din_re, din_im;
+reg din_valid;
+wire signed [DATA_W-1:0] dout_re, dout_im;
+wire dout_valid, busy, done_sig;
+
+// ============================================================================
+// DUT
+// ============================================================================
+fft_engine #(
+    .N(N),
+    .LOG2N(LOG2N),
+    .DATA_W(DATA_W),
+    .INTERNAL_W(INT_W),
+    .TWIDDLE_W(TW_W),
+    .TWIDDLE_FILE("fft_twiddle_32.mem")
+) dut (
+    .clk(clk),
+    .reset_n(reset_n),
+    .start(start),
+    .inverse(inverse),
+    .din_re(din_re),
+    .din_im(din_im),
+    .din_valid(din_valid),
+    .dout_re(dout_re),
+    .dout_im(dout_im),
+    .dout_valid(dout_valid),
+    .busy(busy),
+    .done(done_sig)
+);
+
+// ============================================================================
+// CLOCK
+// ============================================================================
+initial clk = 0;
+always #(CLK_PERIOD/2) clk = ~clk;
+
+// ============================================================================
+// PASS/FAIL TRACKING
+// ============================================================================
+integer pass_count, fail_count;
+
+// Record one test result: prints [PASS]/[FAIL] with the label and bumps the
+// matching counter. An X/Z condition takes the else branch and counts as FAIL.
+task check;
+    input cond;
+    input [512*8-1:0] label;
+    begin
+        if (cond) begin
+            $display("  [PASS] %0s", label);
+            pass_count = pass_count + 1;
+        end else begin
+            $display("  [FAIL] %0s", label);
+            fail_count = fail_count + 1;
+        end
+    end
+endtask
+
+// ============================================================================
+// STORAGE FOR CAPTURED OUTPUTS
+// ============================================================================
+reg signed [DATA_W-1:0] out_re [0:N-1];
+reg signed [DATA_W-1:0] out_im [0:N-1];
+integer out_idx;
+
+// Original time-domain samples, kept for the roundtrip comparison
+reg signed [DATA_W-1:0] orig_re [0:N-1];
+reg signed [DATA_W-1:0] orig_im [0:N-1];
+
+// Stimulus fed to the DUT by run_fft
+reg signed [DATA_W-1:0] in_re [0:N-1];
+reg signed [DATA_W-1:0] in_im [0:N-1];
+
+// ============================================================================
+// HELPER TASKS
+// ============================================================================
+
+// Hold reset low for 5 clocks with all DUT inputs idle, then release it and
+// wait 2 more clocks.
+task do_reset;
+    begin
+        reset_n = 0;
+        start = 0;
+        inverse = 0;
+        din_re = 0;
+        din_im = 0;
+        din_valid = 0;
+        repeat(5) @(posedge clk); #1;
+        reset_n = 1;
+        repeat(2) @(posedge clk); #1;
+    end
+endtask
+
+// Run one transform: pulse start, stream the N samples in in_re/in_im with
+// din_valid high, then capture the N dout_valid samples into out_re/out_im.
+//   inv = 0 -> forward FFT, inv = 1 -> inverse FFT.
+// If fewer than N outputs arrive within FFT_TIMEOUT clocks the task records a
+// failure and fills the missing entries with the most negative value, so
+// stale results from a previous run cannot satisfy later checks.
+task run_fft;
+    input inv;
+    integer i;
+    integer wait_cycles;
+    begin
+        inverse = inv;
+        @(posedge clk); #1;
+        start = 1;
+        @(posedge clk); #1;
+        start = 0;
+
+        // Feed N samples
+        for (i = 0; i < N; i = i + 1) begin
+            din_re = in_re[i];
+            din_im = in_im[i];
+            din_valid = 1;
+            @(posedge clk); #1;
+        end
+        din_valid = 0;
+        din_re = 0;
+        din_im = 0;
+
+        // Wait for output and capture (bounded by the watchdog)
+        out_idx = 0;
+        wait_cycles = 0;
+        while (out_idx < N && wait_cycles < FFT_TIMEOUT) begin
+            @(posedge clk); #1;
+            wait_cycles = wait_cycles + 1;
+            if (dout_valid) begin
+                out_re[out_idx] = dout_re;
+                out_im[out_idx] = dout_im;
+                out_idx = out_idx + 1;
+            end
+        end
+
+        if (out_idx < N) begin
+            $display("  [FAIL] run_fft: timeout after %0d clocks, %0d/%0d outputs seen",
+                     wait_cycles, out_idx, N);
+            fail_count = fail_count + 1;
+            for (i = out_idx; i < N; i = i + 1) begin
+                out_re[i] = {1'b1, {(DATA_W-1){1'b0}}};
+                out_im[i] = {1'b1, {(DATA_W-1){1'b0}}};
+            end
+        end
+
+        // One settling clock before the next transform is started
+        @(posedge clk); #1;
+    end
+endtask
+
+// ============================================================================
+// VCD DUMP
+// ============================================================================
+initial begin
+    $dumpfile("tb_fft_engine.vcd");
+    $dumpvars(0, tb_fft_engine);
+end
+
+// ============================================================================
+// MAIN TEST
+// ============================================================================
+integer i, j;
+integer max_mag_bin;
+reg signed [31:0] max_mag;
+reg signed [31:0] mag;
+reg signed [31:0] err;
+integer max_err;
+
+// 64-bit accumulators for the Parseval check: N * energy and err * 100 do not
+// fit reliably in 32 bits and previously wrapped, letting large errors pass.
+reg signed [63:0] energy_in, energy_out, energy_err;
+
+// For tone generation
+real angle;
+reg signed [DATA_W-1:0] cos_val;
+
+initial begin
+    pass_count = 0;
+    fail_count = 0;
+
+    $display("============================================================");
+    $display("  FFT Engine Testbench — N=%0d", N);
+    $display("============================================================");
+
+    do_reset;
+
+    // ================================================================
+    // TEST GROUP 1: Impulse Response
+    // FFT of an impulse with amplitude A at n=0: every bin = A + j0.
+    // Amplitude 1000 is used so rounding error is visible but small.
+    // ================================================================
+    $display("");
+    $display("--- Test Group 1: Impulse Response ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = (i == 0) ? 16'sd1000 : 16'sd0;
+        in_im[i] = 16'sd0;
+    end
+
+    run_fft(0);  // Forward FFT
+
+    // All bins should have re ~= 1000, im ~= 0
+    max_err = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        err = out_re[i] - 1000;
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+        err = out_im[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+    end
+    $display("  Impulse FFT max error from expected: %0d", max_err);
+    check(max_err < 10, "Impulse FFT: all bins ~= input amplitude");
+    check(out_re[0] >= 998 && out_re[0] <= 1002,
+          "Impulse FFT: bin 0 real ~= 1000");
+
+    // ================================================================
+    // TEST GROUP 2: DC Input
+    // FFT of constant value A across all N samples:
+    //   bin 0 = A*N, all other bins = 0
+    // Use amplitude 100 so bin 0 = 100*32 = 3200
+    // ================================================================
+    $display("");
+    $display("--- Test Group 2: DC Input ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = 16'sd100;
+        in_im[i] = 16'sd0;
+    end
+
+    run_fft(0);
+
+    $display("  DC FFT bin[0] = %0d + j%0d (expect %0d + j0)", out_re[0], out_im[0], 100*N);
+    // Q15 twiddle rounding over N butterflies can cause ~1% error
+    check(out_re[0] >= (100*N - 50) && out_re[0] <= (100*N + 50),
+          "DC FFT: bin 0 real ~= A*N (1.5% tol)");
+
+    // Largest |re| or |im| over the non-DC bins
+    max_err = 0;
+    for (i = 1; i < N; i = i + 1) begin
+        err = out_re[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+        err = out_im[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+    end
+    $display("  DC FFT max non-DC bin magnitude: %0d", max_err);
+    check(max_err < 20, "DC FFT: non-DC bins ~= 0 (Q15 rounding tol)");
+
+    // ================================================================
+    // TEST GROUP 3: Single Tone (cosine at bin 4)
+    // cos(2*pi*4*n/32) -> peaks at bins 4 and N-4=28
+    // Amplitude 1000 -> each peak = 1000*N/2 = 16000
+    // ================================================================
+    $display("");
+    $display("--- Test Group 3: Single Tone (bin 4) ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        // cos(2*pi*4*i/32) scaled to amplitude 1000
+        angle = 6.28318530718 * 4.0 * i / 32.0;
+        cos_val = $rtoi($cos(angle) * 1000.0);
+        in_re[i] = cos_val;
+        in_im[i] = 16'sd0;
+    end
+
+    run_fft(0);
+
+    // Find peak bin (squared magnitude; first maximum wins on ties)
+    max_mag = 0;
+    max_mag_bin = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
+        if (mag > max_mag) begin
+            max_mag = mag;
+            max_mag_bin = i;
+        end
+    end
+    $display("  Tone FFT peak bin: %0d (expect 4)", max_mag_bin);
+    $display("  Tone FFT bin[4] = %0d + j%0d", out_re[4], out_im[4]);
+    $display("  Tone FFT bin[28] = %0d + j%0d", out_re[28], out_im[28]);
+    check(max_mag_bin == 4 || max_mag_bin == 28,
+          "Tone FFT: peak at bin 4 or 28");
+    // Bin 4 and 28 should have magnitude ~= N/2 * 1000 = 16000
+    mag = out_re[4] * out_re[4] + out_im[4] * out_im[4];
+    check(mag > 15000*15000 && mag < 17000*17000,
+          "Tone FFT: bin 4 magnitude ~= 16000");
+
+    // ================================================================
+    // TEST GROUP 4: Roundtrip (FFT then IFFT = identity)
+    // Deterministic pseudo-random pattern -> FFT -> IFFT -> compare
+    // ================================================================
+    $display("");
+    $display("--- Test Group 4: Roundtrip (FFT->IFFT) ---");
+
+    // Keep a copy of the stimulus: in_re/in_im are reused for the IFFT input
+    for (i = 0; i < N; i = i + 1) begin
+        orig_re[i] = (i * 137 + 42) % 2001 - 1000;  // [-1000, 1000]
+        orig_im[i] = (i * 251 + 17) % 2001 - 1000;
+        in_re[i] = orig_re[i];
+        in_im[i] = orig_im[i];
+    end
+
+    // Forward FFT
+    run_fft(0);
+
+    // Feed the spectrum straight back in as the IFFT input
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = out_re[i];
+        in_im[i] = out_im[i];
+    end
+
+    run_fft(1);
+
+    // out_re/out_im should match the original within tolerance
+    max_err = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        err = out_re[i] - orig_re[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+        err = out_im[i] - orig_im[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+    end
+    $display("  Roundtrip max error: %0d", max_err);
+    check(max_err < 20, "Roundtrip: FFT->IFFT recovers original (err < 20)");
+    check(max_err < 5, "Roundtrip: FFT->IFFT tight tolerance (err < 5)");
+
+    // Print first few samples for debugging
+    $display("  Sample comparison (idx: original vs recovered):");
+    for (i = 0; i < 8; i = i + 1) begin
+        $display("    [%0d] re: %0d vs %0d, im: %0d vs %0d",
+                 i, orig_re[i], out_re[i], orig_im[i], out_im[i]);
+    end
+
+    // ================================================================
+    // TEST GROUP 5: IFFT of impulse
+    // Input: bin[0] = N (=32), all other bins = 0.
+    // The test expects every IFFT output sample to be 1 + j0.
+    // ================================================================
+    $display("");
+    $display("--- Test Group 5: IFFT of Impulse ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = (i == 0) ? N : 16'sd0;
+        in_im[i] = 16'sd0;
+    end
+
+    run_fft(1);  // Inverse FFT
+
+    max_err = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        err = out_re[i] - 1;
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+        err = out_im[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+    end
+    $display("  IFFT impulse max error: %0d", max_err);
+    check(max_err < 2, "IFFT impulse: all samples ~= 1");
+
+    // ================================================================
+    // TEST GROUP 6: Parseval's theorem (energy conservation)
+    // Sum |x[n]|^2 should equal (1/N) * Sum |X[k]|^2
+    // We compare N * sum_time vs sum_freq, all in 64-bit arithmetic.
+    // ================================================================
+    $display("");
+    $display("--- Test Group 6: Parseval's Theorem ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = (i * 137 + 42) % 2001 - 1000;
+        in_im[i] = (i * 251 + 17) % 2001 - 1000;
+    end
+
+    // Compute time-domain energy
+    energy_in = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        energy_in = energy_in + in_re[i] * in_re[i] + in_im[i] * in_im[i];
+    end
+
+    run_fft(0);
+
+    // Compute frequency-domain energy
+    energy_out = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        energy_out = energy_out + out_re[i] * out_re[i] + out_im[i] * out_im[i];
+    end
+
+    // Parseval: sum_time = (1/N) * sum_freq  =>  N * sum_time = sum_freq
+    $display("  Time energy * N = %0d", energy_in * N);
+    $display("  Freq energy     = %0d", energy_out);
+    // Allow some tolerance for fixed-point rounding
+    energy_err = energy_in * N - energy_out;
+    if (energy_err < 0) energy_err = -energy_err;
+    $display("  Parseval error  = %0d", energy_err);
+    // Relative error; a zero-energy stimulus is reported as a failure instead
+    // of silently skipping the check.
+    if (energy_in * N > 0) begin
+        $display("  Parseval rel error = %0d%%", (energy_err * 100) / (energy_in * N));
+        check((energy_err * 100) / (energy_in * N) < 5,
+              "Parseval: energy conserved within 5%");
+    end else begin
+        check(1'b0, "Parseval: energy conserved within 5%");
+    end
+
+    // ================================================================
+    // TEST GROUP 7: Pure imaginary input
+    // FFT of j*sin(2*pi*2*n/N) -> peaks at bins 2 and N-2
+    // ================================================================
+    $display("");
+    $display("--- Test Group 7: Pure Imaginary Tone (bin 2) ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = 16'sd0;
+        angle = 6.28318530718 * 2.0 * i / 32.0;
+        in_im[i] = $rtoi($sin(angle) * 1000.0);
+    end
+
+    run_fft(0);
+
+    // Find peak
+    max_mag = 0;
+    max_mag_bin = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        mag = out_re[i] * out_re[i] + out_im[i] * out_im[i];
+        if (mag > max_mag) begin
+            max_mag = mag;
+            max_mag_bin = i;
+        end
+    end
+    $display("  Imag tone peak bin: %0d (expect 2 or 30)", max_mag_bin);
+    check(max_mag_bin == 2 || max_mag_bin == 30,
+          "Imag tone: peak at bin 2 or 30");
+
+    // ================================================================
+    // TEST GROUP 8: Zero input
+    // ================================================================
+    $display("");
+    $display("--- Test Group 8: Zero Input ---");
+
+    for (i = 0; i < N; i = i + 1) begin
+        in_re[i] = 16'sd0;
+        in_im[i] = 16'sd0;
+    end
+
+    run_fft(0);
+
+    max_err = 0;
+    for (i = 0; i < N; i = i + 1) begin
+        err = out_re[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+        err = out_im[i];
+        if (err < 0) err = -err;
+        if (err > max_err) max_err = err;
+    end
+    check(max_err == 0, "Zero input: all output bins = 0");
+
+    // ================================================================
+    // SUMMARY
+    // ================================================================
+    $display("");
+    $display("============================================================");
+    $display("  RESULTS: %0d/%0d passed", pass_count, pass_count + fail_count);
+    if (fail_count == 0)
+        $display("  ALL TESTS PASSED");
+    else
+        $display("  SOME TESTS FAILED");
+    $display("============================================================");
+
+    $finish;
+end
+
+endmodule
diff --git a/9_Firmware/9_2_FPGA/tb/tb_mf_chain_synth.v b/9_Firmware/9_2_FPGA/tb/tb_mf_chain_synth.v
new file mode 100644 index 0000000..890cb94 --- /dev/null +++ b/9_Firmware/9_2_FPGA/tb/tb_mf_chain_synth.v @@ -0,0 +1,543 @@ +`timescale 1ns / 1ps + +/** + * tb_mf_chain_synth.v + * + * Testbench for the SYNTHESIS branch of matched_filter_processing_chain.v. + * This is compiled WITHOUT -DSIMULATION so the `else` branch (fft_engine-based) + * is activated. + * + * The synthesis branch uses an iterative fft_engine (1024-pt, single butterfly), + * so processing takes ~40K+ clock cycles per frame. Timeouts are set accordingly. + */ + +module tb_mf_chain_synth; + + // ── Parameters ───────────────────────────────────────────── + localparam CLK_PERIOD = 10.0; // 100 MHz + localparam FFT_SIZE = 1024; + // Timeout for full frame processing: + // 3 FFTs × ~12K cycles each + multiply ~1K + overhead ≈ 40K + // Use 200K for safety margin + localparam FRAME_TIMEOUT = 200000; + + // ── Signals ──────────────────────────────────────────────── + reg clk; + reg reset_n; + reg [15:0] adc_data_i; + reg [15:0] adc_data_q; + reg adc_valid; + reg [5:0] chirp_counter; + reg [15:0] long_chirp_real; + reg [15:0] long_chirp_imag; + reg [15:0] short_chirp_real; + reg [15:0] short_chirp_imag; + wire signed [15:0] range_profile_i; + wire signed [15:0] range_profile_q; + wire range_profile_valid; + wire [3:0] chain_state; + + // ── Test bookkeeping ─────────────────────────────────────── + integer pass_count; + integer fail_count; + integer test_num; + integer i; + + // Synthesis-branch states (mirror DUT) + localparam [3:0] ST_IDLE = 4'd0, + ST_COLLECT = 4'd1, + ST_SIG_FFT = 4'd2, + ST_SIG_CAP = 4'd3, + ST_REF_FFT = 4'd4, + ST_REF_CAP = 4'd5, + ST_MULTIPLY = 4'd6, + ST_INV_FFT = 4'd7, + ST_INV_CAP = 4'd8, + ST_OUTPUT = 4'd9, + ST_DONE = 4'd10; + + // ── Concurrent output capture ────────────────────────────── + integer cap_count; + reg cap_enable; + integer cap_max_abs; + integer cap_peak_bin; + integer cap_cur_abs; + + // Output capture arrays + reg signed [15:0] cap_out_i [0:1023]; + reg 
signed [15:0] cap_out_q [0:1023]; + + // ── Clock ────────────────────────────────────────────────── + always #(CLK_PERIOD/2) clk = ~clk; + + // ── DUT ──────────────────────────────────────────────────── + matched_filter_processing_chain uut ( + .clk (clk), + .reset_n (reset_n), + .adc_data_i (adc_data_i), + .adc_data_q (adc_data_q), + .adc_valid (adc_valid), + .chirp_counter (chirp_counter), + .long_chirp_real (long_chirp_real), + .long_chirp_imag (long_chirp_imag), + .short_chirp_real (short_chirp_real), + .short_chirp_imag (short_chirp_imag), + .range_profile_i (range_profile_i), + .range_profile_q (range_profile_q), + .range_profile_valid (range_profile_valid), + .chain_state (chain_state) + ); + + // ── Concurrent output capture block ──────────────────────── + always @(posedge clk) begin + #1; + if (cap_enable && range_profile_valid) begin + if (cap_count < FFT_SIZE) begin + cap_out_i[cap_count] = range_profile_i; + cap_out_q[cap_count] = range_profile_q; + end + cap_cur_abs = (range_profile_i[15] ? -range_profile_i : range_profile_i) + + (range_profile_q[15] ? 
-range_profile_q : range_profile_q); + if (cap_cur_abs > cap_max_abs) begin + cap_max_abs = cap_cur_abs; + cap_peak_bin = cap_count; + end + cap_count = cap_count + 1; + end + end + + // ── Check task ───────────────────────────────────────────── + task check; + input cond; + input [511:0] label; + begin + test_num = test_num + 1; + if (cond) begin + $display("[PASS] Test %0d: %0s", test_num, label); + pass_count = pass_count + 1; + end else begin + $display("[FAIL] Test %0d: %0s", test_num, label); + fail_count = fail_count + 1; + end + end + endtask + + // ── Helper: apply reset ──────────────────────────────────── + task apply_reset; + begin + reset_n = 0; + adc_valid = 0; + adc_data_i = 16'd0; + adc_data_q = 16'd0; + chirp_counter = 6'd0; + long_chirp_real = 16'd0; + long_chirp_imag = 16'd0; + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + cap_enable = 0; + cap_count = 0; + cap_max_abs = 0; + cap_peak_bin = -1; + repeat (4) @(posedge clk); + reset_n = 1; + @(posedge clk); + #1; + end + endtask + + // ── Helper: start capture ────────────────────────────────── + task start_capture; + begin + cap_count = 0; + cap_max_abs = 0; + cap_peak_bin = -1; + cap_enable = 1; + end + endtask + + // ── Helper: wait for IDLE with long timeout ──────────────── + task wait_for_idle; + integer wait_count; + begin + wait_count = 0; + while (chain_state != ST_IDLE && wait_count < FRAME_TIMEOUT) begin + @(posedge clk); + wait_count = wait_count + 1; + end + #1; + if (wait_count >= FRAME_TIMEOUT) + $display(" WARNING: wait_for_idle timed out at %0d cycles", wait_count); + end + endtask + + // ── Helper: feed DC frame ────────────────────────────────── + task feed_dc_frame; + integer k; + begin + for (k = 0; k < FFT_SIZE; k = k + 1) begin + adc_data_i = 16'sh1000; // +4096 + adc_data_q = 16'sh0000; + long_chirp_real = 16'sh1000; + long_chirp_imag = 16'sh0000; + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); + #1; + end + adc_valid 
= 1'b0; + end + endtask + + // ── Helper: feed tone frame (signal=reference=tone at bin) ─ + task feed_tone_frame; + input integer tone_bin; + integer k; + real angle; + begin + for (k = 0; k < FFT_SIZE; k = k + 1) begin + angle = 6.28318530718 * tone_bin * k / (1.0 * FFT_SIZE); + adc_data_i = $rtoi(8000.0 * $cos(angle)); + adc_data_q = $rtoi(8000.0 * $sin(angle)); + long_chirp_real = $rtoi(8000.0 * $cos(angle)); + long_chirp_imag = $rtoi(8000.0 * $sin(angle)); + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); + #1; + end + adc_valid = 1'b0; + end + endtask + + // ── Helper: feed impulse frame (delta at sample 0) ───────── + task feed_impulse_frame; + integer k; + begin + for (k = 0; k < FFT_SIZE; k = k + 1) begin + if (k == 0) begin + adc_data_i = 16'sh4000; // 0.5 in Q15 + adc_data_q = 16'sh0000; + long_chirp_real = 16'sh4000; + long_chirp_imag = 16'sh0000; + end else begin + adc_data_i = 16'sh0000; + adc_data_q = 16'sh0000; + long_chirp_real = 16'sh0000; + long_chirp_imag = 16'sh0000; + end + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); + #1; + end + adc_valid = 1'b0; + end + endtask + + // ── Stimulus ─────────────────────────────────────────────── + initial begin + $dumpfile("tb_mf_chain_synth.vcd"); + $dumpvars(0, tb_mf_chain_synth); + + // Init + clk = 0; + pass_count = 0; + fail_count = 0; + test_num = 0; + cap_enable = 0; + cap_count = 0; + cap_max_abs = 0; + cap_peak_bin = -1; + + // ════════════════════════════════════════════════════════ + // TEST GROUP 1: Reset behaviour + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 1: Reset Behaviour ---"); + apply_reset; + + reset_n = 0; + repeat (4) @(posedge clk); #1; + check(range_profile_valid === 1'b0, "range_profile_valid=0 during reset"); + check(chain_state === ST_IDLE, "chain_state=IDLE during reset"); + reset_n = 1; + @(posedge clk); #1; + + // 
════════════════════════════════════════════════════════ + // TEST GROUP 2: No valid input stays IDLE + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 2: No Valid Input → Stays IDLE ---"); + apply_reset; + + repeat (100) @(posedge clk); + #1; + check(chain_state === ST_IDLE, "Stays in IDLE with no valid input"); + check(range_profile_valid === 1'b0, "No output when no input"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 3: DC frame — state transitions and output count + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 3: DC Frame — Full Processing ---"); + apply_reset; + + start_capture; + feed_dc_frame; + + $display(" Waiting for processing (3 FFTs + multiply)..."); + wait_for_idle; + cap_enable = 0; + + $display(" Output count: %0d (expected %0d)", cap_count, FFT_SIZE); + $display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "DC: Outputs exactly 1024 range profile samples"); + check(chain_state === ST_IDLE, "DC: Returns to IDLE after frame"); + // DC autocorrelation: FFT of DC = energy at bin 0 only + // conj multiply = |bin0|^2 at bin 0, zeros elsewhere + // IFFT of single bin = constant => peak at bin 0 (or any bin since all equal) + // With Q15 truncation, expect non-zero output + check(cap_max_abs > 0, "DC: Non-zero output"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 4: Zero input → zero output + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 4: Zero Input → Zero Output ---"); + apply_reset; + + start_capture; + for (i = 0; i < FFT_SIZE; i = i + 1) begin + adc_data_i = 16'd0; + adc_data_q = 16'd0; + long_chirp_real = 16'd0; + long_chirp_imag = 16'd0; + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); #1; + end + adc_valid = 1'b0; + + wait_for_idle; + cap_enable = 0; + + 
$display(" Output count: %0d", cap_count); + $display(" Max magnitude: %0d", cap_max_abs); + check(cap_count == FFT_SIZE, "Zero: Got 1024 output samples"); + // Allow small rounding noise (fft_engine Q15 rounding can produce ±1) + check(cap_max_abs <= 2, "Zero: Output magnitude <= 2 (near zero)"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 5: Tone autocorrelation (bin 5) + // signal = reference = tone at bin 5 + // Autocorrelation peak at bin 0 (time lag 0) + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 5: Tone Autocorrelation (bin 5) ---"); + apply_reset; + + start_capture; + feed_tone_frame(5); + + $display(" Waiting for processing..."); + wait_for_idle; + cap_enable = 0; + + $display(" Output count: %0d", cap_count); + $display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "Tone: Got 1024 output samples"); + // Autocorrelation of a pure tone: peak at bin 0 + check(cap_peak_bin <= 5 || cap_peak_bin >= FFT_SIZE - 5, + "Tone: Autocorrelation peak near bin 0"); + check(cap_max_abs > 0, "Tone: Peak magnitude > 0"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 6: Impulse autocorrelation + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 6: Impulse Autocorrelation ---"); + apply_reset; + + start_capture; + feed_impulse_frame; + + $display(" Waiting for processing..."); + wait_for_idle; + cap_enable = 0; + + $display(" Output count: %0d", cap_count); + $display(" Peak bin: %0d, magnitude: %0d", cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "Impulse: Got 1024 output samples"); + check(cap_max_abs > 0, "Impulse: Non-zero output"); + check(chain_state === ST_IDLE, "Impulse: Returns to IDLE"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 7: Reset mid-operation + // ════════════════════════════════════════════════════════ + 
$display("\n--- Test Group 7: Reset Mid-Operation ---"); + apply_reset; + + // Feed ~512 samples (halfway through collection) + for (i = 0; i < 512; i = i + 1) begin + adc_data_i = 16'sh1000; + adc_data_q = 16'sh0000; + long_chirp_real = 16'sh1000; + long_chirp_imag = 16'sh0000; + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); #1; + end + adc_valid = 1'b0; + + // Assert reset + reset_n = 0; + repeat (4) @(posedge clk); #1; + reset_n = 1; + @(posedge clk); #1; + + check(chain_state === ST_IDLE, "Mid-op reset: Returns to IDLE"); + check(range_profile_valid === 1'b0, "Mid-op reset: No output"); + + // Feed a complete frame after reset + start_capture; + feed_dc_frame; + wait_for_idle; + cap_enable = 0; + + $display(" Post-reset frame: %0d outputs", cap_count); + check(cap_count == FFT_SIZE, "Mid-op reset: Post-reset frame gives 1024 outputs"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 8: Back-to-back frames + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 8: Back-to-Back Frames ---"); + apply_reset; + + // Frame 1 + start_capture; + feed_dc_frame; + wait_for_idle; + cap_enable = 0; + $display(" Frame 1: %0d outputs, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "B2B Frame 1: 1024 outputs"); + + // Frame 2 + start_capture; + feed_tone_frame(3); + wait_for_idle; + cap_enable = 0; + $display(" Frame 2: %0d outputs, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "B2B Frame 2: 1024 outputs"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 9: Mismatched signal vs reference + // Signal at bin 5, reference at bin 10 + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 9: Mismatched Signal vs Reference ---"); + apply_reset; + + start_capture; + for (i = 0; i < FFT_SIZE; i = i + 1) begin + adc_data_i 
= $rtoi(8000.0 * $cos(6.28318530718 * 5 * i / 1024.0)); + adc_data_q = $rtoi(8000.0 * $sin(6.28318530718 * 5 * i / 1024.0)); + long_chirp_real = $rtoi(8000.0 * $cos(6.28318530718 * 10 * i / 1024.0)); + long_chirp_imag = $rtoi(8000.0 * $sin(6.28318530718 * 10 * i / 1024.0)); + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); #1; + end + adc_valid = 1'b0; + + wait_for_idle; + cap_enable = 0; + + $display(" Mismatched: peak bin=%0d, magnitude=%0d", cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "Mismatch: Got 1024 output samples"); + // Signal=bin5, ref=bin10: the two spectra occupy different bins, so the per-bin + // frequency-domain product is ideally zero; only quantisation residue survives the IFFT + // The key check is that it completes and produces output + check(cap_max_abs > 0, "Mismatch: Non-zero output"); + check(chain_state === ST_IDLE, "Mismatch: Returns to IDLE"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 10: Saturation — max positive values + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 10: Saturation — Max Positive ---"); + apply_reset; + + start_capture; + for (i = 0; i < FFT_SIZE; i = i + 1) begin + adc_data_i = 16'sh7FFF; + adc_data_q = 16'sh7FFF; + long_chirp_real = 16'sh7FFF; + long_chirp_imag = 16'sh7FFF; + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); #1; + end + adc_valid = 1'b0; + + wait_for_idle; + cap_enable = 0; + + $display(" Saturation: count=%0d, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "Saturation: Completes with 1024 outputs"); + check(chain_state === ST_IDLE, "Saturation: Returns to IDLE"); + + // ════════════════════════════════════════════════════════ + // TEST GROUP 11: Valid-gap / stall test + // ════════════════════════════════════════════════════════ + $display("\n--- Test Group 11: Valid-Gap Stall Test ---"); + apply_reset; + + 
start_capture; + for (i = 0; i < FFT_SIZE; i = i + 1) begin + adc_data_i = 16'sh1000; + adc_data_q = 16'sh0000; + long_chirp_real = 16'sh1000; + long_chirp_imag = 16'sh0000; + short_chirp_real = 16'd0; + short_chirp_imag = 16'd0; + adc_valid = 1'b1; + @(posedge clk); #1; + + // Every 100 samples, insert a 10-cycle gap + if ((i % 100) == 99 && i < FFT_SIZE - 1) begin : stall_block + integer gap_j; + adc_valid = 1'b0; + for (gap_j = 0; gap_j < 10; gap_j = gap_j + 1) begin + @(posedge clk); #1; + end + end + end + adc_valid = 1'b0; + + wait_for_idle; + cap_enable = 0; + + $display(" Stall: count=%0d, peak=%0d, mag=%0d", cap_count, cap_peak_bin, cap_max_abs); + check(cap_count == FFT_SIZE, "Stall: 1024 outputs emitted"); + check(chain_state === ST_IDLE, "Stall: Returns to IDLE"); + + // ════════════════════════════════════════════════════════ + // Summary + // ════════════════════════════════════════════════════════ + $display(""); + $display("========================================"); + $display(" MATCHED FILTER PROCESSING CHAIN"); + $display(" (SYNTHESIS BRANCH — fft_engine)"); + $display(" PASSED: %0d / %0d", pass_count, test_num); + $display(" FAILED: %0d / %0d", fail_count, test_num); + if (fail_count == 0) + $display(" ** ALL TESTS PASSED **"); + else + $display(" ** SOME TESTS FAILED **"); + $display("========================================"); + $display(""); + + #100; + $finish; + end + +endmodule diff --git a/9_Firmware/9_2_FPGA/tb/tb_xfft_32.v b/9_Firmware/9_2_FPGA/tb/tb_xfft_32.v new file mode 100644 index 0000000..ed176cf --- /dev/null +++ b/9_Firmware/9_2_FPGA/tb/tb_xfft_32.v @@ -0,0 +1,355 @@ +`timescale 1ns / 1ps + +/** + * tb_xfft_32.v + * + * Testbench for xfft_32 AXI-Stream FFT wrapper. + * Verifies the wrapper correctly interfaces with fft_engine via AXI-Stream. + * + * Test Groups: + * 1. Impulse response (all output bins = input amplitude) + * 2. DC input (bin 0 = A*N, rest ~= 0) + * 3. Single tone detection + * 4. 
Back-to-back transforms (no state leakage) + * 5. Zero input (all output bins = 0); tlast/sample-count handshake is checked in group 1 + */ + +module tb_xfft_32; + +// ============================================================================ +// PARAMETERS +// ============================================================================ +localparam N = 32; +localparam CLK_PERIOD = 10; + +// ============================================================================ +// SIGNALS +// ============================================================================ +reg aclk, aresetn; +reg [7:0] cfg_tdata; +reg cfg_tvalid; +wire cfg_tready; +reg [31:0] din_tdata; +reg din_tvalid; +reg din_tlast; +wire [31:0] dout_tdata; +wire dout_tvalid; +wire dout_tlast; +reg dout_tready; + +// ============================================================================ +// DUT +// ============================================================================ +xfft_32 dut ( + .aclk(aclk), + .aresetn(aresetn), + .s_axis_config_tdata(cfg_tdata), + .s_axis_config_tvalid(cfg_tvalid), + .s_axis_config_tready(cfg_tready), + .s_axis_data_tdata(din_tdata), + .s_axis_data_tvalid(din_tvalid), + .s_axis_data_tlast(din_tlast), + .m_axis_data_tdata(dout_tdata), + .m_axis_data_tvalid(dout_tvalid), + .m_axis_data_tlast(dout_tlast), + .m_axis_data_tready(dout_tready) +); + +// ============================================================================ +// CLOCK +// ============================================================================ +initial aclk = 0; +always #(CLK_PERIOD/2) aclk = ~aclk; + +// ============================================================================ +// PASS/FAIL TRACKING +// ============================================================================ +integer pass_count, fail_count; + +task check; + input cond; + input [512*8-1:0] label; + begin + if (cond) begin + $display(" [PASS] %0s", label); + pass_count = pass_count + 1; + end else begin + $display(" [FAIL] %0s", label); + fail_count = 
fail_count + 1; + end + end +endtask + +// ============================================================================ +// OUTPUT CAPTURE +// ============================================================================ +reg signed [15:0] out_re [0:N-1]; +reg signed [15:0] out_im [0:N-1]; +integer out_idx; +reg got_tlast; +integer tlast_count; + +// ============================================================================ +// HELPER TASKS +// ============================================================================ + +task do_reset; + begin + aresetn = 0; + cfg_tdata = 0; + cfg_tvalid = 0; + din_tdata = 0; + din_tvalid = 0; + din_tlast = 0; + dout_tready = 1; + repeat(5) @(posedge aclk); + aresetn = 1; + repeat(2) @(posedge aclk); + end +endtask + +// Send config (forward FFT: tdata[0]=1) +// Waits for cfg_tready (wrapper in S_IDLE) before sending +task send_config; + input [7:0] cfg; + integer wait_cnt; + begin + // Wait for wrapper to be ready (S_IDLE) + wait_cnt = 0; + while (!cfg_tready && wait_cnt < 5000) begin + @(posedge aclk); + wait_cnt = wait_cnt + 1; + end + cfg_tdata = cfg; + cfg_tvalid = 1; + @(posedge aclk); + cfg_tvalid = 0; + cfg_tdata = 0; + end +endtask + +// Feed N samples: each sample is {im[15:0], re[15:0]} +// feed_re and feed_im must be pre-loaded +reg signed [15:0] feed_re [0:N-1]; +reg signed [15:0] feed_im [0:N-1]; + +task feed_data; + integer i; + begin + for (i = 0; i < N; i = i + 1) begin + din_tdata = {feed_im[i], feed_re[i]}; + din_tvalid = 1; + din_tlast = (i == N - 1) ? 
1 : 0; + @(posedge aclk); + end + din_tvalid = 0; + din_tlast = 0; + din_tdata = 0; + end +endtask + +// Capture N output samples +task capture_output; + integer timeout; + begin + out_idx = 0; + got_tlast = 0; + tlast_count = 0; + timeout = 0; + while (out_idx < N && timeout < 5000) begin + @(posedge aclk); + if (dout_tvalid && dout_tready) begin + out_re[out_idx] = dout_tdata[15:0]; + out_im[out_idx] = dout_tdata[31:16]; + if (dout_tlast) begin + got_tlast = 1; + tlast_count = tlast_count + 1; + end + out_idx = out_idx + 1; + end + timeout = timeout + 1; + end + end +endtask + +// ============================================================================ +// VCD +// ============================================================================ +initial begin + $dumpfile("tb_xfft_32.vcd"); + $dumpvars(0, tb_xfft_32); +end + +// ============================================================================ +// MAIN TEST +// ============================================================================ +integer i; +reg signed [31:0] err; +integer max_err; +integer max_mag_bin; +reg signed [31:0] max_mag, mag; +real angle; + +initial begin + pass_count = 0; + fail_count = 0; + + $display("============================================================"); + $display(" xfft_32 AXI-Stream Wrapper Testbench"); + $display("============================================================"); + + do_reset; + + // ================================================================ + // TEST 1: Impulse Response + // ================================================================ + $display(""); + $display("--- Test 1: Impulse Response ---"); + + for (i = 0; i < N; i = i + 1) begin + feed_re[i] = (i == 0) ? 
16'sd1000 : 16'sd0; + feed_im[i] = 16'sd0; + end + + send_config(8'h01); // Forward FFT + feed_data; + capture_output; + + check(out_idx == N, "Received N output samples"); + check(got_tlast == 1, "Got tlast on output"); + + max_err = 0; + for (i = 0; i < N; i = i + 1) begin + err = out_re[i] - 1000; + if (err < 0) err = -err; + if (err > max_err) max_err = err; + err = out_im[i]; + if (err < 0) err = -err; + if (err > max_err) max_err = err; + end + $display(" Impulse max error: %0d", max_err); + check(max_err < 10, "Impulse: all bins ~= 1000"); + + // ================================================================ + // TEST 2: DC Input + // ================================================================ + $display(""); + $display("--- Test 2: DC Input ---"); + + for (i = 0; i < N; i = i + 1) begin + feed_re[i] = 16'sd100; + feed_im[i] = 16'sd0; + end + + send_config(8'h01); + feed_data; + capture_output; + + $display(" DC bin[0] = %0d + j%0d (expect ~3200)", out_re[0], out_im[0]); + check(out_re[0] >= 3100 && out_re[0] <= 3300, "DC: bin 0 ~= 3200 (5% tol)"); + + max_err = 0; + for (i = 1; i < N; i = i + 1) begin + err = out_re[i]; if (err < 0) err = -err; + if (err > max_err) max_err = err; + err = out_im[i]; if (err < 0) err = -err; + if (err > max_err) max_err = err; + end + $display(" DC max non-DC: %0d", max_err); + check(max_err < 25, "DC: non-DC bins ~= 0"); + + // ================================================================ + // TEST 3: Single Tone (bin 4) + // ================================================================ + $display(""); + $display("--- Test 3: Single Tone (bin 4) ---"); + + for (i = 0; i < N; i = i + 1) begin + angle = 6.28318530718 * 4.0 * i / 32.0; + feed_re[i] = $rtoi($cos(angle) * 1000.0); + feed_im[i] = 16'sd0; + end + + send_config(8'h01); + feed_data; + capture_output; + + max_mag = 0; + max_mag_bin = 0; + for (i = 0; i < N; i = i + 1) begin + mag = out_re[i] * out_re[i] + out_im[i] * out_im[i]; + if (mag > max_mag) begin 
+ max_mag = mag; + max_mag_bin = i; + end + end + $display(" Tone peak bin: %0d (expect 4 or 28)", max_mag_bin); + check(max_mag_bin == 4 || max_mag_bin == 28, "Tone: peak at bin 4 or 28"); + + // ================================================================ + // TEST 4: Back-to-back transforms + // ================================================================ + $display(""); + $display("--- Test 4: Back-to-Back Transforms ---"); + + // First: impulse + for (i = 0; i < N; i = i + 1) begin + feed_re[i] = (i == 0) ? 16'sd500 : 16'sd0; + feed_im[i] = 16'sd0; + end + send_config(8'h01); + feed_data; + capture_output; + check(out_idx == N, "Back-to-back 1st: got N outputs"); + + // Second: DC immediately after + for (i = 0; i < N; i = i + 1) begin + feed_re[i] = 16'sd50; + feed_im[i] = 16'sd0; + end + send_config(8'h01); + feed_data; + capture_output; + check(out_idx == N, "Back-to-back 2nd: got N outputs"); + $display(" 2nd transform bin[0] = %0d (expect ~1600)", out_re[0]); + check(out_re[0] >= 1500 && out_re[0] <= 1700, "Back-to-back 2nd: bin 0 ~= 1600"); + + // ================================================================ + // TEST 5: Zero input + // ================================================================ + $display(""); + $display("--- Test 5: Zero Input ---"); + + for (i = 0; i < N; i = i + 1) begin + feed_re[i] = 16'sd0; + feed_im[i] = 16'sd0; + end + send_config(8'h01); + feed_data; + capture_output; + + max_err = 0; + for (i = 0; i < N; i = i + 1) begin + err = out_re[i]; if (err < 0) err = -err; + if (err > max_err) max_err = err; + err = out_im[i]; if (err < 0) err = -err; + if (err > max_err) max_err = err; + end + check(max_err == 0, "Zero input: all outputs = 0"); + + // ================================================================ + // SUMMARY + // ================================================================ + $display(""); + $display("============================================================"); + $display(" RESULTS: %0d/%0d 
passed", pass_count, pass_count + fail_count); + if (fail_count == 0) + $display(" ALL TESTS PASSED"); + else + $display(" SOME TESTS FAILED"); + $display("============================================================"); + + $finish; +end + +endmodule diff --git a/9_Firmware/9_2_FPGA/xfft_32.v b/9_Firmware/9_2_FPGA/xfft_32.v index 41d9091..cd1932b 100644 --- a/9_Firmware/9_2_FPGA/xfft_32.v +++ b/9_Firmware/9_2_FPGA/xfft_32.v @@ -1,18 +1,15 @@ `timescale 1ns / 1ps // ============================================================================ -// xfft_32.v — Synthesis stub for Xilinx 32-point FFT IP core +// xfft_32.v — 32-point FFT with AXI-Stream interface // ============================================================================ -// This is a PLACEHOLDER module that provides the port interface expected by -// doppler_processor.v. It does NOT perform an actual FFT — it simply passes -// input data through with a one-cycle latency and generates proper AXI-Stream -// handshake signals. -// -// For real hardware, replace this stub with either: -// (a) A Xilinx FFT IP core generated via Vivado IP Catalog, or -// (b) A custom synthesizable radix-2 DIT 32-point FFT in Verilog. +// Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port +// interface expected by doppler_processor.v. // // Port interface matches the Xilinx LogiCORE IP Fast Fourier Transform // (AXI-Stream variant) as instantiated in doppler_processor.v. +// +// Data format: {Q[15:0], I[15:0]} packed 32-bit. +// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT. 
// ============================================================================ module xfft_32 (
@@ -36,36 +33,250 @@ module xfft_32 (
     input wire m_axis_data_tready
 );
 
-// ----------------------------------------------------------------------------
-// Synthesis stub: pass-through with one-cycle latency
-// ----------------------------------------------------------------------------
-// This gives Vivado a real module to synthesize so it can check port
-// connectivity, infer timing paths, and estimate utilization. The actual
-// FFT computation is deferred to IP integration or a custom RTL FFT.
-// ----------------------------------------------------------------------------
+// ============================================================================
+// PARAMETERS
+// ============================================================================
+localparam N = 32;     // transform length in points
+localparam LOG2N = 5;  // log2(N), forwarded to fft_engine
 
-// Always accept config
-assign s_axis_config_tready = 1'b1;
+// ============================================================================
+// INTERNAL SIGNALS
+// ============================================================================
 
-// Pipeline registers for data pass-through
-reg [31:0] data_reg;
-reg valid_reg;
-reg last_reg;
+// FSM states. The config word is latched directly in S_IDLE, so there is no
+// separate config state; unused encodings fall through to the case default.
+localparam [2:0] S_IDLE = 3'd0,    // Wait for a config beat
+                 S_FEED = 3'd2,    // Buffer N input samples
+                 S_WAIT = 3'd3,    // Replay buffer into engine, capture results
+                 S_OUTPUT = 3'd4;  // Stream results on m_axis_data
+
+reg [2:0] state;
+
+// Configuration: 1 = inverse transform (complement of config tdata[0])
+reg inverse_reg;
+
+// Input buffering
+reg signed [15:0] in_buf_re [0:N-1];
+reg signed [15:0] in_buf_im [0:N-1];
+reg [5:0] in_count;   // samples accepted so far, 0..N (N = buffer full)
+
+// Output buffering
+reg signed [15:0] out_buf_re [0:N-1];
+reg signed [15:0] out_buf_im [0:N-1];
+reg [5:0] out_count;  // index of the output sample currently presented
+reg [5:0] out_total;  // engine outputs captured so far, 0..N
+
+// FFT engine interface
+reg fft_start;
+reg fft_inverse;
+reg signed [15:0] fft_din_re, fft_din_im;
+reg fft_din_valid;
+wire signed [15:0] fft_dout_re, fft_dout_im;
+wire fft_dout_valid;
+wire fft_busy;        // wired to the engine port; not consulted by the FSM
+wire fft_done;
+
+// Feed counter: buffered samples replayed into the engine so far, 0..N
+reg [5:0] feed_count;
+
+// ============================================================================
+// FFT ENGINE INSTANCE
+// ============================================================================
+fft_engine #(
+    .N(N),
+    .LOG2N(LOG2N),
+    .DATA_W(16),
+    .INTERNAL_W(32),
+    .TWIDDLE_W(16),
+    .TWIDDLE_FILE("fft_twiddle_32.mem")
+) fft_core (
+    .clk(aclk),
+    .reset_n(aresetn),
+    .start(fft_start),
+    .inverse(fft_inverse),
+    .din_re(fft_din_re),
+    .din_im(fft_din_im),
+    .din_valid(fft_din_valid),
+    .dout_re(fft_dout_re),
+    .dout_im(fft_dout_im),
+    .dout_valid(fft_dout_valid),
+    .busy(fft_busy),
+    .done(fft_done)
+);
+
+// ============================================================================
+// AXI-STREAM OUTPUTS
+// ============================================================================
+
+// Config is accepted only while idle
+assign s_axis_config_tready = (state == S_IDLE);
+
+// Output data: {Q, I} packed, read combinationally from the output buffer
+assign m_axis_data_tdata = {out_buf_im[out_count[4:0]], out_buf_re[out_count[4:0]]};
+assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
+assign m_axis_data_tlast = (state == S_OUTPUT) && (out_count == N - 1);
+
+// ============================================================================
+// BUFFER WRITE LOGIC — separate always block, NO async reset
+// Allows Vivado to infer distributed RAM instead of dissolving into registers.
+// The FSM registers the write strobes/addresses/data declared below, so each
+// buffer write lands one clock after the FSM requests it.
+// ============================================================================
+// Input buffer write port
+reg in_buf_we;
+reg [4:0] in_buf_waddr;
+reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
+
+// Output buffer write port
+reg out_buf_we;
+reg [4:0] out_buf_waddr;
+reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
 
 always @(posedge aclk) begin
-    if (!aresetn) begin
-        data_reg <= 32'd0;
-        valid_reg <= 1'b0;
-        last_reg <= 1'b0;
-    end else begin
-        data_reg <= s_axis_data_tdata;
-        valid_reg <= s_axis_data_tvalid;
-        last_reg <= s_axis_data_tlast;
+    if (in_buf_we) begin
+        in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
+        in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
+    end
+    if (out_buf_we) begin
+        out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
+        out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
     end
 end
 
-assign m_axis_data_tdata = data_reg;
-assign m_axis_data_tvalid = valid_reg;
-assign m_axis_data_tlast = last_reg;
+// ============================================================================
+// MAIN FSM
+// ============================================================================
+always @(posedge aclk or negedge aresetn) begin
+    if (!aresetn) begin
+        state <= S_IDLE;
+        inverse_reg <= 1'b0;
+        in_count <= 0;
+        out_count <= 0;
+        out_total <= 0;
+        feed_count <= 0;
+        fft_start <= 1'b0;
+        fft_inverse <= 1'b0;
+        fft_din_re <= 0;
+        fft_din_im <= 0;
+        fft_din_valid <= 1'b0;
+        in_buf_we <= 1'b0;
+        in_buf_waddr <= 0;
+        in_buf_wdata_re <= 0;
+        in_buf_wdata_im <= 0;
+        out_buf_we <= 1'b0;
+        out_buf_waddr <= 0;
+        out_buf_wdata_re <= 0;
+        out_buf_wdata_im <= 0;
+    end else begin
+        // Defaults: single-cycle strobes are cleared unless re-asserted below
+        fft_start <= 1'b0;
+        fft_din_valid <= 1'b0;
+        in_buf_we <= 1'b0;
+        out_buf_we <= 1'b0;
+
+        case (state)
+
+        // ================================================================
+        // S_IDLE: wait for a config beat, then start collecting samples
+        // ================================================================
+        S_IDLE: begin
+            in_count <= 0;
+            if (s_axis_config_tvalid) begin
+                // Config tdata[0]: 1=forward, 0=inverse
+                // fft_engine: inverse=0 means forward, inverse=1 means inverse
+                inverse_reg <= ~s_axis_config_tdata[0];
+                state <= S_FEED;
+            end
+        end
+
+        // ================================================================
+        // S_FEED: Buffer all N inputs first, then start engine.
+        // ================================================================
+        S_FEED: begin
+            if (in_count < N) begin
+                // Still accepting input data
+                if (s_axis_data_tvalid) begin
+                    in_buf_we <= 1'b1;
+                    in_buf_waddr <= in_count[4:0];
+                    in_buf_wdata_re <= s_axis_data_tdata[15:0];
+                    in_buf_wdata_im <= s_axis_data_tdata[31:16];
+                    in_count <= in_count + 1;
+                end
+            end else begin
+                // All N inputs buffered: pulse start and clear the counters
+                // used by S_WAIT. There is no guard on feed_count: it is
+                // re-initialised here, so a stale value cannot stall S_FEED.
+                fft_start <= 1'b1;
+                fft_inverse <= inverse_reg;
+                feed_count <= 0;
+                out_total <= 0;
+                state <= S_WAIT;
+            end
+        end
+
+        // ================================================================
+        // S_WAIT: Feed buffered data to engine, then wait for output
+        // ================================================================
+        S_WAIT: begin
+            if (feed_count < N) begin
+                fft_din_re <= in_buf_re[feed_count[4:0]];
+                fft_din_im <= in_buf_im[feed_count[4:0]];
+                fft_din_valid <= 1'b1;
+                feed_count <= feed_count + 1;
+            end
+
+            // Capture engine outputs
+            if (fft_dout_valid && out_total < N) begin
+                out_buf_we <= 1'b1;
+                out_buf_waddr <= out_total[4:0];
+                out_buf_wdata_re <= fft_dout_re;
+                out_buf_wdata_im <= fft_dout_im;
+                out_total <= out_total + 1;
+            end
+
+            // Engine done
+            if (fft_done) begin
+                state <= S_OUTPUT;
+                out_count <= 0;
+            end
+        end
+
+        // ================================================================
+        // S_OUTPUT: Stream buffered results via AXI-Stream master
+        // ================================================================
+        S_OUTPUT: begin
+            // m_axis_data_tdata/tvalid/tlast are driven combinationally from
+            // out_count, so only the read pointer advances here, and only
+            // when the downstream asserts tready.
+            if (m_axis_data_tready) begin
+                if (out_count < N) begin
+                    out_count <= out_count + 1;
+                end
+                if (out_count >= N - 1) begin
+                    state <= S_IDLE;
+                end
+            end
+        end
+
+        default: state <= S_IDLE;
+
+        endcase
+    end
+end
+
+// ============================================================================
+// MEMORY INIT (simulation only)
+// ============================================================================
+`ifdef SIMULATION
+integer init_k;
+initial begin
+    for (init_k = 0; init_k < N; init_k = init_k + 1) begin
+        in_buf_re[init_k] = 0;
+        in_buf_im[init_k] = 0;
+        out_buf_re[init_k] = 0;
+        out_buf_im[init_k] = 0;
+    end
+end
+`endif
 
 endmodule