Merge pull request #33 from JJassonn69/fix/staggered-prf-dual16-doppler

Fix staggered-PRF Doppler path using dual 16-point FFT sub-frames
This commit is contained in:
NawfalMotii79
2026-03-27 22:09:08 +01:00
committed by GitHub
18 changed files with 12801 additions and 12657 deletions
+447 -432
View File
@@ -1,11 +1,44 @@
`timescale 1ns / 1ps `timescale 1ns / 1ps
// ============================================================================
// doppler_processor.v - Staggered-PRF Doppler Processor (CORRECTED)
// ============================================================================
//
// ARCHITECTURE:
// This module implements dual 16-point FFTs for the AERIS-10 staggered-PRF
// waveform. The radar transmits 16 long-PRI chirps followed by 16 short-PRI
// chirps per frame (32 total). Rather than a single 32-point FFT over the
// non-uniformly sampled frame (which is signal-processing invalid), this
// module processes each sub-frame independently:
//
// Sub-frame 0 (long PRI): chirps 0..15 -> 16-pt windowed FFT
// Sub-frame 1 (short PRI): chirps 16..31 -> 16-pt windowed FFT
//
// Each sub-frame produces 16 Doppler bins per range bin. The outputs are
// tagged with a sub_frame bit and the 4-bit bin index is packed into the
// existing 5-bit doppler_bin port as {sub_frame, bin[3:0]}.
//
// This architecture enables downstream staggered-PRF ambiguity resolution:
// the same target velocity maps to DIFFERENT Doppler bins at different PRIs,
// and comparing the two sub-frame results resolves velocity ambiguity.
//
// INTERFACE COMPATIBILITY:
// The port list is a superset of the original module. Existing instantiations
// that don't connect `sub_frame` will still work. The FORMAL ports are
// retained. CHIRPS_PER_FRAME must be 32 (16 per sub-frame).
//
// WINDOW:
// 16-point Hamming window (Q15), symmetric. Computed as:
// w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15
// ============================================================================
module doppler_processor_optimized #( module doppler_processor_optimized #(
parameter DOPPLER_FFT_SIZE = 32, parameter DOPPLER_FFT_SIZE = 16, // FFT size per sub-frame (was 32)
parameter RANGE_BINS = 64, parameter RANGE_BINS = 64,
parameter CHIRPS_PER_FRAME = 32, parameter CHIRPS_PER_FRAME = 32, // Total chirps in frame (16+16)
parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular parameter CHIRPS_PER_SUBFRAME = 16, // Chirps per sub-frame
parameter DATA_WIDTH = 16 parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular
parameter DATA_WIDTH = 16
)( )(
input wire clk, input wire clk,
input wire reset_n, input wire reset_n,
@@ -14,62 +47,63 @@ module doppler_processor_optimized #(
input wire new_chirp_frame, input wire new_chirp_frame,
output reg [31:0] doppler_output, output reg [31:0] doppler_output,
output reg doppler_valid, output reg doppler_valid,
output reg [4:0] doppler_bin, output reg [4:0] doppler_bin, // {sub_frame, bin[3:0]}
output reg [5:0] range_bin, output reg [5:0] range_bin,
output wire processing_active, output reg sub_frame, // 0=long PRI, 1=short PRI
output wire frame_complete, output wire processing_active,
output reg [3:0] status output wire frame_complete,
output reg [3:0] status
`ifdef FORMAL
, `ifdef FORMAL
output wire [2:0] fv_state, ,
output wire [10:0] fv_mem_write_addr, output wire [2:0] fv_state,
output wire [10:0] fv_mem_read_addr, output wire [10:0] fv_mem_write_addr,
output wire [5:0] fv_write_range_bin, output wire [10:0] fv_mem_read_addr,
output wire [4:0] fv_write_chirp_index, output wire [5:0] fv_write_range_bin,
output wire [5:0] fv_read_range_bin, output wire [4:0] fv_write_chirp_index,
output wire [4:0] fv_read_doppler_index, output wire [5:0] fv_read_range_bin,
output wire [9:0] fv_processing_timeout, output wire [4:0] fv_read_doppler_index,
output wire fv_frame_buffer_full, output wire [9:0] fv_processing_timeout,
output wire fv_mem_we, output wire fv_frame_buffer_full,
output wire [10:0] fv_mem_waddr_r output wire fv_mem_we,
`endif output wire [10:0] fv_mem_waddr_r
); `endif
);
// ==============================================
// Window Coefficients (Simple Implementation) // ==============================================
// ============================================== // Window Coefficients 16-point Hamming (Q15)
reg [DATA_WIDTH-1:0] window_coeff [0:31]; // ==============================================
// w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15
// Symmetric: w[n] = w[15-n]
reg [DATA_WIDTH-1:0] window_coeff [0:15];
// Generate window coefficients
integer w; integer w;
initial begin initial begin
if (WINDOW_TYPE == 0) begin if (WINDOW_TYPE == 0) begin
// Pre-calculated Hamming window (Q15 format) // 16-point Hamming window, Q15 format
window_coeff[0] = 16'h0800; window_coeff[1] = 16'h0862; // Computed: round(32767 * (0.54 - 0.46*cos(2*pi*n/15)))
window_coeff[2] = 16'h09CB; window_coeff[3] = 16'h0C3B; window_coeff[0] = 16'h0A3D; // 0.0800 * 32767 = 2621
window_coeff[4] = 16'h0FB2; window_coeff[5] = 16'h142F; window_coeff[1] = 16'h0E5C; // 0.1116 * 32767 = 3676
window_coeff[6] = 16'h19B2; window_coeff[7] = 16'h2039; window_coeff[2] = 16'h1B6D; // 0.2138 * 32767 = 7021
window_coeff[8] = 16'h27C4; window_coeff[9] = 16'h3050; window_coeff[3] = 16'h3088; // 0.3790 * 32767 = 12424
window_coeff[10] = 16'h39DB; window_coeff[11] = 16'h4462; window_coeff[4] = 16'h4B33; // 0.5868 * 32767 = 19251
window_coeff[12] = 16'h4FE3; window_coeff[13] = 16'h5C5A; window_coeff[5] = 16'h6573; // 0.7930 * 32767 = 25971
window_coeff[14] = 16'h69C4; window_coeff[15] = 16'h781D; window_coeff[6] = 16'h7642; // 0.9245 * 32767 = 30274
window_coeff[16] = 16'h7FFF; // Peak window_coeff[7] = 16'h7F62; // 0.9932 * 32767 = 32610
window_coeff[17] = 16'h781D; window_coeff[18] = 16'h69C4; window_coeff[8] = 16'h7F62; // symmetric
window_coeff[19] = 16'h5C5A; window_coeff[20] = 16'h4FE3; window_coeff[9] = 16'h7642;
window_coeff[21] = 16'h4462; window_coeff[22] = 16'h39DB; window_coeff[10] = 16'h6573;
window_coeff[23] = 16'h3050; window_coeff[24] = 16'h27C4; window_coeff[11] = 16'h4B33;
window_coeff[25] = 16'h2039; window_coeff[26] = 16'h19B2; window_coeff[12] = 16'h3088;
window_coeff[27] = 16'h142F; window_coeff[28] = 16'h0FB2; window_coeff[13] = 16'h1B6D;
window_coeff[29] = 16'h0C3B; window_coeff[30] = 16'h09CB; window_coeff[14] = 16'h0E5C;
window_coeff[31] = 16'h0862; window_coeff[15] = 16'h0A3D;
end else begin end else begin
// Rectangular window (all ones) for (w = 0; w < 16; w = w + 1) begin
for (w = 0; w < 32; w = w + 1) begin
window_coeff[w] = 16'h7FFF; window_coeff[w] = 16'h7FFF;
end end
end end
end end
// ============================================== // ==============================================
// Memory Declaration - FIXED SIZE // Memory Declaration - FIXED SIZE
@@ -81,57 +115,53 @@ localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME;
// ============================================== // ==============================================
// Control Registers // Control Registers
// ============================================== // ==============================================
reg [5:0] write_range_bin; // Changed to match RANGE_BINS width reg [5:0] write_range_bin;
reg [4:0] write_chirp_index; // Changed to match CHIRPS_PER_FRAME width reg [4:0] write_chirp_index;
reg [5:0] read_range_bin; reg [5:0] read_range_bin;
reg [4:0] read_doppler_index; // Changed name for clarity reg [4:0] read_doppler_index;
reg frame_buffer_full; reg frame_buffer_full;
reg [9:0] chirps_received; // Enough for up to 1024 chirps reg [9:0] chirps_received;
reg [1:0] chirp_state; // Track chirp accumulation state reg [1:0] chirp_state;
// Sub-frame tracking
reg current_sub_frame; // 0=processing long, 1=processing short
// ============================================== // ==============================================
// FFT Interface // FFT Interface
// ============================================== // ==============================================
reg fft_start; reg fft_start;
wire fft_ready; wire fft_ready;
reg [DATA_WIDTH-1:0] fft_input_i; reg [DATA_WIDTH-1:0] fft_input_i;
reg [DATA_WIDTH-1:0] fft_input_q; reg [DATA_WIDTH-1:0] fft_input_q;
reg signed [31:0] mult_i, mult_q; // 32-bit to avoid overflow reg signed [31:0] mult_i, mult_q;
reg signed [DATA_WIDTH-1:0] window_val_reg; // BREG pipeline stage reg signed [DATA_WIDTH-1:0] window_val_reg;
reg signed [31:0] mult_i_raw, mult_q_raw; // MREG pipeline stage reg signed [31:0] mult_i_raw, mult_q_raw;
reg fft_input_valid; reg fft_input_valid;
reg fft_input_last; reg fft_input_last;
wire [DATA_WIDTH-1:0] fft_output_i; wire [DATA_WIDTH-1:0] fft_output_i;
wire [DATA_WIDTH-1:0] fft_output_q; wire [DATA_WIDTH-1:0] fft_output_q;
wire fft_output_valid; wire fft_output_valid;
wire fft_output_last; wire fft_output_last;
// ============================================== // ==============================================
// Addressing // Addressing
// ============================================== // ==============================================
wire [10:0] mem_write_addr; wire [10:0] mem_write_addr;
wire [10:0] mem_read_addr; wire [10:0] mem_read_addr;
// Proper address calculation using parameters
assign mem_write_addr = (write_chirp_index * RANGE_BINS) + write_range_bin; assign mem_write_addr = (write_chirp_index * RANGE_BINS) + write_range_bin;
assign mem_read_addr = (read_doppler_index * RANGE_BINS) + read_range_bin; assign mem_read_addr = (read_doppler_index * RANGE_BINS) + read_range_bin;
// Alternative organization (choose one): // ==============================================
// If you want range-major organization (all chirps for one range bin together): // State Machine
// assign mem_write_addr = (write_range_bin * CHIRPS_PER_FRAME) + write_chirp_index; // ==============================================
// assign mem_read_addr = (read_range_bin * CHIRPS_PER_FRAME) + read_doppler_index; reg [2:0] state;
localparam S_IDLE = 3'b000;
// ============================================== localparam S_ACCUMULATE = 3'b001;
// State Machine localparam S_PRE_READ = 3'b101;
// ============================================== localparam S_LOAD_FFT = 3'b010;
reg [2:0] state; localparam S_FFT_WAIT = 3'b011;
localparam S_IDLE = 3'b000;
localparam S_ACCUMULATE = 3'b001;
localparam S_PRE_READ = 3'b101; // Prime BRAM pipeline before FFT load
localparam S_LOAD_FFT = 3'b010;
localparam S_FFT_WAIT = 3'b011;
localparam S_OUTPUT = 3'b100; localparam S_OUTPUT = 3'b100;
// Frame sync detection // Frame sync detection
@@ -142,361 +172,347 @@ always @(posedge clk or negedge reset_n) begin
end end
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1; wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
// ============================================== // ==============================================
// Main State Machine - FIXED // Main State Machine
// ============================================== // ==============================================
reg [5:0] fft_sample_counter; reg [4:0] fft_sample_counter; // Reduced: only need 0..17 for 16-pt FFT
reg [9:0] processing_timeout; reg [9:0] processing_timeout;
// Memory write enable and data signals (extracted for BRAM inference) // Memory write enable and data signals
reg mem_we; reg mem_we;
reg [10:0] mem_waddr_r; reg [10:0] mem_waddr_r;
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q; reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
// Memory read data (registered for BRAM read latency) // Memory read data
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q; reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
`ifdef FORMAL `ifdef FORMAL
assign fv_state = state; assign fv_state = state;
assign fv_mem_write_addr = mem_write_addr; assign fv_mem_write_addr = mem_write_addr;
assign fv_mem_read_addr = mem_read_addr; assign fv_mem_read_addr = mem_read_addr;
assign fv_write_range_bin = write_range_bin; assign fv_write_range_bin = write_range_bin;
assign fv_write_chirp_index = write_chirp_index; assign fv_write_chirp_index = write_chirp_index;
assign fv_read_range_bin = read_range_bin; assign fv_read_range_bin = read_range_bin;
assign fv_read_doppler_index = read_doppler_index; assign fv_read_doppler_index = read_doppler_index;
assign fv_processing_timeout = processing_timeout; assign fv_processing_timeout = processing_timeout;
assign fv_frame_buffer_full = frame_buffer_full; assign fv_frame_buffer_full = frame_buffer_full;
assign fv_mem_we = mem_we; assign fv_mem_we = mem_we;
assign fv_mem_waddr_r = mem_waddr_r; assign fv_mem_waddr_r = mem_waddr_r;
`endif `endif
// ---------------------------------------------------------- // ----------------------------------------------------------
// Separate always block for memory writes NO async reset // Separate always block for memory writes NO async reset
// in sensitivity list, so Vivado can infer Block RAM. // ----------------------------------------------------------
// ---------------------------------------------------------- always @(posedge clk) begin
always @(posedge clk) begin if (mem_we) begin
if (mem_we) begin doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
doppler_i_mem[mem_waddr_r] <= mem_wdata_i; doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
doppler_q_mem[mem_waddr_r] <= mem_wdata_q; end
end mem_rdata_i <= doppler_i_mem[mem_read_addr];
// Registered read address driven by mem_read_addr from FSM mem_rdata_q <= doppler_q_mem[mem_read_addr];
mem_rdata_i <= doppler_i_mem[mem_read_addr]; end
mem_rdata_q <= doppler_q_mem[mem_read_addr];
end // ----------------------------------------------------------
// Block 1: FSM / Control - async reset
// ---------------------------------------------------------- // ----------------------------------------------------------
// Block 1: FSM / Control async reset (posedge clk or negedge reset_n). always @(posedge clk or negedge reset_n) begin
// Only state-machine and control registers live here. if (!reset_n) begin
// BRAM-driving and DSP datapath registers are intentionally state <= S_IDLE;
// excluded to avoid Vivado REQP-1839 (async-reset on BRAM write_range_bin <= 0;
// address) and DPOR-1/DPIP-1 (async-reset blocking DSP48 write_chirp_index <= 0;
// absorption) DRC warnings. frame_buffer_full <= 0;
// ---------------------------------------------------------- doppler_valid <= 0;
always @(posedge clk or negedge reset_n) begin fft_start <= 0;
if (!reset_n) begin fft_input_valid <= 0;
state <= S_IDLE; fft_input_last <= 0;
write_range_bin <= 0; fft_sample_counter <= 0;
write_chirp_index <= 0; processing_timeout <= 0;
// read_range_bin, read_doppler_index moved to Block 2 (sync reset) status <= 0;
// to enable BRAM address register absorption (REQP-1839 fix) chirps_received <= 0;
frame_buffer_full <= 0; chirp_state <= 0;
doppler_valid <= 0; doppler_output <= 0;
fft_start <= 0; doppler_bin <= 0;
fft_input_valid <= 0; range_bin <= 0;
fft_input_last <= 0; sub_frame <= 0;
fft_sample_counter <= 0; current_sub_frame <= 0;
processing_timeout <= 0; end else begin
status <= 0; doppler_valid <= 0;
chirps_received <= 0; fft_input_valid <= 0;
chirp_state <= 0; fft_input_last <= 0;
doppler_output <= 0;
doppler_bin <= 0; if (processing_timeout > 0) begin
range_bin <= 0; processing_timeout <= processing_timeout - 1;
end else begin end
doppler_valid <= 0;
fft_input_valid <= 0; case (state)
fft_input_last <= 0; S_IDLE: begin
if (frame_start_pulse) begin
if (processing_timeout > 0) begin write_chirp_index <= 0;
processing_timeout <= processing_timeout - 1; write_range_bin <= 0;
end frame_buffer_full <= 0;
chirps_received <= 0;
case (state) end
S_IDLE: begin
if (frame_start_pulse) begin if (data_valid && !frame_buffer_full) begin
// Start new frame state <= S_ACCUMULATE;
write_chirp_index <= 0; write_range_bin <= 1;
write_range_bin <= 0; end
frame_buffer_full <= 0; end
chirps_received <= 0;
end S_ACCUMULATE: begin
if (data_valid) begin
if (data_valid && !frame_buffer_full) begin if (write_range_bin < RANGE_BINS - 1) begin
state <= S_ACCUMULATE; write_range_bin <= write_range_bin + 1;
write_range_bin <= 1; end else begin
end write_range_bin <= 0;
end write_chirp_index <= write_chirp_index + 1;
chirps_received <= chirps_received + 1;
S_ACCUMULATE: begin
if (data_valid) begin if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
// Increment range bin frame_buffer_full <= 1;
if (write_range_bin < RANGE_BINS - 1) begin chirp_state <= 0;
write_range_bin <= write_range_bin + 1; state <= S_PRE_READ;
end else begin fft_sample_counter <= 0;
// Completed one chirp write_chirp_index <= 0;
write_range_bin <= 0; write_range_bin <= 0;
write_chirp_index <= write_chirp_index + 1; // Start with sub-frame 0 (long PRI chirps 0..15)
chirps_received <= chirps_received + 1; current_sub_frame <= 0;
end
// Check if frame is complete end
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin end
frame_buffer_full <= 1; end
chirp_state <= 0;
state <= S_PRE_READ; S_PRE_READ: begin
// read_range_bin/read_doppler_index zeroed in Block 2 // Prime BRAM pipeline for current sub-frame
fft_sample_counter <= 0; // read_doppler_index already set in Block 2 to sub-frame base
// Reset write pointers no longer needed for fft_start <= 1;
// this frame, and prevents stale overflow of state <= S_LOAD_FFT;
// write_chirp_index (which was just incremented end
// past CHIRPS_PER_FRAME-1 above).
write_chirp_index <= 0; S_LOAD_FFT: begin
write_range_bin <= 0; fft_start <= 0;
end
end // Pipeline: 2 priming cycles + CHIRPS_PER_SUBFRAME data cycles
end if (fft_sample_counter <= 1) begin
end fft_sample_counter <= fft_sample_counter + 1;
end else if (fft_sample_counter <= CHIRPS_PER_SUBFRAME + 1) begin
S_PRE_READ: begin fft_input_valid <= 1;
// Prime the BRAM pipeline: present addr for chirp 0 of
// current read_range_bin. read_doppler_index is already 0. if (fft_sample_counter == CHIRPS_PER_SUBFRAME + 1) begin
// mem_read_addr = 0 * RANGE_BINS + read_range_bin. fft_input_last <= 1;
// After this cycle, mem_rdata_i will hold data[chirp=0][rbin]. state <= S_FFT_WAIT;
// Advance read_doppler_index to 1 so the NEXT BRAM read fft_sample_counter <= 0;
// (which happens every cycle in the memory block) will processing_timeout <= 1000;
// fetch chirp 1. end else begin
// read_doppler_index <= 1 moved to Block 2 fft_sample_counter <= fft_sample_counter + 1;
fft_start <= 1; end
state <= S_LOAD_FFT; end
end end
S_LOAD_FFT: begin S_FFT_WAIT: begin
fft_start <= 0; if (fft_output_valid) begin
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
// Pipeline alignment (after S_PRE_READ primed the BRAM // Pack: {sub_frame, bin[3:0]}
// and pre-registered window_val_reg = window_coeff[0]): doppler_bin <= {current_sub_frame, fft_sample_counter[3:0]};
// range_bin <= read_range_bin;
// With DSP48 BREG+MREG pipelining, data flows through: sub_frame <= current_sub_frame;
// sub=0: multiply mem_rdata * window_val_reg -> mult_i_raw doppler_valid <= 1;
// pre-register window_coeff[1] into window_val_reg
// sub=1: MREG capture mult_i_raw -> mult_i (sample 0) fft_sample_counter <= fft_sample_counter + 1;
// new multiply for sample 1
// sub=2..DOPPLER_FFT_SIZE+1: steady state if (fft_output_last) begin
// fft_input = rounding(mult_i), mult_i = mult_i_raw, state <= S_OUTPUT;
// mult_i_raw = new multiply, window_val_reg = next coeff fft_sample_counter <= 0;
// end
// fft_input_valid asserted at sub=2..DOPPLER_FFT_SIZE+1 end
// fft_input_last asserted at sub=DOPPLER_FFT_SIZE+1
if (processing_timeout == 0) begin
// read_doppler_index updates moved to Block 2 (sync reset) state <= S_OUTPUT;
if (fft_sample_counter <= 1) begin end
// Sub 0..1: pipeline priming no valid FFT data yet end
fft_sample_counter <= fft_sample_counter + 1;
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE + 1) begin S_OUTPUT: begin
// Sub 2..DOPPLER_FFT_SIZE+1: steady state if (current_sub_frame == 0) begin
// (fft_input_i/fft_input_q captured in Block 2) // Just finished long PRI sub-frame now do short PRI
fft_input_valid <= 1; current_sub_frame <= 1;
fft_sample_counter <= 0;
if (fft_sample_counter == DOPPLER_FFT_SIZE + 1) begin state <= S_PRE_READ;
// Last sample: flush // read_range_bin stays the same, read_doppler_index
fft_input_last <= 1; // will be set to CHIRPS_PER_SUBFRAME in Block 2
state <= S_FFT_WAIT; end else begin
fft_sample_counter <= 0; // Finished both sub-frames for this range bin
processing_timeout <= 1000; current_sub_frame <= 0;
end else begin if (read_range_bin < RANGE_BINS - 1) begin
fft_sample_counter <= fft_sample_counter + 1; fft_sample_counter <= 0;
end state <= S_PRE_READ;
end // read_range_bin incremented in Block 2
end end else begin
state <= S_IDLE;
S_FFT_WAIT: begin frame_buffer_full <= 0;
if (fft_output_valid) begin end
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]}; end
doppler_bin <= fft_sample_counter; end
range_bin <= read_range_bin;
doppler_valid <= 1; endcase
fft_sample_counter <= fft_sample_counter + 1; status <= {state, frame_buffer_full};
end
if (fft_output_last) begin end
state <= S_OUTPUT;
fft_sample_counter <= 0; // ----------------------------------------------------------
end // Block 2: BRAM address/data & DSP datapath synchronous reset
end // ----------------------------------------------------------
always @(posedge clk) begin
if (processing_timeout == 0) begin if (!reset_n) begin
state <= S_OUTPUT; mem_we <= 0;
end mem_waddr_r <= 0;
end mem_wdata_i <= 0;
mem_wdata_q <= 0;
S_OUTPUT: begin mult_i <= 0;
if (read_range_bin < RANGE_BINS - 1) begin mult_q <= 0;
// read_range_bin/read_doppler_index updated in Block 2 mult_i_raw <= 0;
fft_sample_counter <= 0; mult_q_raw <= 0;
state <= S_PRE_READ; window_val_reg <= 0;
end else begin fft_input_i <= 0;
state <= S_IDLE; fft_input_q <= 0;
frame_buffer_full <= 0; read_range_bin <= 0;
end read_doppler_index <= 0;
end end else begin
mem_we <= 0;
endcase
case (state)
status <= {state, frame_buffer_full}; S_IDLE: begin
end if (data_valid && !frame_buffer_full) begin
end mem_we <= 1;
mem_waddr_r <= mem_write_addr;
// ---------------------------------------------------------- mem_wdata_i <= range_data[15:0];
// Block 2: BRAM address/data & DSP datapath synchronous reset only. mem_wdata_q <= range_data[31:16];
// Uses always @(posedge clk) so Vivado can absorb multipliers end
// into DSP48 primitives and does not flag REQP-1839/1840 on end
// BRAM address registers. Replicates the same state/condition
// structure as Block 1 for the registers: S_ACCUMULATE: begin
// mem_we, mem_waddr_r, mem_wdata_i, mem_wdata_q, if (data_valid) begin
// mult_i, mult_q, fft_input_i, fft_input_q, mem_we <= 1;
// read_range_bin, read_doppler_index mem_waddr_r <= mem_write_addr;
// ---------------------------------------------------------- mem_wdata_i <= range_data[15:0];
always @(posedge clk) begin mem_wdata_q <= range_data[31:16];
if (!reset_n) begin
mem_we <= 0; if (write_range_bin >= RANGE_BINS - 1 &&
mem_waddr_r <= 0; write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
mem_wdata_i <= 0; read_range_bin <= 0;
mem_wdata_q <= 0; // Start reading from chirp 0 (long PRI sub-frame)
mult_i <= 0; read_doppler_index <= 0;
mult_q <= 0; end
mult_i_raw <= 0; end
mult_q_raw <= 0; end
window_val_reg <= 0;
fft_input_i <= 0; S_PRE_READ: begin
fft_input_q <= 0; // Set read_doppler_index to first chirp of current sub-frame + 1
read_range_bin <= 0; // (because address is presented this cycle, data arrives next)
read_doppler_index <= 0; if (current_sub_frame == 0)
end else begin read_doppler_index <= 1; // Long PRI: chirps 0..15
mem_we <= 0; else
read_doppler_index <= CHIRPS_PER_SUBFRAME + 1; // Short PRI: chirps 16..31
case (state)
S_IDLE: begin // BREG priming: window coeff for sample 0
if (data_valid && !frame_buffer_full) begin window_val_reg <= $signed(window_coeff[0]);
// Write the first sample immediately (Bug #3 fix: end
// previously this transition consumed data_valid
// without writing to BRAM) S_LOAD_FFT: begin
mem_we <= 1; if (fft_sample_counter == 0) begin
mem_waddr_r <= mem_write_addr; // Pipe stage 1: multiply using pre-registered BREG value
mem_wdata_i <= range_data[15:0]; mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mem_wdata_q <= range_data[31:16]; mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
end window_val_reg <= $signed(window_coeff[1]);
end // Advance to chirp base+2
if (current_sub_frame == 0)
S_ACCUMULATE: begin read_doppler_index <= (2 < CHIRPS_PER_SUBFRAME) ? 2
if (data_valid) begin : CHIRPS_PER_SUBFRAME - 1;
// Drive memory write signals (actual write in separate block) else
mem_we <= 1; read_doppler_index <= (CHIRPS_PER_SUBFRAME + 2 < CHIRPS_PER_FRAME)
mem_waddr_r <= mem_write_addr; ? CHIRPS_PER_SUBFRAME + 2
mem_wdata_i <= range_data[15:0]; : CHIRPS_PER_FRAME - 1;
mem_wdata_q <= range_data[31:16]; end else if (fft_sample_counter == 1) begin
mult_i <= mult_i_raw;
// Transition to S_PRE_READ when frame complete mult_q <= mult_q_raw;
if (write_range_bin >= RANGE_BINS - 1 && mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
write_chirp_index >= CHIRPS_PER_FRAME - 1) begin mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
read_range_bin <= 0; if (2 < CHIRPS_PER_SUBFRAME)
read_doppler_index <= 0; window_val_reg <= $signed(window_coeff[2]);
end // Advance to chirp base+3
end begin : advance_chirp3
end reg [4:0] next_chirp;
next_chirp = (current_sub_frame == 0) ? 3 : CHIRPS_PER_SUBFRAME + 3;
S_PRE_READ: begin if (next_chirp < CHIRPS_PER_FRAME)
// Advance read_doppler_index to 1 so next BRAM read read_doppler_index <= next_chirp;
// fetches chirp 1 else
read_doppler_index <= 1; read_doppler_index <= CHIRPS_PER_FRAME - 1;
// BREG priming: pre-register window coeff for sample 0 end
// so it is ready when S_LOAD_FFT sub=0 performs the multiply end else if (fft_sample_counter <= CHIRPS_PER_SUBFRAME + 1) begin
window_val_reg <= $signed(window_coeff[0]); // Steady state
end fft_input_i <= (mult_i + (1 << 14)) >>> 15;
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
S_LOAD_FFT: begin mult_i <= mult_i_raw;
if (fft_sample_counter == 0) begin mult_q <= mult_q_raw;
// Pipe stage 1: multiply using pre-registered BREG value
// mem_rdata_i = data[chirp=0][rbin] (primed by S_PRE_READ) if (fft_sample_counter <= CHIRPS_PER_SUBFRAME - 1) begin
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg; mult_i_raw <= $signed(mem_rdata_i) * window_val_reg;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg; mult_q_raw <= $signed(mem_rdata_q) * window_val_reg;
// Pre-register next window coeff (sample 1) // Window coeff index within sub-frame
window_val_reg <= $signed(window_coeff[1]); begin : advance_window
// Present BRAM addr for chirp 2 reg [4:0] win_idx;
read_doppler_index <= (2 < DOPPLER_FFT_SIZE) ? 2 win_idx = fft_sample_counter[3:0] + 1;
: DOPPLER_FFT_SIZE - 1; if (win_idx < CHIRPS_PER_SUBFRAME)
end else if (fft_sample_counter == 1) begin window_val_reg <= $signed(window_coeff[win_idx]);
// Pipe stage 2 (MREG): capture sample 0 multiply result end
mult_i <= mult_i_raw; // Advance BRAM read
mult_q <= mult_q_raw; begin : advance_bram
// Multiply sample 1 using registered window value reg [4:0] chirp_offset;
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg; reg [4:0] chirp_base;
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg; chirp_offset = fft_sample_counter[3:0] + 2;
// Pre-register next window coeff (sample 2) chirp_base = (current_sub_frame == 0) ? 0 : CHIRPS_PER_SUBFRAME;
if (2 < DOPPLER_FFT_SIZE) if (chirp_base + chirp_offset < CHIRPS_PER_FRAME)
window_val_reg <= $signed(window_coeff[2]); read_doppler_index <= chirp_base + chirp_offset;
// Advance BRAM read to chirp 3 else
if (3 < DOPPLER_FFT_SIZE) read_doppler_index <= CHIRPS_PER_FRAME - 1;
read_doppler_index <= 3; end
else end
read_doppler_index <= DOPPLER_FFT_SIZE - 1;
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE + 1) begin if (fft_sample_counter == CHIRPS_PER_SUBFRAME + 1) begin
// Sub 2..DOPPLER_FFT_SIZE+1: steady state // Reset read index for potential next operation
// Capture rounding into fft_input from MREG output if (current_sub_frame == 0)
fft_input_i <= (mult_i + (1 << 14)) >>> 15; read_doppler_index <= CHIRPS_PER_SUBFRAME; // Ready for short sub-frame
fft_input_q <= (mult_q + (1 << 14)) >>> 15; else
// MREG: capture multiply result read_doppler_index <= 0;
mult_i <= mult_i_raw; end
mult_q <= mult_q_raw; end
end
if (fft_sample_counter <= DOPPLER_FFT_SIZE - 1) begin
// New multiply from current BRAM data S_OUTPUT: begin
mult_i_raw <= $signed(mem_rdata_i) * window_val_reg; if (current_sub_frame == 0) begin
mult_q_raw <= $signed(mem_rdata_q) * window_val_reg; // Transitioning to short PRI sub-frame
// Pre-register next window coeff (clamped) // Set read_doppler_index to start of short sub-frame
if (fft_sample_counter + 1 < DOPPLER_FFT_SIZE) read_doppler_index <= CHIRPS_PER_SUBFRAME;
window_val_reg <= $signed(window_coeff[fft_sample_counter + 1]); end else begin
// Advance BRAM read // Both sub-frames done
if (fft_sample_counter + 2 < DOPPLER_FFT_SIZE) if (read_range_bin < RANGE_BINS - 1) begin
read_doppler_index <= fft_sample_counter + 2; read_range_bin <= read_range_bin + 1;
else read_doppler_index <= 0; // Next range bin starts with long sub-frame
read_doppler_index <= DOPPLER_FFT_SIZE - 1; end
end end
end
if (fft_sample_counter == DOPPLER_FFT_SIZE + 1) begin
// Flush complete reset read index default: begin
read_doppler_index <= 0; // S_FFT_WAIT: no BRAM-write or address operations needed
end end
end endcase
end end
S_OUTPUT: begin
if (read_range_bin < RANGE_BINS - 1) begin
read_range_bin <= read_range_bin + 1;
read_doppler_index <= 0;
end
end
default: begin
// S_IDLE, S_FFT_WAIT:
// no BRAM-write, DSP, or read-address operations needed
end
endcase
end
end end
// ============================================== // ==============================================
// FFT Module // FFT Module 16-point
// ============================================== // ==============================================
xfft_32 fft_inst ( xfft_16 fft_inst (
.aclk(clk), .aclk(clk),
.aresetn(reset_n), .aresetn(reset_n),
.s_axis_config_tdata(8'h01), .s_axis_config_tdata(8'h01),
@@ -517,5 +533,4 @@ xfft_32 fft_inst (
assign processing_active = (state != S_IDLE); assign processing_active = (state != S_IDLE);
assign frame_complete = (state == S_IDLE && frame_buffer_full == 0); assign frame_complete = (state == S_IDLE && frame_buffer_full == 0);
endmodule
endmodule
+8
View File
@@ -0,0 +1,8 @@
// Quarter-wave cosine ROM for 16-point FFT
// 4 entries (N/4), 16-bit signed Q15 format
// cos(2*pi*k/16) for k = 0..3
// Used by fft_engine with N=16, LOG2N=4
7FFF
7641
5A82
30FB
@@ -8,8 +8,8 @@
// Single-clock design: clk is an input wire, async2sync handles async reset. // Single-clock design: clk is an input wire, async2sync handles async reset.
// Each formal step = one clock edge. // Each formal step = one clock edge.
// //
// Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, DOPPLER_FFT_SIZE=4. // Parameters reduced: RANGE_BINS=4, CHIRPS_PER_FRAME=4, CHIRPS_PER_SUBFRAME=2, DOPPLER_FFT_SIZE=2.
// Includes full xfft_32 and fft_engine sub-modules. // Includes full xfft_16 and fft_engine sub-modules.
// //
// Focus: memory address bounds (highest-value finding) and state encoding. // Focus: memory address bounds (highest-value finding) and state encoding.
// ============================================================================ // ============================================================================
@@ -20,7 +20,8 @@ module fv_doppler_processor (
// Reduced parameters for tractable BMC // Reduced parameters for tractable BMC
localparam RANGE_BINS = 4; localparam RANGE_BINS = 4;
localparam CHIRPS_PER_FRAME = 4; localparam CHIRPS_PER_FRAME = 4;
localparam DOPPLER_FFT_SIZE = 4; localparam CHIRPS_PER_SUBFRAME = 2; // Dual sub-frame: 2 chirps per sub-frame
localparam DOPPLER_FFT_SIZE = 2; // FFT size matches sub-frame size
localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME; // 16 localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME; // 16
// State encoding (mirrors DUT localparams) // State encoding (mirrors DUT localparams)
@@ -62,6 +63,7 @@ module fv_doppler_processor (
wire doppler_valid; wire doppler_valid;
wire [4:0] doppler_bin; wire [4:0] doppler_bin;
wire [5:0] range_bin; wire [5:0] range_bin;
wire sub_frame;
wire processing_active; wire processing_active;
wire frame_complete; wire frame_complete;
wire [3:0] status; wire [3:0] status;
@@ -86,6 +88,7 @@ module fv_doppler_processor (
.DOPPLER_FFT_SIZE (DOPPLER_FFT_SIZE), .DOPPLER_FFT_SIZE (DOPPLER_FFT_SIZE),
.RANGE_BINS (RANGE_BINS), .RANGE_BINS (RANGE_BINS),
.CHIRPS_PER_FRAME (CHIRPS_PER_FRAME), .CHIRPS_PER_FRAME (CHIRPS_PER_FRAME),
.CHIRPS_PER_SUBFRAME (CHIRPS_PER_SUBFRAME),
.WINDOW_TYPE (1), // Rectangular — simpler for formal .WINDOW_TYPE (1), // Rectangular — simpler for formal
.DATA_WIDTH (16) .DATA_WIDTH (16)
) dut ( ) dut (
@@ -98,6 +101,7 @@ module fv_doppler_processor (
.doppler_valid (doppler_valid), .doppler_valid (doppler_valid),
.doppler_bin (doppler_bin), .doppler_bin (doppler_bin),
.range_bin (range_bin), .range_bin (range_bin),
.sub_frame (sub_frame),
.processing_active(processing_active), .processing_active(processing_active),
.frame_complete (frame_complete), .frame_complete (frame_complete),
.status (status), .status (status),
@@ -36,6 +36,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
DOPPLER_FFT = 32 DOPPLER_FFT = 32
RANGE_BINS = 64 RANGE_BINS = 64
TOTAL_OUTPUTS = RANGE_BINS * DOPPLER_FFT # 2048 TOTAL_OUTPUTS = RANGE_BINS * DOPPLER_FFT # 2048
SUBFRAME_SIZE = 16
SCENARIOS = { SCENARIOS = {
'stationary': { 'stationary': {
@@ -125,6 +126,19 @@ def find_peak_bin(i_arr, q_arr):
return max(range(len(mags)), key=lambda k: mags[k]) return max(range(len(mags)), key=lambda k: mags[k])
def peak_bins_match(py_peak, rtl_peak):
"""Return True if peaks match within +/-1 bin inside the same sub-frame."""
py_sf = py_peak // SUBFRAME_SIZE
rtl_sf = rtl_peak // SUBFRAME_SIZE
if py_sf != rtl_sf:
return False
py_bin = py_peak % SUBFRAME_SIZE
rtl_bin = rtl_peak % SUBFRAME_SIZE
diff = abs(py_bin - rtl_bin)
return diff <= 1 or diff >= SUBFRAME_SIZE - 1
def total_energy(data_dict): def total_energy(data_dict):
"""Sum of I^2 + Q^2 across all range bins and Doppler bins.""" """Sum of I^2 + Q^2 across all range bins and Doppler bins."""
total = 0 total = 0
@@ -207,8 +221,8 @@ def compare_scenario(name, config, base_dir):
py_peak = find_peak_bin(py_i, py_q) py_peak = find_peak_bin(py_i, py_q)
rtl_peak = find_peak_bin(rtl_i, rtl_q) rtl_peak = find_peak_bin(rtl_i, rtl_q)
# Peak agreement (allow +/- 1 bin tolerance) # Peak agreement (allow +/-1 bin tolerance, but only within a sub-frame)
if abs(py_peak - rtl_peak) <= 1 or abs(py_peak - rtl_peak) >= DOPPLER_FFT - 1: if peak_bins_match(py_peak, rtl_peak):
peak_agreements += 1 peak_agreements += 1
py_mag = magnitude_l1(py_i, py_q) py_mag = magnitude_l1(py_i, py_q)
@@ -242,7 +256,7 @@ def compare_scenario(name, config, base_dir):
avg_corr_q = sum(q_correlations) / len(q_correlations) avg_corr_q = sum(q_correlations) / len(q_correlations)
print(f"\n Per-range-bin metrics:") print(f"\n Per-range-bin metrics:")
print(f" Peak Doppler bin agreement (+/-1): {peak_agreements}/{RANGE_BINS} " print(f" Peak Doppler bin agreement (+/-1 within sub-frame): {peak_agreements}/{RANGE_BINS} "
f"({peak_agreement_frac:.0%})") f"({peak_agreement_frac:.0%})")
print(f" Avg magnitude correlation: {avg_mag_corr:.4f}") print(f" Avg magnitude correlation: {avg_mag_corr:.4f}")
print(f" Avg I-channel correlation: {avg_corr_i:.4f}") print(f" Avg I-channel correlation: {avg_corr_i:.4f}")
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1106,8 +1106,8 @@ FFFF0000
00000000 00000000
00000000 00000000
00000000 00000000
FFFF0001 00000001
FFFF0000 00000000
FFFF0005 FFFF0005
00000001 00000001
00000001 00000001
@@ -1172,7 +1172,7 @@ FFFF0000
00010000 00010000
00010000 00010000
00010000 00010000
00060003 00060002
00010001 00010001
00000001 00000001
00000000 00000000
@@ -1236,7 +1236,7 @@ FFFF0000
00000000 00000000
0001FFFF 0001FFFF
0002FFFF 0002FFFF
0006FFFD 0005FFFC
00010000 00010000
0001FFFF 0001FFFF
00000001 00000001
@@ -1300,7 +1300,7 @@ FFFF0000
00000000 00000000
00000000 00000000
FFFFFFFF FFFFFFFF
FFFFFFFA FFFEFFFA
0000FFFF 0000FFFF
0000FFFF 0000FFFF
00010001 00010001
@@ -1364,9 +1364,9 @@ FFFF0000
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
FFFAFFFD FFFAFFFF
FFFFFFFF FFFFFFFF
00000000 00000001
00000001 00000001
FFFF0000 FFFF0000
00000000 00000000
@@ -1427,74 +1427,74 @@ FFFF0000
FFFF0000 FFFF0000
00000000 00000000
FFFF0000 FFFF0000
00000001
FFFB0005
FFFE0001
00000000
00010000
00000000
00000000
00000001
00000000
0000FFFF
00010001
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000
00010001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
FFFFFFFF
FFFFFFFF
0000FFFF
00000000
00000000
00000001
00000000
00000000
FFFF0000
FFFF0000
00000001
00010000
00000000
FFFF0000
00010000
00000001
FFFF0000
FFFF0000
00010001
FFFF0000
FFFFFFFF
00000000
00010000
FFFF0000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00010001
00000000
00000000
FFFF0000
00000000
00010001
00000001
00010006
00000002 00000002
FFFD0006
FFFE0001
00000001 00000001
00010000
00000000
00000000
00000001
00000000
0000FFFF
00010001
00000000
00000000
00000000
00000000
00000000
00000001
00000001
00000000
00010001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
FFFFFFFF
FFFFFFFF
0000FFFF
00000000
00000000
00000001
00000000
00000000
FFFF0000
FFFF0000
00000001
00010000
00000000
FFFF0000
00010000
00000001
FFFF0000
FFFF0000
00010001
FFFF0000
FFFFFFFF
00000000
00010000
FFFF0000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00010001
00000000
00000000
FFFF0000
00000000
00010000
00010001
00030005
00010001
00010001
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
@@ -1556,8 +1556,8 @@ FFFFFFFF
00000000 00000000
00010000 00010000
00020000 00020000
00060001 0006FFFE
00010000 0001FFFF
00010000 00010000
FFFF0000 FFFF0000
00000001 00000001
@@ -1619,9 +1619,9 @@ FFFFFFFE
00000001 00000001
0000FFFF 0000FFFF
00010000 00010000
0001FFFF 0001FFFE
0004FFFB 0001FFFA
0002FFFF 0002FFFE
00010000 00010000
FFFF0000 FFFF0000
FFFF0000 FFFF0000
@@ -1682,9 +1682,9 @@ FFFF0000
00000000 00000000
00000001 00000001
00000001 00000001
00000000
FFFF0000 FFFF0000
FFFEFFFA FFFF0000
FFFBFFFC
FFFFFFFF FFFFFFFF
FFFF0000 FFFF0000
0000FFFF 0000FFFF
@@ -1747,9 +1747,9 @@ FFFFFFFF
00000000 00000000
0000FFFF 0000FFFF
FFFF0001 FFFF0001
FFFF0000 FFFF0001
FFFA0000 FFFA0003
FFFE0000 FFFF0001
FFFF0000 FFFF0000
00000000 00000000
00000001 00000001
@@ -1811,74 +1811,74 @@ FFFF0001
00010000 00010000
0000FFFF 0000FFFF
00000000 00000000
FFFF0002
FFFD0005
FFFF0001
00000001
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000001
00000000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
0000FFFF
00010000
FFFF0000
0001FFFF
0000FFFF
0001FFFF
00000000
0000FFFF
00000001
00010002
00030005
00000002 00000002
00000006
FFFF0002
00010001 00010001
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000001
00000000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
0000FFFF
00010000
FFFF0000
0001FFFF
0000FFFF
0001FFFF
00000000
0000FFFF
00010000
00020001
00060002
00000001
00010000
0001FFFF 0001FFFF
00000000 00000000
00000000 00000000
@@ -1939,9 +1939,9 @@ FFFF0000
00000000 00000000
0000FFFF 0000FFFF
0001FFFF 0001FFFF
0001FFFF 0000FFFE
00070000 0005FFFC
00000000 0000FFFF
00010001 00010001
FFFF0000 FFFF0000
0000FFFF 0000FFFF
@@ -2003,9 +2003,9 @@ FFFF0000
00000001 00000001
00000000 00000000
0000FFFF 0000FFFF
0001FFFF
0002FFF9
0000FFFF 0000FFFF
FFFDFFF9
FFFFFFFF
FFFFFFFF FFFFFFFF
00000000 00000000
00000000 00000000
@@ -1099,7 +1099,7 @@ FFFF0000
00000000 00000000
00000002 00000002
FFFF0003 FFFF0003
FFFE0012 FFFF0012
00000003 00000003
FFFF0002 FFFF0002
00010001 00010001
@@ -1163,7 +1163,7 @@ FFFF0000
00010001 00010001
00010002 00010002
00020003 00020003
000C000D 000D000C
00030003 00030003
00000001 00000001
00000001 00000001
@@ -1226,9 +1226,9 @@ FFFF0000
00000000 00000000
FFFF0000 FFFF0000
00020000 00020000
00030000 0003FFFF
00110004 00120002
00030000 0003FFFF
00020000 00020000
00000000 00000000
FFFF0000 FFFF0000
@@ -1291,8 +1291,8 @@ FFFF0000
00010000 00010000
0002FFFF 0002FFFF
0003FFFE 0003FFFE
000FFFF6 000EFFF4
0004FFFF 0003FFFE
0002FFFF 0002FFFF
00000000 00000000
FFFF0000 FFFF0000
@@ -1312,8 +1312,8 @@ FFFF0000
00010000 00010000
00000001 00000001
0000FFFF 0000FFFF
00000000
00010000 00010000
00010001
FFFF0000 FFFF0000
00000001 00000001
0000FFFF 0000FFFF
@@ -1353,10 +1353,10 @@ FFFF0000
00010001 00010001
0001FFFF 0001FFFF
00010000 00010000
0001FFFE 0000FFFE
0001FFFD 0000FFFD
0006FFF0 0003FFEF
0001FFFD 0000FFFD
0000FFFE 0000FFFE
00000000 00000000
00010000 00010000
@@ -1376,7 +1376,7 @@ FFFF0000
0000FFFF 0000FFFF
00010000 00010000
00000001 00000001
00010001 00010002
00000000 00000000
00000001 00000001
00000000 00000000
@@ -1418,10 +1418,10 @@ FFFF0000
0000FFFF 0000FFFF
FFFF0000 FFFF0000
FFFFFFFE FFFFFFFE
FFFEFFFD FFFDFFFD
FFF9FFF1 FFF5FFF2
FFFEFFFD FFFEFFFE
FFFFFFFF FFFE0000
FFFF0000 FFFF0000
00000001 00000001
FFFF0000 FFFF0000
@@ -1439,8 +1439,8 @@ FFFF0000
0000FFFF 0000FFFF
00010001 00010001
FFFF0000 FFFF0000
FFFF0001 FFFF0000
FFFF0001 FFFF0000
00000000 00000000
00000000 00000000
00000001 00000001
@@ -1482,10 +1482,10 @@ FFFF0000
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
FFFCFFFF FFFC0000
FFEFFFF9 FFEEFFFE
FFFCFFFF FFFC0000
FFFF0000 FFFF0001
00000000 00000000
00000000 00000000
FFFF0000 FFFF0000
@@ -1504,7 +1504,7 @@ FFFF0000
00000000 00000000
00000000 00000000
00000000 00000000
FFFFFFFF 0000FFFF
FFFF0001 FFFF0001
00000000 00000000
00010000 00010000
@@ -1546,10 +1546,10 @@ FFFFFFFF
00000000 00000000
FFFFFFFF FFFFFFFF
FFFE0001 FFFE0001
FFFD0001 FFFD0002
FFEF0006 FFF1000B
FFFD0001 FFFD0002
FFFF0000 FFFF0001
00000000 00000000
FFFFFFFF FFFFFFFF
00010000 00010000
@@ -1609,77 +1609,77 @@ FFFF0001
00000000 00000000
00000001 00000001
00000000 00000000
FFFF0002
FFFE0003
FFF7000E
FFFF0005
FFFF0001
0001FFFF
00000000
00000001
0000FFFF
00000000
00000000
FFFF0000
00010000
00010000
FFFF0000
FFFF0000
0000FFFF
00000000
00000000
00010000
00000000
00000000
00010000
00020001
00000000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000001
00000001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
0000FFFF
00010000
FFFF0000
0001FFFF
00010001
00000000
FFFF0001
00010000
0000FFFF
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
00000001
00000000
FFFF0000
FFFF0000
00000000
0000FFFF
00000001
00000002 00000002
00000003 FFFF0004
00050012 FFFC0010
00010003 00000005
00000001
0001FFFF
00000000
00000001
0000FFFF
00000000
00000000
FFFF0000
00010000
00010000
FFFF0000
FFFF0000
0000FFFF
00000000
00000000
00010000
00000000
00000000
00010000
00010002 00010002
00000000 00000000
00000000 00000000
00000000
FFFF0000
00000000
00000000
00010000
00000001
00000001
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
0000FFFF
00010000
FFFF0000
0001FFFF
00010001
00000000
FFFF0001
00010000
0000FFFF
00000001
FFFF0000
00000000
0000FFFF
FFFF0000
00000001
00000000
FFFF0000
FFFF0000
00000000
0000FFFF
00000001
00000002
00010003
000B000F
00020003
00020002
00000000
00000000
00000001 00000001
00000001 00000001
00000001 00000001
@@ -1696,9 +1696,9 @@ FFFFFFFF
00000000 00000000
0000FFFF 0000FFFF
00000000 00000000
00000002 FFFF0001
00010000 0000FFFF
00000000 FFFF0000
00000000 00000000
00000000 00000000
00000000 00000000
@@ -1737,160 +1737,160 @@ FFFFFFFF
00000000 00000000
00000000 00000000
00000001 00000001
00020001
00030000
00110004
00040000
00020000
00000000
00000000
00000000
0000FFFF
00000001
00000000
00000001
00000000
00000000
00000000
00000001
FFFFFFFF
0000FFFF
FFFF0000
00000000
FFFF0000
00000001
00000000
0000FFFF
FFFFFFFF
00000000
00000000
FFFF0000
FFFF0000
0000FFFF
00010000
00000001
00010000
00010001
00000000
0000FFFF
00000001
00000000
FFFF0001
00010001
00000000
00000000
00000000
00000000
FFFFFFFF
FFFF0000
00000000
00010001
00010000
FFFFFFFF
00000000
00000001
00000000
00000000
00000000
00000000
00000000
00010000
00000000
FFFF0000
0000FFFF
0000FFFF
00000000
00000000
0001FFFF
0004FFFE
000FFFF7
0004FFFE
00010000
FFFF0001
0000FFFF
00010000
0000FFFF
00000000
FFFF0001
00000000
FFFF0000
00010000
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000000
00010000
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
0000FFFE
0001FFFB
0005FFEF
0000FFFC
0001FFFE
0000FFFF
0001FFFF
00000000
0000FFFF
00000000
00010001
00000000
FFFF0001
00000000
0001FFFF
00000000
00000000
00010000
FFFF0000
00000000
0001FFFF
00000000
00000001
00020002 00020002
00030001
000E000A
00040001
00020001
00000000
00000000
00000000 00000000
0000FFFF 0000FFFF
00000001
00000000
00000001
00000000
00000000
00000000
00000001
FFFFFFFF
0000FFFF
FFFF0000
00000000
FFFF0000
00000001
00000000
FFFFFFFF
FFFFFFFF
00000000
00000000
FFFF0000
FFFF0000
0000FFFF
00010000
00000001
00010000
00010001
00000000
0000FFFF
00000001
00000000
FFFF0001
00010001
00000000
00000000
00000000
00000000
FFFFFFFF
FFFF0000
00000000
00010001
00010000
FFFFFFFF
00000000
00000001
00000000
00000000
00000000
00000000
00000000
00010000
00000000
FFFF0000
0000FFFF
0000FFFF
00000000
00000000
00020000
00050000
0012FFFE
00040000
00020000
FFFF0001
0000FFFF
00010000
0000FFFF
00000000
FFFF0001
00000000
FFFF0000
00010000
0000FFFF
FFFF0001
00000000
00000000
00000000
FFFFFFFF
00010001
FFFFFFFF
00000000
0000FFFF
00000000
00000000
00010000
00000000
00000000
FFFF0000
00000000
00000000
00010000
00000000
00000000
00000000
00000000
00000000
0000FFFF
00000000
0000FFFF
00000000
00000000
00000001
00000001
00000000
00000000
00000000
00000000
00000001
FFFF0000
00010000
FFFF0000
FFFF0000
00000000
00000000
00000000
00000001
00000000
FFFF0000
00000001
FFFF0000
00000000
0000FFFF
0000FFFE
0003FFFC
000CFFF3
0001FFFD
0002FFFE
0000FFFF
0001FFFF
00000000
0000FFFF
00000000
00010001
00000000
FFFF0001
00000000
0001FFFF
00000000
00000000
00010000
FFFF0000
00000000
0001FFFF
00010000
00000000
00030001
00000000
0001FFFF
00000000 00000000
00000000 00000000
0000FFFF 0000FFFF
@@ -1929,78 +1929,78 @@ FFFF0000
FFFF0000 FFFF0000
00000000 00000000
00000000 00000000
0000FFFE
FFFFFFFD
FFFFFFEE
FFFFFFFC
FFFFFFFE FFFFFFFE
00000000
FFFF0000
00000000
0000FFFF
0000FFFF
FFFFFFFF
00000000
FFFF0000
00000001
FFFF0000
0000FFFF
00000000
00000000
00000000
00010000
FFFF0000
00000000
00000000
00010001
00000000
00000000
0000FFFF
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
00000000
00000000
00000000
00010000
00000000
00000001
00000000
FFFF0000
00000000
00000001
00010000
00000000
00000001
00010000
00000000
FFFF0000
00000001
00000000
00000000
00000000
00000000
00000000
00000001
00010000
00000000
00000000
0001FFFF
0000FFFF
00010000
FFFF0000
FFFFFFFF
FFFEFFFE
FFF3FFF3
FFFEFFFD FFFEFFFD
FFF7FFF1
FFFEFFFD
FFFEFFFE
00000000
FFFF0000
00000000
0000FFFF
0000FFFF
FFFFFFFF FFFFFFFF
00000000 00000000
FFFF0000 FFFF0000
00000001 00000001
FFFF0000
0000FFFF
00000000
00000000
00000000
00010000
FFFF0000
00000000
00000000
FFFF0000
00000000
00000000
0000FFFF
00000000
00000000
00000000
00000000
00000001
0000FFFF
00000000
00000000
00000000
00000000
00010000
00000000
00000001
00000000
FFFF0000
00000000
00000001
00010000
00000000
00000001
00010000
00000000
FFFF0000
00000001
00000000
00000000
00000000
00000000
00000000
00000001
00010000
00000000
00000000
0001FFFF
0000FFFF
00010000
FFFF0000
FFFF0000
FFFEFFFF
FFEEFFFB
FFFDFFFE
FFFEFFFF
00000000
FFFF0000
00000001
00000000 00000000
00000000 00000000
00000001 00000001
@@ -2016,7 +2016,7 @@ FFFF0001
00010000 00010000
00000000 00000000
0001FFFF 0001FFFF
FFFE0000 FFFFFFFF
00000001 00000001
00000000 00000000
00010000 00010000
+66 -50
View File
@@ -1075,44 +1075,43 @@ class RangeBinDecimator:
# ============================================================================= # =============================================================================
# Doppler Processor (Hamming window + 32-point FFT) # Doppler Processor (Hamming window + dual 16-point FFT)
# ============================================================================= # =============================================================================
# Hamming window LUT (32 entries, 16-bit unsigned Q15) # Hamming window LUT (16 entries, 16-bit unsigned Q15)
# Matches doppler_processor.v window_coeff[0:15]
# w[n] = 0.54 - 0.46 * cos(2*pi*n/15), n=0..15, symmetric
HAMMING_WINDOW = [ HAMMING_WINDOW = [
0x0800, 0x0862, 0x09CB, 0x0C3B, 0x0FB2, 0x142F, 0x19B2, 0x2039, 0x0A3D, 0x0E5C, 0x1B6D, 0x3088, 0x4B33, 0x6573, 0x7642, 0x7F62,
0x27C4, 0x3050, 0x39DB, 0x4462, 0x4FE3, 0x5C5A, 0x69C4, 0x781D, 0x7F62, 0x7642, 0x6573, 0x4B33, 0x3088, 0x1B6D, 0x0E5C, 0x0A3D,
0x7FFF, 0x781D, 0x69C4, 0x5C5A, 0x4FE3, 0x4462, 0x39DB, 0x3050,
0x27C4, 0x2039, 0x19B2, 0x142F, 0x0FB2, 0x0C3B, 0x09CB, 0x0862,
] ]
class DopplerProcessor: class DopplerProcessor:
""" """
Bit-accurate model of doppler_processor_optimized.v Bit-accurate model of doppler_processor_optimized.v (dual 16-pt FFT architecture).
For each range bin (0-63): The staggered-PRF frame has 32 chirps total:
1. Read 32 chirps of data from accumulation buffer - Sub-frame 0 (long PRI): chirps 0-15 -> 16-pt Hamming -> 16-pt FFT -> bins 0-15
2. Apply Hamming window (Q15 multiply, round, >>>15) - Sub-frame 1 (short PRI): chirps 16-31 -> 16-pt Hamming -> 16-pt FFT -> bins 16-31
3. 32-point FFT
The 32-point FFT uses xfft_32.v (Xilinx IP wrapper around fft_engine). Output: doppler_bin[4:0] = {sub_frame_id, bin_in_subframe[3:0]}
For the Python model, we use FFTEngine with N=32. Total output per range bin: 32 bins (16 + 16), same interface as before.
""" """
DOPPLER_FFT_SIZE = 32 DOPPLER_FFT_SIZE = 16 # Per sub-frame
RANGE_BINS = 64 RANGE_BINS = 64
CHIRPS_PER_FRAME = 32 CHIRPS_PER_FRAME = 32
CHIRPS_PER_SUBFRAME = 16
def __init__(self, twiddle_file_32=None): def __init__(self, twiddle_file_16=None):
""" """
For 32-point FFT, we need the 32-point twiddle file. For 16-point FFT, we need the 16-point twiddle file.
If not provided, we generate twiddle factors mathematically If not provided, we generate twiddle factors mathematically
(since the 32-pt twiddle ROM is cos(2*pi*k/32) for k=0..7). (cos(2*pi*k/16) for k=0..3, quarter-wave ROM with 4 entries).
""" """
self.fft32 = None self.fft16 = None
self._twiddle_file_32 = twiddle_file_32 self._twiddle_file_16 = twiddle_file_16
# We'll use a simple 32-pt FFT with computed twiddles
@staticmethod @staticmethod
def window_multiply(data_16, window_16): def window_multiply(data_16, window_16):
@@ -1134,7 +1133,7 @@ class DopplerProcessor:
def process_frame(self, chirp_data_i, chirp_data_q): def process_frame(self, chirp_data_i, chirp_data_q):
""" """
Process one complete Doppler frame. Process one complete Doppler frame using dual 16-pt FFTs.
Args: Args:
chirp_data_i: 2D array [32 chirps][64 range bins] of signed 16-bit I chirp_data_i: 2D array [32 chirps][64 range bins] of signed 16-bit I
@@ -1143,46 +1142,63 @@ class DopplerProcessor:
Returns: Returns:
(doppler_map_i, doppler_map_q): 2D arrays [64 range bins][32 doppler bins] (doppler_map_i, doppler_map_q): 2D arrays [64 range bins][32 doppler bins]
of signed 16-bit of signed 16-bit
Bins 0-15 = sub-frame 0 (long PRI)
Bins 16-31 = sub-frame 1 (short PRI)
""" """
doppler_map_i = [] doppler_map_i = []
doppler_map_q = [] doppler_map_q = []
# Generate 32-pt twiddle factors (quarter-wave cos, 8 entries) # Generate 16-pt twiddle factors (quarter-wave cos, 4 entries)
# cos(2*pi*k/32) for k=0..7 # cos(2*pi*k/16) for k=0..3
# Matches fft_twiddle_16.mem: 7FFF, 7641, 5A82, 30FB
import math import math
cos_rom_32 = [] cos_rom_16 = []
for k in range(8): for k in range(4):
val = round(32767.0 * math.cos(2.0 * math.pi * k / 32.0)) val = round(32767.0 * math.cos(2.0 * math.pi * k / 16.0))
cos_rom_32.append(sign_extend(val & 0xFFFF, 16)) cos_rom_16.append(sign_extend(val & 0xFFFF, 16))
fft32 = FFTEngine.__new__(FFTEngine) fft16 = FFTEngine.__new__(FFTEngine)
fft32.N = 32 fft16.N = 16
fft32.LOG2N = 5 fft16.LOG2N = 4
fft32.cos_rom = cos_rom_32 fft16.cos_rom = cos_rom_16
fft32.mem_re = [0] * 32 fft16.mem_re = [0] * 16
fft32.mem_im = [0] * 32 fft16.mem_im = [0] * 16
for rbin in range(self.RANGE_BINS): for rbin in range(self.RANGE_BINS):
# Gather 32 chirps for this range bin # Output bins for this range bin: 32 total (16 from each sub-frame)
fft_in_re = [] out_re = [0] * 32
fft_in_im = [] out_im = [0] * 32
for chirp in range(self.CHIRPS_PER_FRAME): # Process each sub-frame independently
re_val = sign_extend(chirp_data_i[chirp][rbin] & 0xFFFF, 16) for sf in range(2):
im_val = sign_extend(chirp_data_q[chirp][rbin] & 0xFFFF, 16) chirp_start = sf * self.CHIRPS_PER_SUBFRAME
bin_offset = sf * self.DOPPLER_FFT_SIZE
# Apply Hamming window fft_in_re = []
win_re = self.window_multiply(re_val, HAMMING_WINDOW[chirp]) fft_in_im = []
win_im = self.window_multiply(im_val, HAMMING_WINDOW[chirp])
fft_in_re.append(win_re) for c in range(self.CHIRPS_PER_SUBFRAME):
fft_in_im.append(win_im) chirp = chirp_start + c
re_val = sign_extend(chirp_data_i[chirp][rbin] & 0xFFFF, 16)
im_val = sign_extend(chirp_data_q[chirp][rbin] & 0xFFFF, 16)
# 32-point forward FFT # Apply 16-pt Hamming window (index = c within sub-frame)
fft_out_re, fft_out_im = fft32.compute(fft_in_re, fft_in_im, inverse=False) win_re = self.window_multiply(re_val, HAMMING_WINDOW[c])
win_im = self.window_multiply(im_val, HAMMING_WINDOW[c])
doppler_map_i.append(fft_out_re) fft_in_re.append(win_re)
doppler_map_q.append(fft_out_im) fft_in_im.append(win_im)
# 16-point forward FFT
fft_out_re, fft_out_im = fft16.compute(fft_in_re, fft_in_im, inverse=False)
# Pack into output: sub-frame 0 -> bins 0-15, sub-frame 1 -> bins 16-31
for b in range(self.DOPPLER_FFT_SIZE):
out_re[bin_offset + b] = fft_out_re[b]
out_im[bin_offset + b] = fft_out_im[b]
doppler_map_i.append(out_re)
doppler_map_q.append(out_im)
return doppler_map_i, doppler_map_q return doppler_map_i, doppler_map_q
@@ -1207,7 +1223,7 @@ class SignalChain:
IF_FREQ = 120_000_000 # IF frequency IF_FREQ = 120_000_000 # IF frequency
FTW_120MHZ = 0x4CCCCCCD # Phase increment for 120 MHz at 400 MSPS FTW_120MHZ = 0x4CCCCCCD # Phase increment for 120 MHz at 400 MSPS
def __init__(self, twiddle_file_1024=None, twiddle_file_32=None): def __init__(self, twiddle_file_1024=None, twiddle_file_16=None):
self.nco = NCO() self.nco = NCO()
self.mixer = Mixer() self.mixer = Mixer()
self.cic_i = CICDecimator() self.cic_i = CICDecimator()
@@ -1217,7 +1233,7 @@ class SignalChain:
self.ddc_interface = DDCInputInterface() self.ddc_interface = DDCInputInterface()
self.matched_filter = MatchedFilterChain(fft_size=1024, twiddle_file=twiddle_file_1024) self.matched_filter = MatchedFilterChain(fft_size=1024, twiddle_file=twiddle_file_1024)
self.range_decimator = RangeBinDecimator() self.range_decimator = RangeBinDecimator()
self.doppler = DopplerProcessor(twiddle_file_32=twiddle_file_32) self.doppler = DopplerProcessor(twiddle_file_16=twiddle_file_16)
def ddc_step(self, adc_data_8bit, ftw=None): def ddc_step(self, adc_data_8bit, ftw=None):
""" """
@@ -3,23 +3,17 @@
Generate Doppler processor co-simulation golden reference data. Generate Doppler processor co-simulation golden reference data.
Uses the bit-accurate Python model (fpga_model.py) to compute the expected Uses the bit-accurate Python model (fpga_model.py) to compute the expected
Doppler FFT output. Also generates the input hex files consumed by the Doppler FFT output for the dual 16-pt FFT architecture. Also generates the
Verilog testbench (tb_doppler_cosim.v). input hex files consumed by the Verilog testbench (tb_doppler_cosim.v).
Two output modes: Architecture:
1. "clean" — straight Python model (correct windowing alignment) Sub-frame 0 (long PRI): chirps 0-15 -> 16-pt Hamming -> 16-pt FFT -> bins 0-15
2. "buggy" — replicates the RTL's windowing pipeline misalignment: Sub-frame 1 (short PRI): chirps 16-31 -> 16-pt Hamming -> 16-pt FFT -> bins 16-31
* Sample 0: fft_input = 0 (from reset mult value)
* Sample 1: fft_input = window_multiply(data[wrong_rbin_or_0], window[0])
* Sample k (k>=2): fft_input = window_multiply(data[k-2], window[k-1])
Default mode is "clean". The comparison script uses correlation-based
metrics that are tolerant of the pipeline shift.
Usage: Usage:
cd ~/PLFM_RADAR/9_Firmware/9_2_FPGA/tb/cosim cd ~/PLFM_RADAR/9_Firmware/9_2_FPGA/tb/cosim
python3 gen_doppler_golden.py # clean model python3 gen_doppler_golden.py
python3 gen_doppler_golden.py --buggy # replicate RTL pipeline bug python3 gen_doppler_golden.py stationary # single scenario
Author: Phase 0.5 Doppler co-simulation suite for PLFM_RADAR Author: Phase 0.5 Doppler co-simulation suite for PLFM_RADAR
""" """
@@ -31,7 +25,7 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from fpga_model import ( from fpga_model import (
DopplerProcessor, FFTEngine, sign_extend, HAMMING_WINDOW DopplerProcessor, sign_extend, HAMMING_WINDOW
) )
from radar_scene import Target, generate_doppler_frame from radar_scene import Target, generate_doppler_frame
@@ -40,7 +34,8 @@ from radar_scene import Target, generate_doppler_frame
# Constants # Constants
# ============================================================================= # =============================================================================
DOPPLER_FFT_SIZE = 32 DOPPLER_FFT_SIZE = 16 # Per sub-frame
DOPPLER_TOTAL_BINS = 32 # Total output (2 sub-frames x 16)
RANGE_BINS = 64 RANGE_BINS = 64
CHIRPS_PER_FRAME = 32 CHIRPS_PER_FRAME = 32
TOTAL_SAMPLES = CHIRPS_PER_FRAME * RANGE_BINS # 2048 TOTAL_SAMPLES = CHIRPS_PER_FRAME * RANGE_BINS # 2048
@@ -82,154 +77,6 @@ def write_hex_16bit(filepath, data):
# Buggy-model helpers (match RTL pipeline misalignment) # Buggy-model helpers (match RTL pipeline misalignment)
# ============================================================================= # =============================================================================
def window_multiply(data_16, window_16):
"""Hamming window multiply matching RTL."""
d = sign_extend(data_16 & 0xFFFF, 16)
w = sign_extend(window_16 & 0xFFFF, 16)
product = d * w
rounded = product + (1 << 14)
result = rounded >> 15
return sign_extend(result & 0xFFFF, 16)
def buggy_process_frame(chirp_data_i, chirp_data_q):
"""
Replicate the RTL's exact windowing pipeline for all 64 range bins.
For each range bin we model the three-stage pipeline:
Stage A (BRAM registered read):
mem_rdata captures doppler_i_mem[mem_read_addr] one cycle AFTER
mem_read_addr is presented.
Stage B (multiply):
mult_i <= mem_rdata_i * window_coeff[read_doppler_index]
-- read_doppler_index is the CURRENT cycle's value, but mem_rdata_i
-- is from the PREVIOUS cycle's address.
Stage C (round+shift):
fft_input_i <= (mult_i + (1<<14)) >>> 15
-- uses the PREVIOUS cycle's mult_i.
Additionally, at the S_ACCUMULATE->S_LOAD_FFT transition (rbin=0) or
S_OUTPUT->S_LOAD_FFT transition (rbin>0), the BRAM address during the
transition cycle depends on the stale read_doppler_index and read_range_bin
values.
This function models every detail to produce bit-exact FFT inputs.
"""
# Build the 32-pt FFT engine (matching fpga_model.py)
import math as _math
cos_rom_32 = []
for k in range(8):
val = round(32767.0 * _math.cos(2.0 * _math.pi * k / 32.0))
cos_rom_32.append(sign_extend(val & 0xFFFF, 16))
fft32 = FFTEngine.__new__(FFTEngine)
fft32.N = 32
fft32.LOG2N = 5
fft32.cos_rom = cos_rom_32
fft32.mem_re = [0] * 32
fft32.mem_im = [0] * 32
# Build flat BRAM contents: addr = chirp_index * 64 + range_bin
bram_i = [0] * TOTAL_SAMPLES
bram_q = [0] * TOTAL_SAMPLES
for chirp in range(CHIRPS_PER_FRAME):
for rb in range(RANGE_BINS):
addr = chirp * RANGE_BINS + rb
bram_i[addr] = sign_extend(chirp_data_i[chirp][rb] & 0xFFFF, 16)
bram_q[addr] = sign_extend(chirp_data_q[chirp][rb] & 0xFFFF, 16)
doppler_map_i = []
doppler_map_q = []
# State carried across range bins (simulates the RTL registers)
# After reset: read_doppler_index=0, read_range_bin=0, mult_i=0, mult_q=0,
# fft_input_i=0, fft_input_q=0
# The BRAM read is always active: mem_rdata <= doppler_i_mem[mem_read_addr]
# mem_read_addr = read_doppler_index * 64 + read_range_bin
# We need to track what read_doppler_index and read_range_bin are at each
# transition, since the BRAM captures data one cycle before S_LOAD_FFT runs.
# Before processing starts (just entered S_LOAD_FFT from S_ACCUMULATE):
# At the S_ACCUMULATE clock that transitions:
# read_doppler_index <= 0 (NBA)
# read_range_bin <= 0 (NBA)
# These take effect NEXT cycle. At the transition clock itself,
# read_doppler_index and read_range_bin still had their old values.
# From reset, both were 0. So BRAM captures addr=0*64+0=0.
#
# For rbin>0 transitions from S_OUTPUT:
# At S_OUTPUT clock:
# read_doppler_index <= 0 (was 0, since it wrapped from 32->0 in 5 bits)
# read_range_bin <= prev_rbin + 1 (NBA, takes effect next cycle)
# At S_OUTPUT clock, the current read_range_bin = prev_rbin,
# read_doppler_index = 0 (wrapped). So BRAM captures addr=0*64+prev_rbin.
for rbin in range(RANGE_BINS):
# Determine what BRAM data was captured during the transition clock
# (one cycle before S_LOAD_FFT's first execution cycle).
if rbin == 0:
# From S_ACCUMULATE: both indices were 0 (from reset or previous NBA)
# BRAM captures addr = 0*64+0 = 0 -> data[chirp=0][rbin=0]
transition_bram_addr = 0 * RANGE_BINS + 0
else:
# From S_OUTPUT: read_doppler_index=0 (wrapped), read_range_bin=rbin-1
# BRAM captures addr = 0*64+(rbin-1) -> data[chirp=0][rbin-1]
transition_bram_addr = 0 * RANGE_BINS + (rbin - 1)
transition_data_i = bram_i[transition_bram_addr]
transition_data_q = bram_q[transition_bram_addr]
# Now simulate the 32 cycles of S_LOAD_FFT for this range bin.
# Register pipeline state at entry:
mult_i_reg = 0 # From reset (rbin=0) or from end of previous S_FFT_WAIT
mult_q_reg = 0
fft_in_i_list = []
fft_in_q_list = []
for k in range(DOPPLER_FFT_SIZE):
# read_doppler_index = k at this cycle's start
# mem_read_addr = k * 64 + rbin
# What mem_rdata holds THIS cycle:
if k == 0:
# BRAM captured transition_bram_addr last cycle
rd_i = transition_data_i
rd_q = transition_data_q
else:
# BRAM captured addr from PREVIOUS cycle: (k-1)*64 + rbin
prev_addr = (k - 1) * RANGE_BINS + rbin
rd_i = bram_i[prev_addr]
rd_q = bram_q[prev_addr]
# Stage B: multiply (uses current read_doppler_index = k)
new_mult_i = sign_extend(rd_i & 0xFFFF, 16) * \
sign_extend(HAMMING_WINDOW[k] & 0xFFFF, 16)
new_mult_q = sign_extend(rd_q & 0xFFFF, 16) * \
sign_extend(HAMMING_WINDOW[k] & 0xFFFF, 16)
# Stage C: round+shift (uses PREVIOUS cycle's mult)
fft_i = (mult_i_reg + (1 << 14)) >> 15
fft_q = (mult_q_reg + (1 << 14)) >> 15
fft_in_i_list.append(sign_extend(fft_i & 0xFFFF, 16))
fft_in_q_list.append(sign_extend(fft_q & 0xFFFF, 16))
# Update pipeline registers for next cycle
mult_i_reg = new_mult_i
mult_q_reg = new_mult_q
# 32-point FFT
fft_out_re, fft_out_im = fft32.compute(
fft_in_i_list, fft_in_q_list, inverse=False
)
doppler_map_i.append(fft_out_re)
doppler_map_q.append(fft_out_im)
return doppler_map_i, doppler_map_q
# ============================================================================= # =============================================================================
# Test scenario definitions # Test scenario definitions
@@ -244,9 +91,10 @@ def make_scenario_stationary():
def make_scenario_moving(): def make_scenario_moving():
"""Single target with moderate Doppler shift.""" """Single target with moderate Doppler shift."""
# v = 15 m/s → fd = 2*v*fc/c ≈ 1050 Hz # v = 15 m/s → fd = 2*v*fc/c ≈ 1050 Hz
# PRI = 167 us → Doppler bin = fd * N_chirps * PRI = 1050 * 32 * 167e-6 ≈ 5.6 # Long PRI = 167 us → sub-frame 0 bin = fd * 16 * 167e-6 ≈ 2.8 → bin ~3
# Short PRI = 175 us → sub-frame 1 bin = fd * 16 * 175e-6 ≈ 2.9 → bin 16+3 = 19
targets = [Target(range_m=500, velocity_mps=15.0, rcs_dbsm=20.0)] targets = [Target(range_m=500, velocity_mps=15.0, rcs_dbsm=20.0)]
return targets, "Single moving target v=15m/s (~1050Hz Doppler, bin~5-6)" return targets, "Single moving target v=15m/s (~1050Hz Doppler, sf0 bin~3, sf1 bin~19)"
def make_scenario_two_targets(): def make_scenario_two_targets():
@@ -269,12 +117,11 @@ SCENARIOS = {
# Main generator # Main generator
# ============================================================================= # =============================================================================
def generate_scenario(name, targets, description, base_dir, use_buggy_model=False): def generate_scenario(name, targets, description, base_dir):
"""Generate input hex + golden output for one scenario.""" """Generate input hex + golden output for one scenario."""
print(f"\n{'='*60}") print(f"\n{'='*60}")
print(f"Scenario: {name}{description}") print(f"Scenario: {name}{description}")
model_label = "BUGGY (RTL pipeline)" if use_buggy_model else "CLEAN" print(f"Model: CLEAN (dual 16-pt FFT)")
print(f"Model: {model_label}")
print(f"{'='*60}") print(f"{'='*60}")
# Generate Doppler frame (32 chirps x 64 range bins) # Generate Doppler frame (32 chirps x 64 range bins)
@@ -292,26 +139,24 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
input_hex = os.path.join(base_dir, f"doppler_input_{name}.hex") input_hex = os.path.join(base_dir, f"doppler_input_{name}.hex")
write_hex_32bit(input_hex, packed_samples) write_hex_32bit(input_hex, packed_samples)
# ---- Run through Python model ---- # ---- Run through Python model (dual 16-pt FFT) ----
if use_buggy_model: dp = DopplerProcessor()
doppler_i, doppler_q = buggy_process_frame(frame_i, frame_q) doppler_i, doppler_q = dp.process_frame(frame_i, frame_q)
else:
dp = DopplerProcessor()
doppler_i, doppler_q = dp.process_frame(frame_i, frame_q)
print(f" Doppler output: {len(doppler_i)} range bins x " print(f" Doppler output: {len(doppler_i)} range bins x "
f"{len(doppler_i[0])} doppler bins") f"{len(doppler_i[0])} doppler bins (2 sub-frames x {DOPPLER_FFT_SIZE})")
# ---- Write golden output CSV ---- # ---- Write golden output CSV ----
# Format: range_bin, doppler_bin, out_i, out_q # Format: range_bin, doppler_bin, out_i, out_q
# Ordered same as RTL output: all doppler bins for rbin 0, then rbin 1, ... # Ordered same as RTL output: all doppler bins for rbin 0, then rbin 1, ...
# Bins 0-15 = sub-frame 0 (long PRI), bins 16-31 = sub-frame 1 (short PRI)
flat_rbin = [] flat_rbin = []
flat_dbin = [] flat_dbin = []
flat_i = [] flat_i = []
flat_q = [] flat_q = []
for rbin in range(RANGE_BINS): for rbin in range(RANGE_BINS):
for dbin in range(DOPPLER_FFT_SIZE): for dbin in range(DOPPLER_TOTAL_BINS):
flat_rbin.append(rbin) flat_rbin.append(rbin)
flat_dbin.append(dbin) flat_dbin.append(dbin)
flat_i.append(doppler_i[rbin][dbin]) flat_i.append(doppler_i[rbin][dbin])
@@ -331,8 +176,8 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
peak_info = [] peak_info = []
for rbin in range(RANGE_BINS): for rbin in range(RANGE_BINS):
mags = [abs(doppler_i[rbin][d]) + abs(doppler_q[rbin][d]) mags = [abs(doppler_i[rbin][d]) + abs(doppler_q[rbin][d])
for d in range(DOPPLER_FFT_SIZE)] for d in range(DOPPLER_TOTAL_BINS)]
peak_dbin = max(range(DOPPLER_FFT_SIZE), key=lambda d: mags[d]) peak_dbin = max(range(DOPPLER_TOTAL_BINS), key=lambda d: mags[d])
peak_mag = mags[peak_dbin] peak_mag = mags[peak_dbin]
peak_info.append((rbin, peak_dbin, peak_mag)) peak_info.append((rbin, peak_dbin, peak_mag))
@@ -341,33 +186,14 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
for rbin, dbin, mag in peak_info[:5]: for rbin, dbin, mag in peak_info[:5]:
i_val = doppler_i[rbin][dbin] i_val = doppler_i[rbin][dbin]
q_val = doppler_q[rbin][dbin] q_val = doppler_q[rbin][dbin]
print(f" rbin={rbin:2d}, dbin={dbin:2d}, mag={mag:6d}, " sf = dbin // DOPPLER_FFT_SIZE
bin_in_sf = dbin % DOPPLER_FFT_SIZE
print(f" rbin={rbin:2d}, dbin={dbin:2d} (sf{sf}:{bin_in_sf:2d}), mag={mag:6d}, "
f"I={i_val:6d}, Q={q_val:6d}") f"I={i_val:6d}, Q={q_val:6d}")
# ---- Write frame data for debugging ----
# Also write per-range-bin FFT input (for debugging pipeline alignment)
if use_buggy_model:
# Write the buggy FFT inputs for debugging
debug_csv = os.path.join(base_dir, f"doppler_fft_inputs_{name}.csv")
# Regenerate to capture FFT inputs
dp_debug = DopplerProcessor()
clean_i, clean_q = dp_debug.process_frame(frame_i, frame_q)
# Show the difference between clean and buggy
print(f"\n Comparing clean vs buggy model outputs:")
mismatches = 0
for rbin in range(RANGE_BINS):
for dbin in range(DOPPLER_FFT_SIZE):
if (doppler_i[rbin][dbin] != clean_i[rbin][dbin] or
doppler_q[rbin][dbin] != clean_q[rbin][dbin]):
mismatches += 1
total = RANGE_BINS * DOPPLER_FFT_SIZE
print(f" {mismatches}/{total} output samples differ "
f"({100*mismatches/total:.1f}%)")
return { return {
'name': name, 'name': name,
'description': description, 'description': description,
'model': 'buggy' if use_buggy_model else 'clean',
'peak_info': peak_info[:5], 'peak_info': peak_info[:5],
} }
@@ -375,11 +201,9 @@ def generate_scenario(name, targets, description, base_dir, use_buggy_model=Fals
def main(): def main():
base_dir = os.path.dirname(os.path.abspath(__file__)) base_dir = os.path.dirname(os.path.abspath(__file__))
use_buggy = '--buggy' in sys.argv
print("=" * 60) print("=" * 60)
print("Doppler Processor Co-Sim Golden Reference Generator") print("Doppler Processor Co-Sim Golden Reference Generator")
print(f"Model: {'BUGGY (RTL pipeline replication)' if use_buggy else 'CLEAN'}") print(f"Architecture: dual {DOPPLER_FFT_SIZE}-pt FFT ({DOPPLER_TOTAL_BINS} total bins)")
print("=" * 60) print("=" * 60)
scenarios_to_run = list(SCENARIOS.keys()) scenarios_to_run = list(SCENARIOS.keys())
@@ -395,15 +219,14 @@ def main():
results = [] results = []
for name in scenarios_to_run: for name in scenarios_to_run:
targets, description = SCENARIOS[name]() targets, description = SCENARIOS[name]()
r = generate_scenario(name, targets, description, base_dir, r = generate_scenario(name, targets, description, base_dir)
use_buggy_model=use_buggy)
results.append(r) results.append(r)
print(f"\n{'='*60}") print(f"\n{'='*60}")
print("Summary:") print("Summary:")
print(f"{'='*60}") print(f"{'='*60}")
for r in results: for r in results:
print(f" {r['name']:<15s} [{r['model']}] top peak: " print(f" {r['name']:<15s} top peak: "
f"rbin={r['peak_info'][0][0]}, dbin={r['peak_info'][0][1]}, " f"rbin={r['peak_info'][0][0]}, dbin={r['peak_info'][0][1]}, "
f"mag={r['peak_info'][0][2]}") f"mag={r['peak_info'][0][2]}")
+17 -7
View File
@@ -48,19 +48,24 @@ ADC_BITS = 8 # ADC resolution
T_LONG_CHIRP = 30e-6 # 30 us long chirp duration T_LONG_CHIRP = 30e-6 # 30 us long chirp duration
T_SHORT_CHIRP = 0.5e-6 # 0.5 us short chirp T_SHORT_CHIRP = 0.5e-6 # 0.5 us short chirp
T_LISTEN_LONG = 137e-6 # 137 us listening window T_LISTEN_LONG = 137e-6 # 137 us listening window
T_PRI_LONG = 167e-6 # 30 us chirp + 137 us listen
T_PRI_SHORT = 175e-6 # staggered short-PRI sub-frame
N_SAMPLES_LISTEN = int(T_LISTEN_LONG * FS_ADC) # 54800 samples N_SAMPLES_LISTEN = int(T_LISTEN_LONG * FS_ADC) # 54800 samples
# Processing chain # Processing chain
CIC_DECIMATION = 4 CIC_DECIMATION = 4
FFT_SIZE = 1024 FFT_SIZE = 1024
RANGE_BINS = 64 RANGE_BINS = 64
DOPPLER_FFT_SIZE = 32 DOPPLER_FFT_SIZE = 16 # Per sub-frame
DOPPLER_TOTAL_BINS = 32 # Total output bins (2 sub-frames x 16)
CHIRPS_PER_SUBFRAME = 16
CHIRPS_PER_FRAME = 32 CHIRPS_PER_FRAME = 32
# Derived # Derived
RANGE_RESOLUTION = C_LIGHT / (2 * CHIRP_BW) # 7.5 m RANGE_RESOLUTION = C_LIGHT / (2 * CHIRP_BW) # 7.5 m
MAX_UNAMBIGUOUS_RANGE = C_LIGHT * T_LISTEN_LONG / 2 # ~20.55 km MAX_UNAMBIGUOUS_RANGE = C_LIGHT * T_LISTEN_LONG / 2 # ~20.55 km
VELOCITY_RESOLUTION = WAVELENGTH / (2 * CHIRPS_PER_FRAME * T_LONG_CHIRP) VELOCITY_RESOLUTION_LONG = WAVELENGTH / (2 * CHIRPS_PER_SUBFRAME * T_PRI_LONG)
VELOCITY_RESOLUTION_SHORT = WAVELENGTH / (2 * CHIRPS_PER_SUBFRAME * T_PRI_SHORT)
# Short chirp LUT (60 entries, 8-bit unsigned) # Short chirp LUT (60 entries, 8-bit unsigned)
SHORT_CHIRP_LUT = [ SHORT_CHIRP_LUT = [
@@ -384,9 +389,6 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
break break
return math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2) return math.sqrt(-2.0 * math.log(u1)) * math.cos(2.0 * math.pi * u2)
# Chirp repetition interval (PRI)
t_pri = T_LONG_CHIRP + T_LISTEN_LONG # ~167 us
frame_i = [] frame_i = []
frame_q = [] frame_q = []
@@ -408,8 +410,16 @@ def generate_doppler_frame(targets, n_chirps=CHIRPS_PER_FRAME,
# Amplitude (simplified) # Amplitude (simplified)
amp = target.amplitude / 4.0 amp = target.amplitude / 4.0
# Doppler phase for this chirp # Doppler phase for this chirp.
doppler_phase = 2 * math.pi * target.doppler_hz * chirp_idx * t_pri # The frame uses staggered PRF: chirps 0-15 use the long PRI,
# chirps 16-31 use the short PRI.
if chirp_idx < CHIRPS_PER_SUBFRAME:
slow_time_s = chirp_idx * T_PRI_LONG
else:
slow_time_s = (CHIRPS_PER_SUBFRAME * T_PRI_LONG) + \
((chirp_idx - CHIRPS_PER_SUBFRAME) * T_PRI_SHORT)
doppler_phase = 2 * math.pi * target.doppler_hz * slow_time_s
total_phase = doppler_phase + target.phase_deg * math.pi / 180.0 total_phase = doppler_phase + target.phase_deg * math.pi / 180.0
# Spread across a few bins (sinc-like response from matched filter) # Spread across a few bins (sinc-like response from matched filter)
@@ -91,6 +91,7 @@ doppler_processor_optimized dut (
.doppler_valid(doppler_valid), .doppler_valid(doppler_valid),
.doppler_bin(doppler_bin), .doppler_bin(doppler_bin),
.range_bin(range_bin), .range_bin(range_bin),
.sub_frame(), // Not used in this testbench
.processing_active(processing_active), .processing_active(processing_active),
.frame_complete(frame_complete), .frame_complete(frame_complete),
.status(dut_status) .status(dut_status)
@@ -75,6 +75,7 @@ doppler_processor_optimized dut (
.doppler_valid(doppler_valid), .doppler_valid(doppler_valid),
.doppler_bin(doppler_bin), .doppler_bin(doppler_bin),
.range_bin(range_bin), .range_bin(range_bin),
.sub_frame(), // Not used in this testbench
.processing_active(processing_active), .processing_active(processing_active),
.frame_complete(frame_complete), .frame_complete(frame_complete),
.status(dut_status) .status(dut_status)
+252
View File
@@ -0,0 +1,252 @@
`timescale 1ns / 1ps
// ============================================================================
// xfft_16.v - 16-point FFT with AXI-Stream interface
// ============================================================================
// Wraps the synthesizable fft_engine (radix-2 DIT) with the AXI-Stream port
// interface expected by the doppler_processor dual-FFT architecture.
//
// Identical interface to xfft_32.v but with N=16.
//
// Data format: {Q[15:0], I[15:0]} packed 32-bit.
// Config tdata[0]: 1 = forward FFT, 0 = inverse FFT.
// ============================================================================
// xfft_16: buffers N = 16 complex samples from the AXI-Stream data input,
// passes them through the fft_engine instance, buffers the N results and
// streams them out on the AXI-Stream master channel.
//
// Frame sequence (see MAIN FSM):
//   S_IDLE   - wait for a config beat, latch the transform direction
//   S_FEED   - capture N input samples into in_buf_*
//   S_WAIT   - replay in_buf_* into the engine, capture its output in out_buf_*
//   S_OUTPUT - stream out_buf_* to the consumer, then return to S_IDLE
//
// Interface notes visible from this module:
//   * There is no s_axis_data_tready port. Input samples are captured only
//     while the FSM is in S_FEED, whenever s_axis_data_tvalid is high.
//   * s_axis_data_tlast is a port but is not referenced by the logic below;
//     the frame length is fixed by counting N samples.
module xfft_16 (
input wire aclk,
input wire aresetn,
// Configuration channel (AXI-Stream slave)
// Only bit 0 of tdata is read (1 = forward, 0 = inverse).
input wire [7:0] s_axis_config_tdata,
input wire s_axis_config_tvalid,
output wire s_axis_config_tready,
// Data input channel (AXI-Stream slave)
// tdata[15:0] is stored as the real part, tdata[31:16] as the imaginary part.
input wire [31:0] s_axis_data_tdata,
input wire s_axis_data_tvalid,
input wire s_axis_data_tlast,
// Data output channel (AXI-Stream master)
// tdata is packed as {imaginary[15:0], real[15:0]}.
output wire [31:0] m_axis_data_tdata,
output wire m_axis_data_tvalid,
output wire m_axis_data_tlast,
input wire m_axis_data_tready
);
// ============================================================================
// PARAMETERS
// ============================================================================
// N is the transform length; LOG2N is passed to fft_engine alongside it.
localparam N = 16;
localparam LOG2N = 4;
// ============================================================================
// INTERNAL SIGNALS
// ============================================================================
// FSM states
// NOTE: S_CONFIG is declared but the FSM below never transitions into it;
// the config beat is consumed directly in S_IDLE.
localparam [2:0] S_IDLE = 3'd0,
S_CONFIG = 3'd1,
S_FEED = 3'd2,
S_WAIT = 3'd3,
S_OUTPUT = 3'd4;
reg [2:0] state;
// Configuration
// Direction latched from the config beat (1 = inverse transform).
reg inverse_reg;
// Input buffering
// in_count is 5 bits wide so it can hold the terminal value N (16).
reg signed [15:0] in_buf_re [0:N-1];
reg signed [15:0] in_buf_im [0:N-1];
reg [4:0] in_count;
// Output buffering
// out_total counts results captured from the engine; out_count indexes the
// sample currently presented on the master channel. Both can reach N.
reg signed [15:0] out_buf_re [0:N-1];
reg signed [15:0] out_buf_im [0:N-1];
reg [4:0] out_count;
reg [4:0] out_total;
// FFT engine interface
// fft_busy is connected to the engine but not otherwise used in this module.
reg fft_start;
reg fft_inverse;
reg signed [15:0] fft_din_re, fft_din_im;
reg fft_din_valid;
wire signed [15:0] fft_dout_re, fft_dout_im;
wire fft_dout_valid;
wire fft_busy;
wire fft_done;
// Feed counter
// Number of buffered samples already replayed into the engine (0..N).
reg [4:0] feed_count;
// ============================================================================
// FFT ENGINE INSTANCE
// ============================================================================
// 16-bit data in/out; the remaining widths and the twiddle file are engine
// parameters whose meaning is defined in fft_engine (not visible here).
fft_engine #(
.N(N),
.LOG2N(LOG2N),
.DATA_W(16),
.INTERNAL_W(32),
.TWIDDLE_W(16),
.TWIDDLE_FILE("fft_twiddle_16.mem")
) fft_core (
.clk(aclk),
.reset_n(aresetn),
.start(fft_start),
.inverse(fft_inverse),
.din_re(fft_din_re),
.din_im(fft_din_im),
.din_valid(fft_din_valid),
.dout_re(fft_dout_re),
.dout_im(fft_dout_im),
.dout_valid(fft_dout_valid),
.busy(fft_busy),
.done(fft_done)
);
// ============================================================================
// AXI-STREAM OUTPUTS
// ============================================================================
// Config is accepted only while idle.
assign s_axis_config_tready = (state == S_IDLE);
// Output word is read combinationally from the result buffers; only the low
// 4 bits of out_count are used as the index.
assign m_axis_data_tdata = {out_buf_im[out_count[3:0]], out_buf_re[out_count[3:0]]};
// Valid for the N buffered results; tlast marks the final one (index N-1).
assign m_axis_data_tvalid = (state == S_OUTPUT) && (out_count < N);
assign m_axis_data_tlast = (state == S_OUTPUT) && (out_count == N - 1);
// ============================================================================
// BUFFER WRITE LOGIC - separate always block, NO async reset
// ============================================================================
// The FSM registers a write enable, address and data; this block performs the
// array write on the following clock edge. Each buffer write therefore lands
// one cycle after the FSM schedules it.
// NOTE(review): keeping the arrays out of the async-reset block is presumably
// intended to let them map to memory primitives - confirm against synthesis.
reg in_buf_we;
reg [3:0] in_buf_waddr;
reg signed [15:0] in_buf_wdata_re, in_buf_wdata_im;
reg out_buf_we;
reg [3:0] out_buf_waddr;
reg signed [15:0] out_buf_wdata_re, out_buf_wdata_im;
always @(posedge aclk) begin
if (in_buf_we) begin
in_buf_re[in_buf_waddr] <= in_buf_wdata_re;
in_buf_im[in_buf_waddr] <= in_buf_wdata_im;
end
if (out_buf_we) begin
out_buf_re[out_buf_waddr] <= out_buf_wdata_re;
out_buf_im[out_buf_waddr] <= out_buf_wdata_im;
end
end
// ============================================================================
// MAIN FSM
// ============================================================================
always @(posedge aclk or negedge aresetn) begin
if (!aresetn) begin
// Asynchronous, active-low reset of all FSM-owned registers.
state <= S_IDLE;
inverse_reg <= 1'b0;
in_count <= 0;
out_count <= 0;
out_total <= 0;
feed_count <= 0;
fft_start <= 1'b0;
fft_inverse <= 1'b0;
fft_din_re <= 0;
fft_din_im <= 0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
in_buf_waddr <= 0;
in_buf_wdata_re <= 0;
in_buf_wdata_im <= 0;
out_buf_we <= 1'b0;
out_buf_waddr <= 0;
out_buf_wdata_re <= 0;
out_buf_wdata_im <= 0;
end else begin
// Defaults: these strobes are single-cycle pulses unless a state below
// re-asserts them in the same clock.
fft_start <= 1'b0;
fft_din_valid <= 1'b0;
in_buf_we <= 1'b0;
out_buf_we <= 1'b0;
case (state)
S_IDLE: begin
in_count <= 0;
// A config beat starts a new frame. tdata[0] = 1 selects forward, so the
// stored inverse flag is its complement.
if (s_axis_config_tvalid) begin
inverse_reg <= ~s_axis_config_tdata[0];
state <= S_FEED;
in_count <= 0;
feed_count <= 0;
end
end
S_FEED: begin
if (in_count < N) begin
// Capture one input sample per valid beat; the actual array write is
// performed by the buffer write block on the next clock.
if (s_axis_data_tvalid) begin
in_buf_we <= 1'b1;
in_buf_waddr <= in_count[3:0];
in_buf_wdata_re <= s_axis_data_tdata[15:0];
in_buf_wdata_im <= s_axis_data_tdata[31:16];
in_count <= in_count + 1;
end
// All N samples captured: pulse fft_start and hand over to S_WAIT.
// feed_count was cleared in S_IDLE and is only incremented in S_WAIT.
end else if (feed_count == 0) begin
fft_start <= 1'b1;
fft_inverse <= inverse_reg;
feed_count <= 0;
state <= S_WAIT;
out_total <= 0;
end
end
S_WAIT: begin
// Replay the buffered inputs into the engine, one per clock, in index
// order with din_valid asserted.
if (feed_count < N) begin
fft_din_re <= in_buf_re[feed_count[3:0]];
fft_din_im <= in_buf_im[feed_count[3:0]];
fft_din_valid <= 1'b1;
feed_count <= feed_count + 1;
end
// Capture engine results in arrival order, at most N of them.
if (fft_dout_valid && out_total < N) begin
out_buf_we <= 1'b1;
out_buf_waddr <= out_total[3:0];
out_buf_wdata_re <= fft_dout_re;
out_buf_wdata_im <= fft_dout_im;
out_total <= out_total + 1;
end
// Engine reports completion: start streaming from index 0.
if (fft_done) begin
state <= S_OUTPUT;
out_count <= 0;
end
end
S_OUTPUT: begin
// Both inner actions require m_axis_data_tready, so the index only
// advances (and the frame only ends) on an accepted output beat.
if (m_axis_data_tready || !m_axis_data_tvalid) begin
if (out_count < N) begin
if (m_axis_data_tready) begin
out_count <= out_count + 1;
end
end
// Last sample (index N-1) accepted: frame done, return to idle.
if (out_count >= N - 1 && m_axis_data_tready) begin
state <= S_IDLE;
end
end
end
// Any unused encoding (including S_CONFIG) falls back to idle.
default: state <= S_IDLE;
endcase
end
end
// ============================================================================
// MEMORY INIT (simulation only)
// ============================================================================
// Zero-fills both buffers at time 0 when SIMULATION is defined; compiled out
// otherwise.
`ifdef SIMULATION
integer init_k;
initial begin
for (init_k = 0; init_k < N; init_k = init_k + 1) begin
in_buf_re[init_k] = 0;
in_buf_im[init_k] = 0;
out_buf_re[init_k] = 0;
out_buf_im[init_k] = 0;
end
end
`endif
endmodule