Replace FFT stubs with synthesizable radix-2 DIT engine, fix BRAM inference
Implement an iterative single-butterfly FFT engine (fft_engine.v) supporting 1024-pt and 32-pt transforms, with a quarter-wave twiddle ROM, XPM_MEMORY_TDPRAM for guaranteed BRAM mapping in Vivado, and a behavioral model for simulation. Add an xfft_32.v AXI-Stream wrapper for doppler_processor integration and a dual-branch matched_filter_processing_chain.v (behavioral + synthesis paths).

Fix the placement failure caused by 68K+ registers from dissolved memory arrays:
- doppler_processor.v: extract mem writes to a sync-only always block for BRAM
- xfft_32.v: extract buffer writes to a sync-only always block for LUTRAM

Post-implementation: 37K regs (29%), 23K LUTs (37%), 10 BRAM (7%), fully routed.
All testbenches pass: fft_engine 12/12, xfft_32 10/10, mf_chain 27/27.
This commit is contained in:
@@ -124,157 +124,188 @@ always @(posedge clk or negedge reset_n) begin
|
||||
end
|
||||
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
|
||||
|
||||
// ==============================================
|
||||
// Main State Machine - FIXED
|
||||
// ==============================================
|
||||
reg [5:0] fft_sample_counter;
|
||||
reg [9:0] processing_timeout;
|
||||
|
||||
always @(posedge clk or negedge reset_n) begin
|
||||
if (!reset_n) begin
|
||||
state <= S_IDLE;
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= 0;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
doppler_valid <= 0;
|
||||
fft_start <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 0;
|
||||
status <= 0;
|
||||
chirps_received <= 0;
|
||||
chirp_state <= 0;
|
||||
end else begin
|
||||
doppler_valid <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
|
||||
if (processing_timeout > 0) begin
|
||||
processing_timeout <= processing_timeout - 1;
|
||||
end
|
||||
|
||||
case (state)
|
||||
S_IDLE: begin
|
||||
if (frame_start_pulse) begin
|
||||
// Start new frame
|
||||
write_chirp_index <= 0;
|
||||
write_range_bin <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
chirps_received <= 0;
|
||||
//chirp_state <= 1; // Start accumulating
|
||||
end
|
||||
|
||||
if (data_valid && !frame_buffer_full) begin
|
||||
// ==============================================
|
||||
// Main State Machine - FIXED
|
||||
// ==============================================
|
||||
reg [5:0] fft_sample_counter;
|
||||
reg [9:0] processing_timeout;
|
||||
|
||||
// Memory write enable and data signals (extracted for BRAM inference)
|
||||
reg mem_we;
|
||||
reg [10:0] mem_waddr_r;
|
||||
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
|
||||
|
||||
// Memory read data (registered for BRAM read latency)
|
||||
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// Separate always block for memory writes — NO async reset
|
||||
// in sensitivity list, so Vivado can infer Block RAM.
|
||||
// ----------------------------------------------------------
|
||||
// Synchronous memory port for the doppler I/Q sample arrays.
// Clocked on posedge clk only: this block has no reset term, so the
// arrays and the read registers are never reset here.
// On each clock edge it (1) optionally writes one I/Q sample pair and
// (2) unconditionally captures one I/Q pair into the read registers.
always @(posedge clk) begin
|
||||
// Write port: both arrays share the same enable and the same address,
// so the I and Q halves of a sample always land at the same index.
if (mem_we) begin
|
||||
doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
|
||||
doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
|
||||
end
|
||||
// Registered read — address driven by mem_read_addr from FSM
// The read is not gated by any enable: mem_rdata_* is refreshed every
// cycle and holds the word addressed by mem_read_addr on the previous
// clock edge (one cycle of read latency for consumers).
// Non-blocking assignments mean a read of the address being written in
// the same cycle samples the old contents in simulation.
// NOTE(review): confirm the synthesized RAM uses the matching
// read-before-write mode.
mem_rdata_i <= doppler_i_mem[mem_read_addr];
|
||||
mem_rdata_q <= doppler_q_mem[mem_read_addr];
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// Main FSM — async reset for control registers only.
|
||||
// Memory arrays are NOT touched here.
|
||||
// ----------------------------------------------------------
|
||||
always @(posedge clk or negedge reset_n) begin
|
||||
if (!reset_n) begin
|
||||
state <= S_IDLE;
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= 0;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
doppler_valid <= 0;
|
||||
fft_start <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 0;
|
||||
status <= 0;
|
||||
chirps_received <= 0;
|
||||
chirp_state <= 0;
|
||||
mem_we <= 0;
|
||||
mem_waddr_r <= 0;
|
||||
mem_wdata_i <= 0;
|
||||
mem_wdata_q <= 0;
|
||||
mult_i <= 0;
|
||||
mult_q <= 0;
|
||||
fft_input_i <= 0;
|
||||
fft_input_q <= 0;
|
||||
doppler_output <= 0;
|
||||
doppler_bin <= 0;
|
||||
end else begin
|
||||
doppler_valid <= 0;
|
||||
fft_input_valid <= 0;
|
||||
fft_input_last <= 0;
|
||||
mem_we <= 0;
|
||||
|
||||
if (processing_timeout > 0) begin
|
||||
processing_timeout <= processing_timeout - 1;
|
||||
end
|
||||
|
||||
case (state)
|
||||
S_IDLE: begin
|
||||
if (frame_start_pulse) begin
|
||||
// Start new frame
|
||||
write_chirp_index <= 0;
|
||||
write_range_bin <= 0;
|
||||
frame_buffer_full <= 0;
|
||||
chirps_received <= 0;
|
||||
end
|
||||
|
||||
if (data_valid && !frame_buffer_full) begin
|
||||
state <= S_ACCUMULATE;
|
||||
write_range_bin <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
S_ACCUMULATE: begin
|
||||
if (data_valid) begin
|
||||
// Store with proper addressing
|
||||
doppler_i_mem[mem_write_addr] <= range_data[15:0];
|
||||
doppler_q_mem[mem_write_addr] <= range_data[31:16];
|
||||
|
||||
// Debug output to see what's being written
|
||||
// $display("Time=%t: Write addr=%d (chirp=%d, range=%d), Data=%h",
|
||||
// $time, mem_write_addr, write_chirp_index, write_range_bin, range_data);
|
||||
|
||||
// Increment range bin
|
||||
if (write_range_bin < RANGE_BINS - 1) begin
|
||||
write_range_bin <= write_range_bin + 1;
|
||||
end else begin
|
||||
// Completed one chirp
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= write_chirp_index + 1;
|
||||
chirps_received <= chirps_received + 1;
|
||||
|
||||
// Check if frame is complete
|
||||
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
|
||||
frame_buffer_full <= 1;
|
||||
chirp_state <= 0; // Stop accumulating
|
||||
// Could automatically start processing here:
|
||||
state <= S_LOAD_FFT;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
fft_start <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// [Rest of S_LOAD_FFT, S_FFT_WAIT, S_OUTPUT states remain similar]
|
||||
// But with fixed addressing in S_LOAD_FFT:
|
||||
S_LOAD_FFT: begin
|
||||
fft_start <= 0;
|
||||
|
||||
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
|
||||
// Use correct addressing for reading
|
||||
mult_i <= $signed(doppler_i_mem[mem_read_addr]) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
mult_q <= $signed(doppler_q_mem[mem_read_addr]) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
write_range_bin <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
S_ACCUMULATE: begin
|
||||
if (data_valid) begin
|
||||
// Drive memory write signals (actual write in separate block)
|
||||
mem_we <= 1;
|
||||
mem_waddr_r <= mem_write_addr;
|
||||
mem_wdata_i <= range_data[15:0];
|
||||
mem_wdata_q <= range_data[31:16];
|
||||
|
||||
// Round instead of truncate
|
||||
fft_input_i <= (mult_i + (1 << 14)) >>> 15; // Round to nearest
|
||||
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
||||
// Increment range bin
|
||||
if (write_range_bin < RANGE_BINS - 1) begin
|
||||
write_range_bin <= write_range_bin + 1;
|
||||
end else begin
|
||||
// Completed one chirp
|
||||
write_range_bin <= 0;
|
||||
write_chirp_index <= write_chirp_index + 1;
|
||||
chirps_received <= chirps_received + 1;
|
||||
|
||||
// Check if frame is complete
|
||||
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
|
||||
frame_buffer_full <= 1;
|
||||
chirp_state <= 0;
|
||||
state <= S_LOAD_FFT;
|
||||
read_range_bin <= 0;
|
||||
read_doppler_index <= 0;
|
||||
fft_sample_counter <= 0;
|
||||
fft_start <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
S_LOAD_FFT: begin
|
||||
fft_start <= 0;
|
||||
|
||||
if (fft_sample_counter < DOPPLER_FFT_SIZE) begin
|
||||
// Use registered read data (one cycle latency from BRAM)
|
||||
mult_i <= $signed(mem_rdata_i) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
mult_q <= $signed(mem_rdata_q) *
|
||||
$signed(window_coeff[read_doppler_index]);
|
||||
|
||||
fft_input_valid <= 1;
|
||||
|
||||
if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin
|
||||
fft_input_last <= 1;
|
||||
end
|
||||
|
||||
// Increment chirp index for next sample
|
||||
read_doppler_index <= read_doppler_index + 1;
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
end else begin
|
||||
state <= S_FFT_WAIT;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 100;
|
||||
end
|
||||
end
|
||||
|
||||
S_FFT_WAIT: begin
|
||||
if (fft_output_valid) begin
|
||||
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
|
||||
doppler_bin <= fft_sample_counter;
|
||||
range_bin <= read_range_bin;
|
||||
doppler_valid <= 1;
|
||||
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
|
||||
if (fft_output_last) begin
|
||||
state <= S_OUTPUT;
|
||||
fft_sample_counter <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (processing_timeout == 0) begin
|
||||
state <= S_OUTPUT;
|
||||
end
|
||||
end
|
||||
|
||||
S_OUTPUT: begin
|
||||
if (read_range_bin < RANGE_BINS - 1) begin
|
||||
read_range_bin <= read_range_bin + 1;
|
||||
read_doppler_index <= 0;
|
||||
state <= S_LOAD_FFT;
|
||||
fft_start <= 1;
|
||||
end else begin
|
||||
state <= S_IDLE;
|
||||
frame_buffer_full <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
endcase
|
||||
|
||||
status <= {state, frame_buffer_full};
|
||||
end
|
||||
// Round instead of truncate
|
||||
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
|
||||
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
||||
|
||||
fft_input_valid <= 1;
|
||||
|
||||
if (fft_sample_counter == DOPPLER_FFT_SIZE - 1) begin
|
||||
fft_input_last <= 1;
|
||||
end
|
||||
|
||||
// Increment chirp index for next sample
|
||||
read_doppler_index <= read_doppler_index + 1;
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
end else begin
|
||||
state <= S_FFT_WAIT;
|
||||
fft_sample_counter <= 0;
|
||||
processing_timeout <= 100;
|
||||
end
|
||||
end
|
||||
|
||||
S_FFT_WAIT: begin
|
||||
if (fft_output_valid) begin
|
||||
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
|
||||
doppler_bin <= fft_sample_counter;
|
||||
range_bin <= read_range_bin;
|
||||
doppler_valid <= 1;
|
||||
|
||||
fft_sample_counter <= fft_sample_counter + 1;
|
||||
|
||||
if (fft_output_last) begin
|
||||
state <= S_OUTPUT;
|
||||
fft_sample_counter <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (processing_timeout == 0) begin
|
||||
state <= S_OUTPUT;
|
||||
end
|
||||
end
|
||||
|
||||
S_OUTPUT: begin
|
||||
if (read_range_bin < RANGE_BINS - 1) begin
|
||||
read_range_bin <= read_range_bin + 1;
|
||||
read_doppler_index <= 0;
|
||||
state <= S_LOAD_FFT;
|
||||
fft_start <= 1;
|
||||
end else begin
|
||||
state <= S_IDLE;
|
||||
frame_buffer_full <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
endcase
|
||||
|
||||
status <= {state, frame_buffer_full};
|
||||
end
|
||||
end
|
||||
|
||||
// ==============================================
|
||||
|
||||
Reference in New Issue
Block a user