fb59e98737
Formal verification (SymbiYosys + smtbmc/z3): - cdc_single_bit: BMC PASS depth 80, cover PASS 3/3 - cdc_handshake: BMC PASS depth 100, cover PASS 4/4 - cdc_adc_to_processing: BMC PASS depth 80, cover PASS - radar_mode_controller: BMC PASS depth 200, cover PASS 8/8 - range_bin_decimator: cover PASS 7/7, BMC running (step 61+) - doppler_processor: cover running (step 133/150), BMC running (step 35+) DUT bug fixes found by formal: - doppler_processor: write_chirp_index overflow past CHIRPS_PER_FRAME-1 in S_ACCUMULATE frame-complete branch (reset to 0) - doppler_processor: read_doppler_index unclamped prefetch in S_LOAD_FFT causing OOB BRAM reads (clamped to DOPPLER_FFT_SIZE-1) CDC fix (prior session, included): - cdc_modules: async reset changed to sync reset on all CDC sync chains to prevent metastability on reset deassertion RTL changes for formal observability: - Added ifdef FORMAL output ports to cdc_handshake (6), cdc_adc (2), radar_mode_controller (2), range_bin_decimator (5), doppler_processor (11)
441 lines
18 KiB
Verilog
441 lines
18 KiB
Verilog
`timescale 1ns / 1ps
|
|
|
|
module doppler_processor_optimized #(
|
|
parameter DOPPLER_FFT_SIZE = 32,
|
|
parameter RANGE_BINS = 64,
|
|
parameter CHIRPS_PER_FRAME = 32,
|
|
parameter WINDOW_TYPE = 0, // 0=Hamming, 1=Rectangular
|
|
parameter DATA_WIDTH = 16
|
|
)(
|
|
input wire clk,
|
|
input wire reset_n,
|
|
input wire [31:0] range_data,
|
|
input wire data_valid,
|
|
input wire new_chirp_frame,
|
|
output reg [31:0] doppler_output,
|
|
output reg doppler_valid,
|
|
output reg [4:0] doppler_bin,
|
|
output reg [5:0] range_bin,
|
|
output wire processing_active,
|
|
output wire frame_complete,
|
|
output reg [3:0] status
|
|
|
|
`ifdef FORMAL
|
|
,
|
|
output wire [2:0] fv_state,
|
|
output wire [10:0] fv_mem_write_addr,
|
|
output wire [10:0] fv_mem_read_addr,
|
|
output wire [5:0] fv_write_range_bin,
|
|
output wire [4:0] fv_write_chirp_index,
|
|
output wire [5:0] fv_read_range_bin,
|
|
output wire [4:0] fv_read_doppler_index,
|
|
output wire [9:0] fv_processing_timeout,
|
|
output wire fv_frame_buffer_full,
|
|
output wire fv_mem_we,
|
|
output wire [10:0] fv_mem_waddr_r
|
|
`endif
|
|
);
|
|
|
|
// ==============================================
|
|
// Window Coefficients (Simple Implementation)
|
|
// ==============================================
|
|
reg [DATA_WIDTH-1:0] window_coeff [0:31];
|
|
|
|
// Generate window coefficients
|
|
integer w;
|
|
initial begin
|
|
if (WINDOW_TYPE == 0) begin
|
|
// Pre-calculated Hamming window (Q15 format)
|
|
window_coeff[0] = 16'h0800; window_coeff[1] = 16'h0862;
|
|
window_coeff[2] = 16'h09CB; window_coeff[3] = 16'h0C3B;
|
|
window_coeff[4] = 16'h0FB2; window_coeff[5] = 16'h142F;
|
|
window_coeff[6] = 16'h19B2; window_coeff[7] = 16'h2039;
|
|
window_coeff[8] = 16'h27C4; window_coeff[9] = 16'h3050;
|
|
window_coeff[10] = 16'h39DB; window_coeff[11] = 16'h4462;
|
|
window_coeff[12] = 16'h4FE3; window_coeff[13] = 16'h5C5A;
|
|
window_coeff[14] = 16'h69C4; window_coeff[15] = 16'h781D;
|
|
window_coeff[16] = 16'h7FFF; // Peak
|
|
window_coeff[17] = 16'h781D; window_coeff[18] = 16'h69C4;
|
|
window_coeff[19] = 16'h5C5A; window_coeff[20] = 16'h4FE3;
|
|
window_coeff[21] = 16'h4462; window_coeff[22] = 16'h39DB;
|
|
window_coeff[23] = 16'h3050; window_coeff[24] = 16'h27C4;
|
|
window_coeff[25] = 16'h2039; window_coeff[26] = 16'h19B2;
|
|
window_coeff[27] = 16'h142F; window_coeff[28] = 16'h0FB2;
|
|
window_coeff[29] = 16'h0C3B; window_coeff[30] = 16'h09CB;
|
|
window_coeff[31] = 16'h0862;
|
|
end else begin
|
|
// Rectangular window (all ones)
|
|
for (w = 0; w < 32; w = w + 1) begin
|
|
window_coeff[w] = 16'h7FFF;
|
|
end
|
|
end
|
|
end
|
|
|
|
// ==============================================
|
|
// Memory Declaration - FIXED SIZE
|
|
// ==============================================
|
|
localparam MEM_DEPTH = RANGE_BINS * CHIRPS_PER_FRAME;
|
|
(* ram_style = "block" *) reg [DATA_WIDTH-1:0] doppler_i_mem [0:MEM_DEPTH-1];
|
|
(* ram_style = "block" *) reg [DATA_WIDTH-1:0] doppler_q_mem [0:MEM_DEPTH-1];
|
|
|
|
// ==============================================
|
|
// Control Registers
|
|
// ==============================================
|
|
reg [5:0] write_range_bin; // Changed to match RANGE_BINS width
|
|
reg [4:0] write_chirp_index; // Changed to match CHIRPS_PER_FRAME width
|
|
reg [5:0] read_range_bin;
|
|
reg [4:0] read_doppler_index; // Changed name for clarity
|
|
reg frame_buffer_full;
|
|
reg [9:0] chirps_received; // Enough for up to 1024 chirps
|
|
reg [1:0] chirp_state; // Track chirp accumulation state
|
|
|
|
|
|
// ==============================================
|
|
// FFT Interface
|
|
// ==============================================
|
|
reg fft_start;
|
|
wire fft_ready;
|
|
reg [DATA_WIDTH-1:0] fft_input_i;
|
|
reg [DATA_WIDTH-1:0] fft_input_q;
|
|
reg signed [31:0] mult_i, mult_q; // 32-bit to avoid overflow
|
|
|
|
reg fft_input_valid;
|
|
reg fft_input_last;
|
|
wire [DATA_WIDTH-1:0] fft_output_i;
|
|
wire [DATA_WIDTH-1:0] fft_output_q;
|
|
wire fft_output_valid;
|
|
wire fft_output_last;
|
|
|
|
// ==============================================
|
|
// Addressing
|
|
// ==============================================
|
|
wire [10:0] mem_write_addr;
|
|
wire [10:0] mem_read_addr;
|
|
|
|
// Proper address calculation using parameters
|
|
assign mem_write_addr = (write_chirp_index * RANGE_BINS) + write_range_bin;
|
|
assign mem_read_addr = (read_doppler_index * RANGE_BINS) + read_range_bin;
|
|
|
|
// Alternative organization (choose one):
|
|
// If you want range-major organization (all chirps for one range bin together):
|
|
// assign mem_write_addr = (write_range_bin * CHIRPS_PER_FRAME) + write_chirp_index;
|
|
// assign mem_read_addr = (read_range_bin * CHIRPS_PER_FRAME) + read_doppler_index;
|
|
|
|
// ==============================================
|
|
// State Machine
|
|
// ==============================================
|
|
reg [2:0] state;
|
|
localparam S_IDLE = 3'b000;
|
|
localparam S_ACCUMULATE = 3'b001;
|
|
localparam S_PRE_READ = 3'b101; // Prime BRAM pipeline before FFT load
|
|
localparam S_LOAD_FFT = 3'b010;
|
|
localparam S_FFT_WAIT = 3'b011;
|
|
localparam S_OUTPUT = 3'b100;
|
|
|
|
// Frame sync detection
|
|
reg new_chirp_frame_d1;
|
|
always @(posedge clk or negedge reset_n) begin
|
|
if (!reset_n) new_chirp_frame_d1 <= 0;
|
|
else new_chirp_frame_d1 <= new_chirp_frame;
|
|
end
|
|
wire frame_start_pulse = new_chirp_frame & ~new_chirp_frame_d1;
|
|
|
|
// ==============================================
|
|
// Main State Machine - FIXED
|
|
// ==============================================
|
|
reg [5:0] fft_sample_counter;
|
|
reg [9:0] processing_timeout;
|
|
|
|
// Memory write enable and data signals (extracted for BRAM inference)
|
|
reg mem_we;
|
|
reg [10:0] mem_waddr_r;
|
|
reg [DATA_WIDTH-1:0] mem_wdata_i, mem_wdata_q;
|
|
|
|
// Memory read data (registered for BRAM read latency)
|
|
reg [DATA_WIDTH-1:0] mem_rdata_i, mem_rdata_q;
|
|
|
|
`ifdef FORMAL
|
|
assign fv_state = state;
|
|
assign fv_mem_write_addr = mem_write_addr;
|
|
assign fv_mem_read_addr = mem_read_addr;
|
|
assign fv_write_range_bin = write_range_bin;
|
|
assign fv_write_chirp_index = write_chirp_index;
|
|
assign fv_read_range_bin = read_range_bin;
|
|
assign fv_read_doppler_index = read_doppler_index;
|
|
assign fv_processing_timeout = processing_timeout;
|
|
assign fv_frame_buffer_full = frame_buffer_full;
|
|
assign fv_mem_we = mem_we;
|
|
assign fv_mem_waddr_r = mem_waddr_r;
|
|
`endif
|
|
|
|
// ----------------------------------------------------------
|
|
// Separate always block for memory writes — NO async reset
|
|
// in sensitivity list, so Vivado can infer Block RAM.
|
|
// ----------------------------------------------------------
|
|
always @(posedge clk) begin
|
|
if (mem_we) begin
|
|
doppler_i_mem[mem_waddr_r] <= mem_wdata_i;
|
|
doppler_q_mem[mem_waddr_r] <= mem_wdata_q;
|
|
end
|
|
// Registered read — address driven by mem_read_addr from FSM
|
|
mem_rdata_i <= doppler_i_mem[mem_read_addr];
|
|
mem_rdata_q <= doppler_q_mem[mem_read_addr];
|
|
end
|
|
|
|
// ----------------------------------------------------------
|
|
// Main FSM — async reset for control registers only.
|
|
// Memory arrays are NOT touched here.
|
|
// ----------------------------------------------------------
|
|
always @(posedge clk or negedge reset_n) begin
|
|
if (!reset_n) begin
|
|
state <= S_IDLE;
|
|
write_range_bin <= 0;
|
|
write_chirp_index <= 0;
|
|
read_range_bin <= 0;
|
|
read_doppler_index <= 0;
|
|
frame_buffer_full <= 0;
|
|
doppler_valid <= 0;
|
|
fft_start <= 0;
|
|
fft_input_valid <= 0;
|
|
fft_input_last <= 0;
|
|
fft_sample_counter <= 0;
|
|
processing_timeout <= 0;
|
|
status <= 0;
|
|
chirps_received <= 0;
|
|
chirp_state <= 0;
|
|
mem_we <= 0;
|
|
mem_waddr_r <= 0;
|
|
mem_wdata_i <= 0;
|
|
mem_wdata_q <= 0;
|
|
mult_i <= 0;
|
|
mult_q <= 0;
|
|
fft_input_i <= 0;
|
|
fft_input_q <= 0;
|
|
doppler_output <= 0;
|
|
doppler_bin <= 0;
|
|
end else begin
|
|
doppler_valid <= 0;
|
|
fft_input_valid <= 0;
|
|
fft_input_last <= 0;
|
|
mem_we <= 0;
|
|
|
|
if (processing_timeout > 0) begin
|
|
processing_timeout <= processing_timeout - 1;
|
|
end
|
|
|
|
case (state)
|
|
S_IDLE: begin
|
|
if (frame_start_pulse) begin
|
|
// Start new frame
|
|
write_chirp_index <= 0;
|
|
write_range_bin <= 0;
|
|
frame_buffer_full <= 0;
|
|
chirps_received <= 0;
|
|
end
|
|
|
|
if (data_valid && !frame_buffer_full) begin
|
|
state <= S_ACCUMULATE;
|
|
// Write the first sample immediately (Bug #3 fix:
|
|
// previously this transition consumed data_valid
|
|
// without writing to BRAM)
|
|
mem_we <= 1;
|
|
mem_waddr_r <= mem_write_addr;
|
|
mem_wdata_i <= range_data[15:0];
|
|
mem_wdata_q <= range_data[31:16];
|
|
write_range_bin <= 1;
|
|
end
|
|
end
|
|
|
|
S_ACCUMULATE: begin
|
|
if (data_valid) begin
|
|
// Drive memory write signals (actual write in separate block)
|
|
mem_we <= 1;
|
|
mem_waddr_r <= mem_write_addr;
|
|
mem_wdata_i <= range_data[15:0];
|
|
mem_wdata_q <= range_data[31:16];
|
|
|
|
// Increment range bin
|
|
if (write_range_bin < RANGE_BINS - 1) begin
|
|
write_range_bin <= write_range_bin + 1;
|
|
end else begin
|
|
// Completed one chirp
|
|
write_range_bin <= 0;
|
|
write_chirp_index <= write_chirp_index + 1;
|
|
chirps_received <= chirps_received + 1;
|
|
|
|
// Check if frame is complete
|
|
if (write_chirp_index >= CHIRPS_PER_FRAME - 1) begin
|
|
frame_buffer_full <= 1;
|
|
chirp_state <= 0;
|
|
state <= S_PRE_READ;
|
|
read_range_bin <= 0;
|
|
read_doppler_index <= 0;
|
|
fft_sample_counter <= 0;
|
|
// Reset write pointers — no longer needed for
|
|
// this frame, and prevents stale overflow of
|
|
// write_chirp_index (which was just incremented
|
|
// past CHIRPS_PER_FRAME-1 above).
|
|
write_chirp_index <= 0;
|
|
write_range_bin <= 0;
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
S_PRE_READ: begin
|
|
// Prime the BRAM pipeline: present addr for chirp 0 of
|
|
// current read_range_bin. read_doppler_index is already 0.
|
|
// mem_read_addr = 0 * RANGE_BINS + read_range_bin.
|
|
// After this cycle, mem_rdata_i will hold data[chirp=0][rbin].
|
|
// Advance read_doppler_index to 1 so the NEXT BRAM read
|
|
// (which happens every cycle in the memory block) will
|
|
// fetch chirp 1.
|
|
read_doppler_index <= 1;
|
|
fft_start <= 1;
|
|
state <= S_LOAD_FFT;
|
|
end
|
|
|
|
S_LOAD_FFT: begin
|
|
fft_start <= 0;
|
|
|
|
// Pipeline alignment (after S_PRE_READ primed the BRAM):
|
|
//
|
|
// At cycle k (fft_sample_counter = k, k = 0..31):
|
|
// mem_rdata_i = data[chirp=k][rbin] (from addr presented
|
|
// LAST cycle: read_doppler_index was k)
|
|
// We compute: mult_i <= mem_rdata_i * window_coeff[k]
|
|
// We capture: fft_input_i <= (prev_mult_i + round) >>> 15
|
|
// We present: BRAM addr for chirp k+1 (for next cycle)
|
|
//
|
|
// For k=0: fft_input_i captures the stale mult_i (= 0 from
|
|
// reset or previous rbin's flush). This is WRONG
|
|
// for a naive implementation. Instead, we use a
|
|
// sub-counter approach:
|
|
//
|
|
// sub=0 (pre-multiply): We have mem_rdata_i = data[0].
|
|
// Compute mult_i = data[0] * window[0].
|
|
// Do NOT assert fft_input_valid yet.
|
|
// Present BRAM addr for chirp 1.
|
|
//
|
|
// sub=1..31 (normal): mem_rdata_i = data[sub].
|
|
// fft_input_i = (prev mult) >>> 15 -> VALID
|
|
// mult_i = data[sub] * window[sub]
|
|
// Present BRAM addr for chirp sub+1.
|
|
//
|
|
// sub=32 (flush): No new BRAM data needed.
|
|
// fft_input_i = (mult from sub=31) >>> 15 -> VALID, LAST
|
|
// Transition to S_FFT_WAIT.
|
|
//
|
|
// We reuse fft_sample_counter as the sub-counter (0..32).
|
|
|
|
if (fft_sample_counter == 0) begin
|
|
// Sub 0: pre-multiply. mem_rdata_i = data[chirp=0][rbin].
|
|
mult_i <= $signed(mem_rdata_i) *
|
|
$signed(window_coeff[0]);
|
|
mult_q <= $signed(mem_rdata_q) *
|
|
$signed(window_coeff[0]);
|
|
// Present BRAM addr for chirp 2 (sub=1 reads chirp 1
|
|
// from the BRAM read we triggered in S_PRE_READ;
|
|
// we need chirp 2 ready for sub=2).
|
|
read_doppler_index <= (2 < DOPPLER_FFT_SIZE) ? 2
|
|
: DOPPLER_FFT_SIZE - 1;
|
|
fft_sample_counter <= 1;
|
|
end else if (fft_sample_counter <= DOPPLER_FFT_SIZE) begin
|
|
// Sub 1..32
|
|
// Capture previous mult into fft_input
|
|
fft_input_i <= (mult_i + (1 << 14)) >>> 15;
|
|
fft_input_q <= (mult_q + (1 << 14)) >>> 15;
|
|
fft_input_valid <= 1;
|
|
|
|
if (fft_sample_counter == DOPPLER_FFT_SIZE) begin
|
|
// Sub 32: flush last sample
|
|
fft_input_last <= 1;
|
|
state <= S_FFT_WAIT;
|
|
fft_sample_counter <= 0;
|
|
processing_timeout <= 1000;
|
|
// Reset read index to prevent stale OOB address
|
|
// on BRAM read port during S_FFT_WAIT
|
|
read_doppler_index <= 0;
|
|
end else begin
|
|
// Sub 1..31: also compute new mult from current BRAM data
|
|
// mem_rdata_i = data[chirp = fft_sample_counter][rbin]
|
|
mult_i <= $signed(mem_rdata_i) *
|
|
$signed(window_coeff[fft_sample_counter]);
|
|
mult_q <= $signed(mem_rdata_q) *
|
|
$signed(window_coeff[fft_sample_counter]);
|
|
// Advance BRAM read to chirp fft_sample_counter+2
|
|
// (so data is ready two cycles later when we need it)
|
|
// Clamp to DOPPLER_FFT_SIZE-1 to prevent OOB memory read
|
|
if (fft_sample_counter + 2 < DOPPLER_FFT_SIZE)
|
|
read_doppler_index <= fft_sample_counter + 2;
|
|
else
|
|
read_doppler_index <= DOPPLER_FFT_SIZE - 1;
|
|
fft_sample_counter <= fft_sample_counter + 1;
|
|
end
|
|
end
|
|
end
|
|
|
|
S_FFT_WAIT: begin
|
|
if (fft_output_valid) begin
|
|
doppler_output <= {fft_output_q[15:0], fft_output_i[15:0]};
|
|
doppler_bin <= fft_sample_counter;
|
|
range_bin <= read_range_bin;
|
|
doppler_valid <= 1;
|
|
|
|
fft_sample_counter <= fft_sample_counter + 1;
|
|
|
|
if (fft_output_last) begin
|
|
state <= S_OUTPUT;
|
|
fft_sample_counter <= 0;
|
|
end
|
|
end
|
|
|
|
if (processing_timeout == 0) begin
|
|
state <= S_OUTPUT;
|
|
end
|
|
end
|
|
|
|
S_OUTPUT: begin
|
|
if (read_range_bin < RANGE_BINS - 1) begin
|
|
read_range_bin <= read_range_bin + 1;
|
|
read_doppler_index <= 0;
|
|
fft_sample_counter <= 0;
|
|
state <= S_PRE_READ;
|
|
end else begin
|
|
state <= S_IDLE;
|
|
frame_buffer_full <= 0;
|
|
end
|
|
end
|
|
|
|
endcase
|
|
|
|
status <= {state, frame_buffer_full};
|
|
end
|
|
end
|
|
|
|
// ==============================================
|
|
// FFT Module
|
|
// ==============================================
|
|
xfft_32 fft_inst (
|
|
.aclk(clk),
|
|
.aresetn(reset_n),
|
|
.s_axis_config_tdata(8'h01),
|
|
.s_axis_config_tvalid(fft_start),
|
|
.s_axis_config_tready(fft_ready),
|
|
.s_axis_data_tdata({fft_input_q, fft_input_i}),
|
|
.s_axis_data_tvalid(fft_input_valid),
|
|
.s_axis_data_tlast(fft_input_last),
|
|
.m_axis_data_tdata({fft_output_q, fft_output_i}),
|
|
.m_axis_data_tvalid(fft_output_valid),
|
|
.m_axis_data_tlast(fft_output_last),
|
|
.m_axis_data_tready(1'b1)
|
|
);
|
|
|
|
// ==============================================
|
|
// Status Outputs
|
|
// ==============================================
|
|
assign processing_active = (state != S_IDLE);
|
|
assign frame_complete = (state == S_IDLE && frame_buffer_full == 0);
|
|
|
|
|
|
endmodule |