Phase 1 hardware bring-up prep: ILA debug probes, CDC waivers, programming scripts
- Rename latency_buffer_2159 -> latency_buffer (module + file + all refs) - Add CDC waivers for 5 verified false-positive criticals to XDC - Add ILA debug probe insertion script (4 cores, 126 probe bits, 2 clock domains) - Add FPGA programming script (7-step flow with DONE pin verification) - Add ILA capture script (4 scenarios + health check, CSV export) - Add debug_ila.xdc with MARK_DEBUG fallback attributes - Full regression clean: 13/13 suites, 266/266 checks, 2048/2048 golden match
This commit is contained in:
@@ -0,0 +1,131 @@
|
||||
`timescale 1ns / 1ps
|
||||
|
||||
// latency_buffer.v — Parameterized BRAM-based latency/delay buffer
|
||||
// Renamed from latency_buffer_2159 to latency_buffer (module name was
|
||||
// inconsistent with the actual LATENCY=3187 parameter).
|
||||
// -----------------------------------------------------------------------------
// latency_buffer
//
// Sample-count delay line built on an inferred block RAM.
//
// Every sample accepted with valid_in is stored in a circular buffer. Once
// LATENCY samples have been stored, each further accepted sample causes the
// sample that was accepted LATENCY samples earlier to be emitted.
//
// Timing: the accepting clock edge registers the read address; the next clock
// edge registers the BRAM output and asserts valid_out. valid_out is a
// single-cycle pulse per emitted sample.
//
// Parameters
//   DATA_WIDTH : width of data_in / data_out in bits.
//   LATENCY    : delay in accepted samples. Must be >= 1 (with 0 the priming
//                counter never advances, so valid_out is never asserted).
//
// Ports
//   clk        : clock, all logic is rising-edge triggered.
//   reset_n    : active-low asynchronous reset of pointers, priming counter and
//                valid pipeline. RAM contents are NOT cleared by reset; they
//                are hidden because the buffer must be re-primed first.
//   data_in    : input sample, captured when valid_in is high.
//   valid_in   : input sample qualifier.
//   data_out   : delayed sample, meaningful when valid_out is high.
//   valid_out  : output sample qualifier.
// -----------------------------------------------------------------------------
module latency_buffer #(
    parameter DATA_WIDTH = 32,
    parameter LATENCY    = 3187
) (
    input  wire                  clk,
    input  wire                  reset_n,
    input  wire [DATA_WIDTH-1:0] data_in,
    input  wire                  valid_in,
    output wire [DATA_WIDTH-1:0] data_out,
    output wire                  valid_out
);

    // ========== DERIVED PARAMETERS ==========
    // The original design used a fixed 4096-entry buffer. That depth is kept
    // for every LATENCY it could already serve (LATENCY <= 4095) so existing
    // instances are unchanged. Larger LATENCY values get a buffer of at least
    // LATENCY + 2 entries, which keeps the read address from ever coinciding
    // with the write address of the following cycle.
    localparam MIN_ADDR_WIDTH = 12;
    localparam ADDR_WIDTH     = (LATENCY < (1 << MIN_ADDR_WIDTH))
                                ? MIN_ADDR_WIDTH
                                : $clog2(LATENCY + 2);
    localparam DEPTH          = 1 << ADDR_WIDTH;

    // LATENCY truncated to pointer width. Because DEPTH is a power of two,
    // (write_ptr - LATENCY_OFS) in ADDR_WIDTH-bit arithmetic is already
    // (write_ptr - LATENCY) mod DEPTH; no explicit wrap branch is needed.
    localparam [ADDR_WIDTH-1:0] LATENCY_OFS = LATENCY;

    // ========== STORAGE AND STATE ==========
    (* ram_style = "block" *) reg [DATA_WIDTH-1:0] bram [0:DEPTH-1];

    reg [ADDR_WIDTH-1:0] write_ptr;        // address of the next write
    reg [ADDR_WIDTH-1:0] read_ptr;         // registered read address
    reg [ADDR_WIDTH-1:0] delay_counter;    // samples stored so far, saturates at LATENCY
    reg                  buffer_has_data;  // set once LATENCY samples are stored
    reg                  valid_out_reg;    // read address in read_ptr is valid
    reg                  valid_out_pipe;   // valid_out_reg delayed to match BRAM read
    reg [DATA_WIDTH-1:0] data_out_reg;     // BRAM output register

    // ========== POWER-ON INITIALIZATION ==========
    // Provides defined RAM contents and control state at FPGA configuration
    // and at simulation time zero.
    integer k;
    initial begin
        for (k = 0; k < DEPTH; k = k + 1) begin
            bram[k] = {DATA_WIDTH{1'b0}};
        end
        write_ptr       = 0;
        read_ptr        = 0;
        valid_out_reg   = 0;
        delay_counter   = 0;
        buffer_has_data = 0;
    end

    // ========== BRAM WRITE (synchronous only, no async reset) ==========
    // Xilinx Block RAMs do not support asynchronous resets. Keeping the write
    // in its own always block, separate from the async-reset control logic,
    // avoids Synth 8-3391.
    always @(posedge clk) begin
        if (valid_in) begin
            bram[write_ptr] <= data_in;
        end
    end

    // ========== CONTROL LOGIC (with async reset) ==========
    always @(posedge clk or negedge reset_n) begin
        if (!reset_n) begin
            write_ptr       <= 0;
            read_ptr        <= 0;
            valid_out_reg   <= 0;
            delay_counter   <= 0;
            buffer_has_data <= 0;
        end else begin
            // Default: no read issued this cycle.
            valid_out_reg <= 1'b0;

            // ===== WRITE SIDE =====
            if (valid_in) begin
                // ADDR_WIDTH-bit increment wraps from DEPTH-1 back to 0.
                write_ptr <= write_ptr + 1'b1;

                // Count stored samples until the buffer is primed.
                if (delay_counter < LATENCY) begin
                    delay_counter <= delay_counter + 1'b1;

                    // This write is the LATENCY-th sample: reads may start
                    // with the next accepted sample.
                    if (delay_counter == LATENCY - 1) begin
                        buffer_has_data <= 1'b1;
                    end
                end
            end

            // ===== READ SIDE =====
            // One read per accepted sample once primed. write_ptr here is the
            // address being written this cycle, so the read targets the
            // sample stored LATENCY accepts earlier.
            if (buffer_has_data && valid_in) begin
                read_ptr      <= write_ptr - LATENCY_OFS;
                valid_out_reg <= 1'b1;
            end
        end
    end

    // ========== BRAM READ (synchronous — required for Block RAM inference) ==========
    // Xilinx Block RAMs physically register the read output. An async read
    // (assign data_out = bram[addr]) forces Vivado to use distributed LUTRAM
    // instead. Registering the read adds 1 cycle of latency, compensated by
    // the valid pipeline stage below.
    always @(posedge clk) begin
        data_out_reg <= bram[read_ptr];
    end

    // Delay valid_out_reg by one cycle so it lines up with data_out_reg.
    always @(posedge clk or negedge reset_n) begin
        if (!reset_n)
            valid_out_pipe <= 1'b0;
        else
            valid_out_pipe <= valid_out_reg;
    end

    assign data_out  = data_out_reg;
    assign valid_out = valid_out_pipe;

endmodule
|
||||
Reference in New Issue
Block a user