Optimize async FIFO critical path and add CLI build system

Replace chained adder in rptr_empty/wptr_full with parallel
pre-computation (rbin+1, rbin+2) and mux selection. This reduces
the critical path from ~9 to ~5-6 logic levels, improving clk_pixel
Fmax from 120.8 to 166.7 MHz (+38%).

Add build.sh/build.tcl for headless CLI builds via gw_sh with
timing-driven PnR and increased placement/routing effort.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Steve Markgraf 2026-03-07 23:37:22 +01:00
parent 97f0c4ffd1
commit 040bdb24a4
4 changed files with 106 additions and 16 deletions

23
build.sh Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
# Build script for hsdaoh FPGA projects.
#
# Usage: ./build.sh <project_file.gprj>
#  e.g.: ./build.sh hsdaoh_nano20k_test.gprj
#
# Environment:
#   GOWIN_DIR  Gowin IDE directory (the one that contains bin/gw_sh).
#              Defaults to $HOME/tools/Gowin_V1.9.11.03_Education_Linux/IDE
#              when unset or empty.
#
# All command-line arguments are forwarded unchanged to build.tcl, which
# performs the argument validation.

set -e

# Directory containing this script, so build.tcl is found regardless of the
# caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Use the caller-provided GOWIN_DIR, otherwise fall back to the default
# install location (this is a default, not a search).
GOWIN_DIR="${GOWIN_DIR:-$HOME/tools/Gowin_V1.9.11.03_Education_Linux/IDE}"

if [ ! -f "$GOWIN_DIR/bin/gw_sh" ]; then
    # Diagnostics go to stderr so they are not mixed into captured build output.
    echo "ERROR: gw_sh not found at $GOWIN_DIR/bin/gw_sh" >&2
    echo "Set GOWIN_DIR to your Gowin IDE directory" >&2
    exit 1
fi

# Prepend the Gowin libraries. The ':' separator is only emitted when
# LD_LIBRARY_PATH already has a value: a trailing ':' creates an empty entry,
# which the dynamic loader treats as the current working directory.
export LD_LIBRARY_PATH="$GOWIN_DIR/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Select Qt's offscreen platform plugin (intended for headless runs of gw_sh).
export QT_QPA_PLATFORM=offscreen

echo "Using Gowin IDE: $GOWIN_DIR"

# Replace this shell with gw_sh so its exit status becomes the script's status.
exec "$GOWIN_DIR/bin/gw_sh" "$SCRIPT_DIR/build.tcl" "$@"

37
build.tcl Normal file
View file

@ -0,0 +1,37 @@
# build.tcl - headless build driver for hsdaoh FPGA projects
#
# Usage: gw_sh build.tcl <project_file.gprj>
#  e.g.: gw_sh build.tcl hsdaoh_nano20k_test.gprj
#
# Opens the given project, applies the option overrides listed below and
# then runs the complete flow with `run all`.

# Print a message framed by two separator lines.
proc print_banner {msg} {
    set rule "============================================"
    puts $rule
    puts $msg
    puts $rule
}

# The project file is the first argument; any further arguments are ignored.
if {$argc < 1} {
    puts "Usage: gw_sh build.tcl <project_file.gprj>"
    exit 1
}

set gprj_path [lindex $argv 0]

# Bail out early when the path does not exist (checked relative to the
# current working directory unless an absolute path was given).
if {![file exists $gprj_path]} {
    puts "ERROR: Project file '$gprj_path' not found"
    exit 1
}

print_banner "Building: $gprj_path"

open_project $gprj_path

# Option overrides, applied in this order as flag/value pairs:
#   -timing_driven, -gen_text_timing_rpt : timing-driven PnR plus a text
#                                          timing report
#   -show_all_warn,
#   -print_all_synthesis_warning         : verbose warning output
#   -place_option, -route_option         : per the original author's note,
#                                          raise place/route effort for
#                                          timing closure (exact meaning of
#                                          value 1 should be confirmed in the
#                                          Gowin documentation)
foreach {opt_name opt_value} {
    -timing_driven               1
    -gen_text_timing_rpt         1
    -show_all_warn               1
    -print_all_synthesis_warning 1
    -place_option                1
    -route_option                1
} {
    set_option $opt_name $opt_value
}

run all

print_banner "Build complete!"

View file

@ -2,6 +2,10 @@
// distributed under the mit license
// https://opensource.org/licenses/mit-license.php
// Optimized: replaced chained adder (rbin+inc then +1) with parallel
// pre-computation of rbin+1 and rbin+2, selected via mux. This reduces
// the arempty critical path from ~9 to ~5-6 logic levels.
`timescale 1 ns / 1 ps
`default_nettype none
@ -21,9 +25,28 @@ module rptr_empty
);
reg [ADDRSIZE:0] rbin;
wire [ADDRSIZE:0] rgraynext, rbinnext, rgraynextm1;
wire [ADDRSIZE:0] rgraynext, rbinnext;
wire arempty_val, rempty_val;
// Pre-compute incremented values from registered rbin (parallel, not chained)
wire [ADDRSIZE:0] rbin_p1 = rbin + 1'b1;
wire [ADDRSIZE:0] rbin_p2 = rbin + 2'd2;
wire do_read = rinc & ~rempty;
// Gray code conversions from pre-computed values
wire [ADDRSIZE:0] rgray_p1 = (rbin_p1 >> 1) ^ rbin_p1;
wire [ADDRSIZE:0] rgray_p2 = (rbin_p2 >> 1) ^ rbin_p2;
// Select based on whether a read is happening this cycle
// When do_read: rbinnext = rbin+1, rgraynext = gray(rbin+1)
// When !do_read: rbinnext = rbin, rgraynext = gray(rbin) = rptr (already registered)
assign rbinnext = do_read ? rbin_p1 : rbin;
assign rgraynext = do_read ? rgray_p1 : rptr;
// "Almost empty": the FIFO will be empty after one more read from the projected next read position.
// rgraynextm1 = gray(rbinnext + 1) = gray(do_read ? rbin+2 : rbin+1)
wire [ADDRSIZE:0] rgraynextm1 = do_read ? rgray_p2 : rgray_p1;
//-------------------
// GRAYSTYLE2 pointer
//-------------------
@ -37,10 +60,7 @@ module rptr_empty
end
// Memory read-address pointer (okay to use binary to address memory)
assign raddr = rbin[ADDRSIZE-1:0];
assign rbinnext = rbin + (rinc & ~rempty);
assign rgraynext = (rbinnext >> 1) ^ rbinnext;
assign rgraynextm1 = ((rbinnext + 1'b1) >> 1) ^ (rbinnext + 1'b1);
assign raddr = rbin[ADDRSIZE-1:0];
//---------------------------------------------------------------
// FIFO empty when the next rptr == synchronized wptr or on reset

View file

@ -2,6 +2,10 @@
// distributed under the mit license
// https://opensource.org/licenses/mit-license.php
// Optimized: replaced chained adder (wbin+inc then +1) with parallel
// pre-computation of wbin+1 and wbin+2, selected via mux. Same approach
// as rptr_empty optimization.
`timescale 1 ns / 1 ps
`default_nettype none
@ -21,9 +25,25 @@ module wptr_full
);
reg [ADDRSIZE:0] wbin;
wire [ADDRSIZE:0] wgraynext, wbinnext, wgraynextp1;
wire [ADDRSIZE:0] wgraynext, wbinnext;
wire awfull_val, wfull_val;
// Pre-compute incremented values from registered wbin (parallel, not chained)
wire [ADDRSIZE:0] wbin_p1 = wbin + 1'b1;
wire [ADDRSIZE:0] wbin_p2 = wbin + 2'd2;
wire do_write = winc & ~wfull;
// Gray code conversions from pre-computed values
wire [ADDRSIZE:0] wgray_p1 = (wbin_p1 >> 1) ^ wbin_p1;
wire [ADDRSIZE:0] wgray_p2 = (wbin_p2 >> 1) ^ wbin_p2;
// Select based on whether a write is happening this cycle
assign wbinnext = do_write ? wbin_p1 : wbin;
assign wgraynext = do_write ? wgray_p1 : wptr; // wptr = gray(wbin)
// "Almost full" look-ahead
wire [ADDRSIZE:0] wgraynextp1 = do_write ? wgray_p2 : wgray_p1;
// GRAYSTYLE2 pointer
always @(posedge wclk or negedge wrst_n) begin
@ -36,16 +56,6 @@ module wptr_full
// Memory write-address pointer (okay to use binary to address memory)
assign waddr = wbin[ADDRSIZE-1:0];
assign wbinnext = wbin + (winc & ~wfull);
assign wgraynext = (wbinnext >> 1) ^ wbinnext;
assign wgraynextp1 = ((wbinnext + 1'b1) >> 1) ^ (wbinnext + 1'b1);
//------------------------------------------------------------------
// Simplified version of the three necessary full-tests:
// assign wfull_val=((wgnext[ADDRSIZE] !=wq2_rptr[ADDRSIZE] ) &&
// (wgnext[ADDRSIZE-1] !=wq2_rptr[ADDRSIZE-1]) &&
// (wgnext[ADDRSIZE-2:0]==wq2_rptr[ADDRSIZE-2:0]));
//------------------------------------------------------------------
assign wfull_val = (wgraynext == {~wq2_rptr[ADDRSIZE:ADDRSIZE-1],wq2_rptr[ADDRSIZE-2:0]});
assign awfull_val = (wgraynextp1 == {~wq2_rptr[ADDRSIZE:ADDRSIZE-1],wq2_rptr[ADDRSIZE-2:0]});