mirror of
https://github.com/steve-m/hsdaoh-rp2350.git
synced 2025-12-10 07:44:39 +01:00
improve performance by using three DMA channels
Previously, when using an HSTX clock > sysclk/2, a DMA underrun occurred from time to time, which limited the achievable data rate to around 75 MByte/s. By using a third DMA channel and employing some trickery to still be able to use the DMA CRC sniffer, we can now achieve 128 MByte/s (or even more) by using sysclk/1 as the HSTX clock. The counter example application has been updated to generate those ~128 MByte/s.
This commit is contained in:
parent
ae01224d0d
commit
bae93b3a87
4 changed files with 61 additions and 52 deletions
|
|
@ -1,7 +1,7 @@
|
||||||
# hsdaoh-rp2350 - High Speed Data Acquisition over HDMI
|
# hsdaoh-rp2350 - High Speed Data Acquisition over HDMI
|
||||||
## Stream up to 75 MByte/s from your Raspberry Pi Pico2 to your PC
|
## Stream up to 128 MByte/s from your Raspberry Pi Pico2 to your PC
|
||||||
|
|
||||||
Using $5 USB3 HDMI capture sticks based on the MacroSilicon MS2130, this project allows streaming up to 75 MByte/s of real-time data from an RP2350 (with overclocking) to a host computer with USB3.
|
Using $5 USB3 HDMI capture sticks based on the MacroSilicon MS2130, this project allows streaming up to 128 MByte/s of real-time data from an RP2350 (with overclocking) to a host computer with USB3.
|
||||||
For more information and the host library, see the [main repository](https://github.com/steve-m/hsdaoh) and the [talk at OsmoDevcon '24](https://media.ccc.de/v/osmodevcon2024-200-low-cost-high-speed-data-acquisition-over-hdmi).
|
For more information and the host library, see the [main repository](https://github.com/steve-m/hsdaoh) and the [talk at OsmoDevcon '24](https://media.ccc.de/v/osmodevcon2024-200-low-cost-high-speed-data-acquisition-over-hdmi).
|
||||||
|
|
||||||

|

|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@
|
||||||
#include "picohsdaoh.h"
|
#include "picohsdaoh.h"
|
||||||
#include "counter.pio.h"
|
#include "counter.pio.h"
|
||||||
|
|
||||||
#define SYS_CLK 250000
|
#define SYS_CLK 336000
|
||||||
|
|
||||||
#define DMACH_PIO_PING 0
|
#define DMACH_PIO_PING 0
|
||||||
#define DMACH_PIO_PONG 1
|
#define DMACH_PIO_PONG 1
|
||||||
|
|
@ -116,12 +116,14 @@ void init_pio_input(void)
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
vreg_set_voltage(VREG_VOLTAGE_MAX);
|
||||||
|
sleep_ms(1);
|
||||||
set_sys_clock_khz(SYS_CLK, true);
|
set_sys_clock_khz(SYS_CLK, true);
|
||||||
|
|
||||||
/* set HSTX clock to sysclk/2 */
|
/* set HSTX clock to sysclk/1 */
|
||||||
hw_write_masked(
|
hw_write_masked(
|
||||||
&clocks_hw->clk[clk_hstx].div,
|
&clocks_hw->clk[clk_hstx].div,
|
||||||
2 << CLOCKS_CLK_HSTX_DIV_INT_LSB,
|
1 << CLOCKS_CLK_HSTX_DIV_INT_LSB,
|
||||||
CLOCKS_CLK_HSTX_DIV_INT_BITS
|
CLOCKS_CLK_HSTX_DIV_INT_BITS
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ static inline void counter_program_init(PIO pio, uint sm, uint offset)
|
||||||
// disable the TX FIFO to make the RX FIFO deeper.
|
// disable the TX FIFO to make the RX FIFO deeper.
|
||||||
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
|
sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_RX);
|
||||||
|
|
||||||
sm_config_set_clkdiv(&c, 4.f);
|
sm_config_set_clkdiv(&c, 1.75f);
|
||||||
|
|
||||||
// Load our configuration, and start the program from the beginning
|
// Load our configuration, and start the program from the beginning
|
||||||
pio_sm_init(pio, sm, offset, &c);
|
pio_sm_init(pio, sm, offset, &c);
|
||||||
|
|
|
||||||
|
|
@ -152,13 +152,16 @@ void hsdaoh_update_head(int head)
|
||||||
fifo_head = head;
|
fifo_head = head;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DMACH_HSTX_PING 14
|
#define DMACH_HSTX_START 13
|
||||||
#define DMACH_HSTX_PONG 15
|
#define DMACH_HSTX_COUNT 3
|
||||||
#define CRC16_INIT 0xffff
|
#define CRC16_INIT 0xffff
|
||||||
|
|
||||||
static bool hstx_dma_pong = false;
|
static uint8_t hstx_dma_curchan = 0;
|
||||||
static uint v_scanline = 2;
|
static uint16_t saved_crc;
|
||||||
|
static uint v_scanline = 3;
|
||||||
static bool vactive_cmdlist_posted = false;
|
static bool vactive_cmdlist_posted = false;
|
||||||
|
static uint8_t dma_sniff_pipelined_ch = 0;
|
||||||
|
static bool dma_sniff_pipelined_disable = false;
|
||||||
|
|
||||||
enum crc_config {
|
enum crc_config {
|
||||||
CRC_NONE, /* No CRC, just 16 bit idle counter */
|
CRC_NONE, /* No CRC, just 16 bit idle counter */
|
||||||
|
|
@ -174,16 +177,32 @@ typedef struct
|
||||||
uint8_t crc_config;
|
uint8_t crc_config;
|
||||||
} __attribute__((packed, aligned(1))) metadata_t;
|
} __attribute__((packed, aligned(1))) metadata_t;
|
||||||
|
|
||||||
metadata_t metadata = (metadata_t) { .magic = 0xda7acab1, .crc_config = CRC16_1_LINE };
|
metadata_t metadata = (metadata_t) { .magic = 0xda7acab1, .crc_config = CRC16_2_LINE };
|
||||||
|
|
||||||
/* HSTX DMA IRQ handler, reconfigures the channel that just completed while
|
/* HSTX DMA IRQ handler, reconfigures the channel that just completed while
|
||||||
* the other channel is currently busy */
|
* the other channel is currently busy */
|
||||||
void __scratch_x("") hstx_dma_irq_handler()
|
void __scratch_x("") hstx_dma_irq_handler()
|
||||||
{
|
{
|
||||||
uint ch_num = hstx_dma_pong ? DMACH_HSTX_PONG : DMACH_HSTX_PING;
|
/* This is a bit tricky and time critical, we pipeline three DMA transfers to avoid an
|
||||||
|
* underrun, but the DMA sniffer that is used to calculate the CRC cannot be pipelined
|
||||||
|
* and needs to be reconfigured right before the DMA transfer starts - so we have to
|
||||||
|
* do that as fast as possible during blanking, before the next DMA transfer with
|
||||||
|
* active video data, which is right about to start. */
|
||||||
|
if (dma_sniff_pipelined_ch) {
|
||||||
|
/* (re)initialize DMA CRC sniffer */
|
||||||
|
saved_crc = dma_sniffer_get_data_accumulator() & 0xffff;
|
||||||
|
dma_sniffer_set_data_accumulator(CRC16_INIT);
|
||||||
|
dma_sniffer_enable(dma_sniff_pipelined_ch, DMA_SNIFF_CTRL_CALC_VALUE_CRC16, true);
|
||||||
|
dma_sniff_pipelined_ch = 0;
|
||||||
|
} else if (dma_sniff_pipelined_disable) {
|
||||||
|
dma_sniffer_disable();
|
||||||
|
dma_sniff_pipelined_disable = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint ch_num = hstx_dma_curchan + DMACH_HSTX_START;
|
||||||
|
hstx_dma_curchan = (hstx_dma_curchan + 1) % DMACH_HSTX_COUNT;
|
||||||
dma_channel_hw_t *ch = &dma_hw->ch[ch_num];
|
dma_channel_hw_t *ch = &dma_hw->ch[ch_num];
|
||||||
dma_hw->intr = 1u << ch_num;
|
dma_hw->intr = 1u << ch_num;
|
||||||
hstx_dma_pong = !hstx_dma_pong;
|
|
||||||
|
|
||||||
/* for raw commands we need to use 32 bit DMA transfers */
|
/* for raw commands we need to use 32 bit DMA transfers */
|
||||||
ch->al1_ctrl = (ch->al1_ctrl & ~DMA_CH0_CTRL_TRIG_DATA_SIZE_BITS) | (DMA_SIZE_32 << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB);
|
ch->al1_ctrl = (ch->al1_ctrl & ~DMA_CH0_CTRL_TRIG_DATA_SIZE_BITS) | (DMA_SIZE_32 << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB);
|
||||||
|
|
@ -191,7 +210,7 @@ void __scratch_x("") hstx_dma_irq_handler()
|
||||||
if (v_scanline >= MODE_V_FRONT_PORCH && v_scanline < (MODE_V_FRONT_PORCH + MODE_V_SYNC_WIDTH)) {
|
if (v_scanline >= MODE_V_FRONT_PORCH && v_scanline < (MODE_V_FRONT_PORCH + MODE_V_SYNC_WIDTH)) {
|
||||||
/* on first line of actual VSYNC, output data packet */
|
/* on first line of actual VSYNC, output data packet */
|
||||||
if (v_scanline == MODE_V_FRONT_PORCH) {
|
if (v_scanline == MODE_V_FRONT_PORCH) {
|
||||||
dma_sniffer_disable();
|
dma_sniff_pipelined_disable = true;
|
||||||
ch->read_addr = (uintptr_t)info_p;
|
ch->read_addr = (uintptr_t)info_p;
|
||||||
ch->transfer_count = info_len;
|
ch->transfer_count = info_len;
|
||||||
|
|
||||||
|
|
@ -235,12 +254,10 @@ void __scratch_x("") hstx_dma_irq_handler()
|
||||||
next_line[RBUF_SLICE_LEN - 1] |= ((met_p[cur_active_line/2] & 0x0f) << 12);
|
next_line[RBUF_SLICE_LEN - 1] |= ((met_p[cur_active_line/2] & 0x0f) << 12);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* on the second last word of the line, insert the CRC16 of the entire previous line */
|
/* on the second last word of the line, insert the CRC16 of the entire line before the last line */
|
||||||
next_line[RBUF_SLICE_LEN - 2] = dma_sniffer_get_data_accumulator() & 0xffff;
|
next_line[RBUF_SLICE_LEN - 2] = saved_crc;
|
||||||
|
|
||||||
/* (re)initialize DMA CRC sniffer */
|
dma_sniff_pipelined_ch = ch_num;
|
||||||
dma_sniffer_set_data_accumulator(CRC16_INIT);
|
|
||||||
dma_sniffer_enable(ch_num, DMA_SNIFF_CTRL_CALC_VALUE_CRC16, true);
|
|
||||||
|
|
||||||
/* switch to 16 bit DMA transfer size for the actual data,
|
/* switch to 16 bit DMA transfer size for the actual data,
|
||||||
* because for YCbCr422 TMDS channel 0 is unused */
|
* because for YCbCr422 TMDS channel 0 is unused */
|
||||||
|
|
@ -266,7 +283,7 @@ void core1_entry()
|
||||||
void hsdaoh_start(void)
|
void hsdaoh_start(void)
|
||||||
{
|
{
|
||||||
multicore_launch_core1(core1_entry);
|
multicore_launch_core1(core1_entry);
|
||||||
dma_channel_start(DMACH_HSTX_PING);
|
dma_channel_start(DMACH_HSTX_START);
|
||||||
}
|
}
|
||||||
|
|
||||||
void hsdaoh_init(uint16_t *ringbuf)//struct hsdaoh_inst *inst, uint16_t *ringbuf)
|
void hsdaoh_init(uint16_t *ringbuf)//struct hsdaoh_inst *inst, uint16_t *ringbuf)
|
||||||
|
|
@ -338,38 +355,28 @@ void hsdaoh_init(uint16_t *ringbuf)//struct hsdaoh_inst *inst, uint16_t *ringbuf
|
||||||
for (int i = 12; i <= 19; ++i)
|
for (int i = 12; i <= 19; ++i)
|
||||||
gpio_set_function(i, 0); // HSTX
|
gpio_set_function(i, 0); // HSTX
|
||||||
|
|
||||||
/* Both channels are set up identically, to transfer a whole scanline and
|
/* All channels are set up identically, to transfer a whole scanline and
|
||||||
* then chain to the opposite channel. Each time a channel finishes, we
|
* then chain to the next channel. Each time a channel finishes, we
|
||||||
* reconfigure the one that just finished, meanwhile the opposite channel
|
* reconfigure the one that just finished, meanwhile another channel
|
||||||
* is already making progress. */
|
* is already making progress. */
|
||||||
dma_channel_config c;
|
for (int i = 0; i < DMACH_HSTX_COUNT; i++) {
|
||||||
c = dma_channel_get_default_config(DMACH_HSTX_PING);
|
dma_channel_config c;
|
||||||
channel_config_set_chain_to(&c, DMACH_HSTX_PONG);
|
c = dma_channel_get_default_config(DMACH_HSTX_START + i);
|
||||||
channel_config_set_dreq(&c, DREQ_HSTX);
|
int chain_to_ch = DMACH_HSTX_START + ((i + 1) % DMACH_HSTX_COUNT);
|
||||||
channel_config_set_sniff_enable(&c, true);
|
channel_config_set_chain_to(&c, chain_to_ch);
|
||||||
dma_channel_configure(
|
channel_config_set_dreq(&c, DREQ_HSTX);
|
||||||
DMACH_HSTX_PING,
|
channel_config_set_sniff_enable(&c, true);
|
||||||
&c,
|
dma_channel_configure(
|
||||||
&hstx_fifo_hw->fifo,
|
DMACH_HSTX_START + i,
|
||||||
vblank_line_vsync_off,
|
&c,
|
||||||
count_of(vblank_line_vsync_off),
|
&hstx_fifo_hw->fifo,
|
||||||
false
|
vblank_line_vsync_off,
|
||||||
);
|
count_of(vblank_line_vsync_off),
|
||||||
c = dma_channel_get_default_config(DMACH_HSTX_PONG);
|
false
|
||||||
channel_config_set_chain_to(&c, DMACH_HSTX_PING);
|
);
|
||||||
channel_config_set_dreq(&c, DREQ_HSTX);
|
dma_hw->ints3 |= 1u << (DMACH_HSTX_START + i);
|
||||||
channel_config_set_sniff_enable(&c, true);
|
dma_hw->inte3 |= 1u << (DMACH_HSTX_START + i);
|
||||||
dma_channel_configure(
|
}
|
||||||
DMACH_HSTX_PONG,
|
|
||||||
&c,
|
|
||||||
&hstx_fifo_hw->fifo,
|
|
||||||
vblank_line_vsync_off,
|
|
||||||
count_of(vblank_line_vsync_off),
|
|
||||||
false
|
|
||||||
);
|
|
||||||
|
|
||||||
dma_hw->ints3 = (1u << DMACH_HSTX_PING) | (1u << DMACH_HSTX_PONG);
|
|
||||||
dma_hw->inte3 = (1u << DMACH_HSTX_PING) | (1u << DMACH_HSTX_PONG);
|
|
||||||
|
|
||||||
/* give the DMA the priority over the CPU on the bus */
|
/* give the DMA the priority over the CPU on the bus */
|
||||||
bus_ctrl_hw->priority = BUSCTRL_BUS_PRIORITY_DMA_W_BITS | BUSCTRL_BUS_PRIORITY_DMA_R_BITS;
|
bus_ctrl_hw->priority = BUSCTRL_BUS_PRIORITY_DMA_W_BITS | BUSCTRL_BUS_PRIORITY_DMA_R_BITS;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue