Nios® II Embedded Design Suite (EDS)
Support for Embedded Development Tools, Processors (SoCs and Nios® II processor), Embedded Development Suites (EDSs), Boot and Configuration, Operating Systems, C and C++
Announcements
Intel Support hours are Monday-Friday, 8am-5pm PST, except holidays. Thanks to our community members who provide support during our downtime or before we get to your questions. We appreciate you!

Need Forum Guidance? Click here
Search our FPGA Knowledge Articles here.

MMC SPI Core with FIFO

Altera_Forum
Honored Contributor II
930 Views

Hi, 

 

MMC/SD card access using Altera's standard SPI IP core is quite slow because the core has only a one-byte Tx buffer and a one-byte Rx buffer, so the driver can send and receive only one byte at a time. A simple way to improve this is to add FIFO buffers. 

Uploading the modified IP directly would violate Altera's copyright, so I only describe the modifications here; if you need them, please apply them yourself. 

First, open the SPI core 'mmc_spi.v' generated by SOPC Builder in your editor and change the declaration of the signal RRDY from a reg to a wire, like this: 

 

//reg RRDY; wire RRDY; and add some variables like 

 

wire tx_fifo_data_out; wire tx_fifo_count; wire tx_fifo_empty; wire tx_fifo_full; wire rx_fifo_data_out; wire rx_fifo_count; wire rx_fifo_empty; wire rx_fifo_full;

 

Also instantiate the FIFO buffers somewhere inside the SPI module, like this: 

 

mmc_spi_tx_fifio the_mmc_spi_tx_fifio ( .aclr (~reset_n), .clock (clk), .data (data_from_cpu), .rdreq (write_shift_reg), .sclr (status_wr_strobe), .wrreq (write_tx_holding), .empty (tx_fifo_empty), .full (tx_fifo_full), .q (tx_fifo_data_out), .usedw (tx_fifo_count) ); mmc_spi_rx_fifo the_mmc_spi_rx_fifo ( .aclr (~reset_n), .clock (clk), .data (shift_reg), .rdreq (data_rd_strobe), .sclr (status_wr_strobe), .wrreq ((state == 17) & (slowcount == 0)), .empty (rx_fifo_empty), .full (rx_fifo_full), .q (rx_fifo_data_out), .usedw (rx_fifo_count) ); (the files of these buffers are included in the attached gz file) and modify the logic like 

 

// assign TMT = ~transmitting & ~tx_holding_primed; assign TMT = ~transmitting & tx_fifo_empty; assign p1_data_to_cpu = ((mem_addr == 2))? spi_status : ((mem_addr == 3))? spi_control : ((mem_addr == 6))? endofpacketvalue_reg : ((mem_addr == 5))? spi_slave_select_reg : // rx_holding_reg; {rx_fifo_count & ~rx_fifo_empty, rx_fifo_data_out}; // As long as there's an empty spot somewhere, //it's safe to write data. // assign TRDY = ~(transmitting & tx_holding_primed); assign TRDY = ~tx_fifo_count; // Enable write to shift register. // assign write_shift_reg = tx_holding_primed & ~transmitting; assign write_shift_reg = ~tx_fifo_empty & ~transmitting; always @(posedge clk or negedge reset_n) begin if (reset_n == 0) begin shift_reg <= 0; rx_holding_reg <= 0; EOP <= 0; // RRDY <= 0; ROE <= 0; TOE <= 0; tx_holding_reg <= 0; tx_holding_primed <= 0; transmitting <= 0; SCLK_reg <= 0; MISO_reg <= 0; end if (write_shift_reg) begin // shift_reg <= tx_holding_reg; shift_reg <= tx_fifo_data_out; transmitting <= 1; end if (write_shift_reg & ~write_tx_holding) // Clear tx_holding_primed tx_holding_primed <= 0; // if (data_rd_strobe) // // On data read, clear the RRDY bit. // RRDY <= 0; if (status_wr_strobe) begin // On status write, clear all status bits (ignore the data). EOP <= 0; // RRDY <= 0; ROE <= 0; TOE <= 0; end if (slowclock) begin if (state == 17) begin transmitting <= 0; // RRDY <= 1; rx_holding_reg <= shift_reg; SCLK_reg <= 0; // if (RRDY) if (rx_fifo_full) ROE <= 1; end

 

The register map is changed as follows: 

 

//Register map: //addr register type //0 rdata count | read data r //1 write data w //2 status r/w //3 control r/w //4 reserved //5 slave-enable r/w //6 end-of-packet-value r/w

 

Note that the upper 8 bits of register 0 indicate the number of received bytes currently held in the Rx FIFO. 

 

This improved core can still be driven by Thomas Chou's standard driver, but to take full advantage of the FIFOs the driver must be rewritten. Because the driver must also be shared with normal SPI cores (which have no FIFO buffers), I added a new SPI mode, 'SPI_FIFO': 

struct spi_device { struct device dev; struct spi_master *master; u32 max_speed_hz; u8 chip_select; u8 mode;# define SPI_CPHA 0x01 /* clock phase */# define SPI_CPOL 0x02 /* clock polarity */# define SPI_MODE_0 (0|0) /* (original MicroWire) */# define SPI_MODE_1 (0|SPI_CPHA)# define SPI_MODE_2 (SPI_CPOL|0)# define SPI_MODE_3 (SPI_CPOL|SPI_CPHA)# define SPI_CS_HIGH 0x04 /* chipselect active high? */# define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */# define SPI_3WIRE 0x10 /* SI/SO signals shared */# define SPI_LOOP 0x20 /* loopback mode */# define SPI_FIFO 0x40 /* FIFO mode */ u8 bits_per_word; int irq; in the file 'spi.h' and add a code in the file 'mmc_spi.c' to set this. For the details, please refer the attached files. 

With these improvements, the peak transfer rate is up to 60 times faster than with the normal core on my NEEK. 

 

Kazu
0 Kudos
2 Replies
Altera_Forum
Honored Contributor II
115 Views

Hi, 

 

I forgot to mention that the signal 'TRDY' is now treated as a threshold indicator for the Tx FIFO buffer, so the logic around 'TRDY' needs special attention. For example: 

 

// assign readyfordata = TRDY; assign readyfordata = ~tx_fifo_full; // assign write_tx_holding = data_wr_strobe & TRDY; assign write_tx_holding = data_wr_strobe; // if (data_wr_strobe & ~TRDY) if (data_wr_strobe & tx_fifo_full) // You wrote when I wasn't ready. TOE <= 1; etc. 

 

There was also a bug in the driver 'altspi.c': 

 

# ifdef MMC_SPI_FIFO if (hw->mode == SPI_FIFO) { if (hw->txd_count < hw->len) { if (spsta & ALTERA_SPI_STATUS_TRDY_MSK) { txd_limit = ((hw->len - hw->txd_count) > MMC_SPI_FIFO_DEPTH / 2) ? hw->txd_count + MMC_SPI_FIFO_DEPTH / 2 : hw->len; for (count = hw->txd_count; count < txd_limit; count++, hw->txd_count++) writel(hw_txbyte(hw, count), hw->base + ALTERA_SPI_TXDATA); } else { count = hw->txd_count++; // <--- Here! writel(hw_txbyte(hw, count), hw->base + ALTERA_SPI_TXDATA); } } else { if (hw->count == hw->len) { complete(&hw->done); } } } else { Kazu
Altera_Forum
Honored Contributor II
115 Views

Thank you for the really informative post! Several years and versions of Quartus have gone by and I was hoping you might be willing to update the instructions for a current system? 

 

I created a project in Quartus and then a working Nios system in QSYS with a spi port. I then added the two files from your tgz into the submodules directory and edited my spi.v file as per your instructions to generate the following diff: 

@@ -85,3 +85,3 @@ module ft3_proto_qsys_fpga_spi0 ( reg ROE; - reg RRDY; + wire RRDY; wire SCLK; @@ -138,2 +138,39 @@ module ft3_proto_qsys_fpga_spi0 ( wire write_tx_holding; + + wire tx_fifo_data_out; + wire tx_fifo_count; + wire tx_fifo_empty; + wire tx_fifo_full; + + wire rx_fifo_data_out; + wire rx_fifo_count; + wire rx_fifo_empty; + wire rx_fifo_full; + + mmc_spi_tx_fifio the_mmc_spi_tx_fifio ( + .aclr (~reset_n), + .clock (clk), + .data (data_from_cpu), + .rdreq (write_shift_reg), + .sclr (status_wr_strobe), + .wrreq (write_tx_holding), + .empty (tx_fifo_empty), + .full (tx_fifo_full), + .q (tx_fifo_data_out), + .usedw (tx_fifo_count) + ); + +mmc_spi_rx_fifo the_mmc_spi_rx_fifo ( + .aclr (~reset_n), + .clock (clk), + .data (shift_reg), + .rdreq (data_rd_strobe), + .sclr (status_wr_strobe), + .wrreq ((state == 17) & (slowcount == 0)), + .empty (rx_fifo_empty), + .full (rx_fifo_full), + .q (rx_fifo_data_out), + .usedw (rx_fifo_count) + ); + //spi_control_port, which is an e_avalon_slave @@ -185,3 +222,4 @@ module ft3_proto_qsys_fpga_spi0 ( assign endofpacketvalue_wr_strobe = wr_strobe & (mem_addr == 6); - assign TMT = ~transmitting & ~tx_holding_primed; +// assign TMT = ~transmitting & ~tx_holding_primed; + assign TMT = ~transmitting & tx_fifo_empty; assign E = ROE | TOE; @@ -192,3 +230,4 @@ module ft3_proto_qsys_fpga_spi0 ( // Ready to accept streaming data. - assign readyfordata = TRDY; +// assign readyfordata = TRDY; + assign readyfordata = ~tx_fifo_full; @@ -286,3 +325,4 @@ module ft3_proto_qsys_fpga_spi0 ( ((mem_addr == 5))? spi_slave_select_reg : - rx_holding_reg; +// rx_holding_reg; + {rx_fifo_count & ~rx_fifo_empty, rx_fifo_data_out}; @@ -324,9 +364,12 @@ module ft3_proto_qsys_fpga_spi0 ( //it's safe to write data. - assign TRDY = ~(transmitting & tx_holding_primed); +// assign TRDY = ~(transmitting & tx_holding_primed); + assign TRDY = ~tx_fifo_count; // Enable write to tx_holding_register. 
- assign write_tx_holding = data_wr_strobe & TRDY; +// assign write_tx_holding = data_wr_strobe & TRDY; + assign write_tx_holding = data_wr_strobe; // Enable write to shift register. - assign write_shift_reg = tx_holding_primed & ~transmitting; +// assign write_shift_reg = tx_holding_primed & ~transmitting; + assign write_shift_reg = ~tx_fifo_empty & ~transmitting; @@ -339,3 +382,3 @@ module ft3_proto_qsys_fpga_spi0 ( EOP <= 0; - RRDY <= 0; +// RRDY <= 0; ROE <= 0; @@ -355,3 +398,4 @@ module ft3_proto_qsys_fpga_spi0 ( end - if (data_wr_strobe & ~TRDY) +// if (data_wr_strobe & ~TRDY) + if (data_wr_strobe & tx_fifo_full) // You wrote when I wasn't ready. @@ -364,3 +408,4 @@ module ft3_proto_qsys_fpga_spi0 ( begin - shift_reg <= tx_holding_reg; +// shift_reg <= tx_holding_reg; + shift_reg <= tx_fifo_data_out; transmitting <= 1; @@ -371,5 +416,5 @@ module ft3_proto_qsys_fpga_spi0 ( - if (data_rd_strobe) - // On data read, clear the RRDY bit. - RRDY <= 0; +// if (data_rd_strobe) +// // On data read, clear the RRDY bit. +// RRDY <= 0; @@ -380,3 +425,3 @@ module ft3_proto_qsys_fpga_spi0 ( - RRDY <= 0; +// RRDY <= 0; ROE <= 0; @@ -389,6 +434,7 @@ module ft3_proto_qsys_fpga_spi0 ( transmitting <= 0; - RRDY <= 1; +// RRDY <= 1; rx_holding_reg <= shift_reg; SCLK_reg <= 0; - if (RRDY) +// if (RRDY) + if (rx_fifo_full) ROE <= 1;  

 

I have two questions: 

1) Is all I have to do now re-compile the main project in Quartus (16) and then use it? 

2) Is there a more modern/current fifo I should be using instead? I noticed that lpm_fifo is still supported for compatibility but isn't the normal wizard generated FIFO anymore. 

 

Thank you again, 

Hunter
Reply