--- sys/dev/sbus/if_hme_sbus.c.orig Tue Mar 16 11:33:33 2004 +++ sys/dev/sbus/if_hme_sbus.c Wed Mar 17 11:22:39 2004 @@ -183,8 +183,14 @@ burst &= sbusburst; /* Translate into plain numerical format */ - sc->sc_burst = (burst & SBUS_BURST_32) ? 32 : - (burst & SBUS_BURST_16) ? 16 : 0; + if ((burst & SBUS_BURST_64)) + sc->sc_burst = 64; + else if ((burst & SBUS_BURST_32)) + sc->sc_burst = 32; + else if ((burst & SBUS_BURST_16)) + sc->sc_burst = 16; + else + sc->sc_burst = 0; sc->sc_pci = 0; /* XXXXX should all be done in bus_dma. */ hme_config(sc); --- sys/dev/ic/hme.c.orig Tue Mar 16 11:33:04 2004 +++ sys/dev/ic/hme.c Fri Apr 16 16:06:50 2004 @@ -67,6 +67,8 @@ #include #include #include +#include +#include #endif #if NBPFILTER > 0 @@ -117,6 +119,9 @@ int hme_eint(struct hme_softc *, u_int); int hme_rint(struct hme_softc *); int hme_tint(struct hme_softc *); +/* TCP/UDP checksum offloading support */ +void hme_txcksum(struct mbuf *, u_int32_t *); +void hme_rxcksum(struct mbuf *, u_int32_t); void hme_config(sc) @@ -241,7 +246,8 @@ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST; IFQ_SET_READY(&ifp->if_snd); - ifp->if_capabilities |= IFCAP_VLAN_MTU; + ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_CSUM_TCPv4 | + IFCAP_CSUM_UDPv4; /* Initialize ifmedia structures and MII info */ mii->mii_ifp = ifp; @@ -485,7 +491,7 @@ bus_space_handle_t erx = sc->sc_erx; bus_space_handle_t mac = sc->sc_mac; u_int8_t *ea; - u_int32_t v; + u_int32_t v, n; /* * Initialization sequence. The numbered steps below correspond @@ -548,7 +554,7 @@ /* step 8. Global Configuration & Interrupt Mask */ bus_space_write_4(t, seb, HME_SEBI_IMASK, - ~(HME_SEB_STAT_HOSTTOTX | HME_SEB_STAT_RXTOHOST | + ~(/*HME_SEB_STAT_HOSTTOTX |*/ HME_SEB_STAT_RXTOHOST | HME_SEB_STAT_TXALL | HME_SEB_STAT_TXPERR | HME_SEB_STAT_RCNTEXP | HME_SEB_STAT_ALL_ERRORS)); @@ -566,6 +572,16 @@ v = HME_SEB_CFG_BURST64; break; } + /* + * Blindly setting 64bit transfers may hang PCI cards.(Cheerio?) 
+ * Allowing 64bit transfers breaks TX checksum offload as well. + * I noticed this from Windows's attachment of network driver + * via Samba. Mounting the drive with mount_smbfs(8) on FreeBSD + * didn't produce the problem though. + * + * if (sc->sc_pci == 0) + * v |= HME_SEB_CFG_64BIT; + */ bus_space_write_4(t, seb, HME_SEBI_CFG, v); /* step 9. ETX Configuration: use mostly default values */ @@ -594,6 +610,10 @@ #endif /* Enable DMA */ v |= HME_ERX_CFG_DMAENABLE | (HME_RX_OFFSET << 3); + /* RX TCP/UDP cksum offset */ + n = (ETHER_HDR_LEN + sizeof(struct ip)) / 2; + n = (n << HME_ERX_CFG_CSUM_SHIFT) & HME_ERX_CFG_CSUMSTART; + v |= n; bus_space_write_4(t, erx, HME_ERXI_CFG, v); /* step 11. XIF Configuration */ @@ -730,6 +750,105 @@ } /* + * XXX layering violation + * + * If we can have additional csum data member in 'struct pkthdr' for + * these incomplete checksum offload capable hardware, things would be + * much simpler. That member variable will carry partial checksum + * data and it may be evaluated in TCP/UDP input handler after + * computing pseudo header checksumming. 
+ */ +void +hme_rxcksum(struct mbuf *m, u_int32_t flags) +{ + struct ether_header *eh; + struct ip *ip; + struct udphdr *uh; + int32_t hlen, len, pktlen; + u_int16_t cksum, flag_bad, flag_ok, *opts; + u_int32_t temp32; + union pseudoh { + struct hdr { + u_int16_t len; + u_int8_t ttl; + u_int8_t proto; + u_int32_t src; + u_int32_t dst; + } h; + u_int16_t w[6]; + } ph; + + pktlen = m->m_pkthdr.len; + if (pktlen < sizeof(struct ether_header)) + return; + eh = mtod(m, struct ether_header *); + if (eh->ether_type != htons(ETHERTYPE_IP)) + return; + ip = (struct ip *)(eh + 1); + if (ip->ip_v != IPVERSION) + return; + + hlen = ip->ip_hl << 2; + pktlen -= sizeof(struct ether_header); + if (hlen < sizeof(struct ip)) + return; + if (ntohs(ip->ip_len) < hlen) + return; + if (ntohs(ip->ip_len) != pktlen) + return; + if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) + return; /* can't handle fragmented packet */ + + switch (ip->ip_p) { + case IPPROTO_TCP: + if (pktlen < (hlen + sizeof(struct tcphdr))) + return; + flag_ok = M_TCP_CSUM_IN_OK; + flag_bad = M_TCP_CSUM_IN_BAD; + break; + case IPPROTO_UDP: + if (pktlen < (hlen + sizeof(struct udphdr))) + return; + uh = (struct udphdr *)((caddr_t)ip + hlen); + if (uh->uh_sum == 0) + return; /* no checksum */ + flag_ok = M_UDP_CSUM_IN_OK; + flag_bad = M_UDP_CSUM_IN_BAD; + break; + default: + return; + } + + cksum = ~(flags & HME_XD_RXCKSUM); + /* cksum fixup for IP options */ + len = hlen - sizeof(struct ip); + if (len > 0) { + opts = (u_int16_t *)(ip + 1); + for (; len > 0; len -= sizeof(u_int16_t), opts++) { + temp32 = cksum - *opts; + temp32 = (temp32 >> 16) + (temp32 & 65535); + cksum = temp32 & 65535; + } + } + /* cksum fixup for pseudo-header, replace with in_cksum_phdr()? 
*/ + ph.h.len = htons(ntohs(ip->ip_len) - hlen); + ph.h.ttl = 0; + ph.h.proto = ip->ip_p; + ph.h.src = ip->ip_src.s_addr; + ph.h.dst = ip->ip_dst.s_addr; + temp32 = cksum; + opts = &ph.w[0]; + temp32 += opts[0] + opts[1] + opts[2] + opts[3] + opts[4] + opts[5]; + temp32 = (temp32 >> 16) + (temp32 & 65535); + temp32 += (temp32 >> 16); + cksum = ~temp32; + if (cksum != 0) + m->m_pkthdr.csum |= flag_bad; + else + m->m_pkthdr.csum |= flag_ok; +} + +/* * Receive interrupt. */ int @@ -780,6 +899,7 @@ #endif ifp->if_ipackets++; + hme_rxcksum(m, flags); ether_input_mbuf(ifp, m); again: @@ -1302,6 +1422,74 @@ bus_space_write_4(t, mac, HME_MACI_RXCFG, v); } +/* + * XXX layering violation + * + * If there is a way to carry IP header length, there is no need to + * search ip_hl in mbuf. Packet tagging may be one way but it will + * add additional overhead in ip_output(). If we can have additional + * csum data member in 'struct pkthdr' for these incomplete checksum + * offload capable hardware, things would be much simpler. + * Or a new flag like M_IPV4_CSUM_STDH which tells it has no IP options + * will make a short circuit for majority case. + * + * + * Happy Meal Ethernet's UDP checksum offload has one deficiency. + * i.e. It does not flip the checksum if the computed value is 0x0000 + * which means 'no checksum' even if the hardware computes the checksum. + * I don't know whether this is a sufficient condition to disable UDP + * checksum offload completely.(The other end will accept the UDP + * datagram as if no cksum had been computed.) 
+ */ +void +hme_txcksum(struct mbuf *m, u_int32_t *cflags) +{ + struct ip *ip; + u_int32_t offset, offset2, csumflag; + caddr_t p; + + if ((m->m_pkthdr.csum & M_TCPV4_CSUM_OUT)) { + offset2 = offsetof(struct tcphdr, th_sum); + csumflag = HME_XD_TCPCKSUM; + } else if((m->m_pkthdr.csum & M_UDPV4_CSUM_OUT)) { + offset2 = offsetof(struct udphdr, uh_sum); + csumflag = HME_XD_UDPCKSUM; + } + else + return; + + for(; m && m->m_len == 0; m = m->m_next) + ; + if (m == NULL || m->m_len < ETHER_HDR_LEN) { + printf("hme_txcksum: m_len < ETHER_HDR_LEN\n"); + return; /* cksum will be corrupted */ + } + if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) { + if (m->m_len != ETHER_HDR_LEN) { + printf("hme_txcksum: m_len != ETHER_HDR_LEN\n"); + return; /* cksum will be corrupted */ + } + /* XXX */ + for(m = m->m_next; m && m->m_len == 0; m = m->m_next) + ; + if (m == NULL) + return; /* cksum will be corrupted */ + ip = mtod(m, struct ip *); + } else { + p = mtod(m, caddr_t); + p += ETHER_HDR_LEN; + ip = (struct ip *)p; + } + if ((ip->ip_hl << 2) == sizeof(*ip)) + *cflags = csumflag; + else { + offset = (ip->ip_hl << 2) + ETHER_HDR_LEN; + *cflags = offset << HME_XD_TXCKSUM_SSHIFT; + *cflags |= ((offset + offset2) << HME_XD_TXCKSUM_OSHIFT); + *cflags |= HME_XD_TXCKSUM; + } +} + int hme_encap(sc, mhead, bixp) struct hme_softc *sc; @@ -1311,12 +1499,13 @@ struct hme_sxd *sd; struct mbuf *m; int frag, cur, cnt = 0; - u_int32_t flags; + u_int32_t flags, cflags = 0; struct hme_ring *hr = &sc->sc_rb; cur = frag = *bixp; sd = &sc->sc_txd[frag]; + hme_txcksum(mhead, &cflags); for (m = mhead; m != NULL; m = m->m_next) { if (m->m_len == 0) continue; @@ -1335,6 +1524,7 @@ sd->sd_mbuf = NULL; flags = HME_XD_ENCODE_TSIZE(m->m_len); + flags |= cflags; if (cnt == 0) flags |= HME_XD_SOP; else --- sys/dev/ic/hmereg.h.orig Tue Mar 16 11:33:06 2004 +++ sys/dev/ic/hmereg.h Fri Apr 16 14:54:07 2004 @@ -53,8 +53,9 @@ #define HME_SEB_CFG_BURST16 0x00000000 /* 16 byte bursts */ #define HME_SEB_CFG_BURST32 
0x00000001 /* 32 byte bursts */ #define HME_SEB_CFG_BURST64 0x00000002 /* 64 byte bursts */ -#define HME_SEB_CFG_64BIT 0x00000004 /* ? */ -#define HME_SEB_CFG_PARITY 0x00000008 /* ? */ +#define HME_SEB_CFG_64BIT 0x00000004 /* extended transfer mode */ +#define HME_SEB_CFG_PARITY 0x00000008 /* parity check for DVMA/PIO */ +#define HME_SEB_CFG_CHANMSK 0xf0000000 /* ethernet channel version */ #define HME_SEB_STAT_GOTFRAME 0x00000001 /* frame received */ #define HME_SEB_STAT_RCNTEXP 0x00000002 /* rx frame count expired */ @@ -104,7 +105,7 @@ HME_SEB_STAT_DTIMEXP | HME_SEB_STAT_FCNTEXP | HME_SEB_STAT_LCCNTEXP |\ HME_SEB_STAT_ECNTEXP | HME_SEB_STAT_NCNTEXP | HME_SEB_STAT_MAXPKTERR|\ HME_SEB_STAT_TFIFO_UND| HME_SEB_STAT_STSTERR | HME_SEB_STAT_CVCNTEXP |\ - HME_SEB_STAT_LCNTEXP | HME_SEB_STAT_CCNTEXP| HME_SEB_STAT_ACNTEXP) + HME_SEB_STAT_LCNTEXP | HME_SEB_STAT_CCNTEXP | HME_SEB_STAT_ACNTEXP) #define HME_SEB_STAT_VLAN_ERRORS \ (HME_SEB_STAT_SLVPERR | HME_SEB_STAT_SLVERR | HME_SEB_STAT_TXTERR |\ @@ -114,7 +115,7 @@ HME_SEB_STAT_DTIMEXP | HME_SEB_STAT_FCNTEXP | HME_SEB_STAT_LCCNTEXP |\ HME_SEB_STAT_ECNTEXP | HME_SEB_STAT_NCNTEXP | \ HME_SEB_STAT_TFIFO_UND| HME_SEB_STAT_STSTERR | HME_SEB_STAT_CVCNTEXP |\ - HME_SEB_STAT_LCNTEXP | HME_SEB_STAT_CCNTEXP | HME_SEB_STAT_ACNTEXP) + HME_SEB_STAT_LCNTEXP | HME_SEB_STAT_CCNTEXP | HME_SEB_STAT_ACNTEXP) /* * HME Transmitter register offsets @@ -153,7 +154,7 @@ #define HME_ERXI_FIFO_WPTR (3*4) /* FIFO write pointer */ #define HME_ERXI_FIFO_SWPTR (4*4) /* FIFO shadow write pointer */ #define HME_ERXI_FIFO_RPTR (5*4) /* FIFO read pointer */ -#define HME_ERXI_FIFO_SRPTR (6*4) /* FIFO shadow read pointer */ +#define HME_ERXI_FIFO_PKTCNT (6*4) /* FIFO packet counter */ #define HME_ERXI_STATEMACHINE (7*4) /* State machine */ /* RXI_CFG bits */ @@ -164,6 +165,7 @@ #define HME_ERX_CFG_RINGSIZE128 0x00000400 /* Descriptor ring size: 128 */ #define HME_ERX_CFG_RINGSIZE256 0x00000600 /* Descriptor ring size: 256 */ #define HME_ERX_CFG_CSUMSTART 
0x007f0000 /* cksum offset */ +#define HME_ERX_CFG_CSUM_SHIFT 16 /* * HME MAC-core register offsets @@ -292,7 +294,11 @@ #define HME_XD_RXLENMSK 0x3fff0000 /* packet length mask (rx) */ #define HME_XD_RXLENSHIFT 16 #define HME_XD_TXLENMSK 0x00003fff /* packet length mask (tx) */ +#define HME_XD_TXCKSUM_SSHIFT 14 +#define HME_XD_TXCKSUM_OSHIFT 20 #define HME_XD_RXCKSUM 0x0000ffff /* packet checksum (rx) */ +#define HME_XD_TCPCKSUM 0x13288000 /* precomputed tcp cksum */ +#define HME_XD_UDPCKSUM 0x12888000 /* precomputed udp cksum */ /* Macros to encode/decode the receive buffer size from the flags field */ #define HME_XD_ENCODE_RSIZE(sz) \ --- sys/dev/ic/hmevar.h.orig Tue Mar 16 11:33:06 2004 +++ sys/dev/ic/hmevar.h Wed Apr 7 11:31:17 2004 @@ -39,8 +39,8 @@ #include -#define HME_TX_RING_SIZE 64 -#define HME_RX_RING_SIZE 64 +#define HME_TX_RING_SIZE 128 +#define HME_RX_RING_SIZE 128 #define HME_RX_RING_MAX 256 #define HME_TX_RING_MAX 256 #define HME_RX_PKTSIZE 1600