Chapter 36: Networking
- How the kernel implements network communication
- The network stack layers: Ethernet, IP, UDP, TCP
- The virtio-net driver for QEMU
- Network buffer management (sk_buff)
- How sockets work
- Our networking implementation
36.1 Network Stack Overview
Networking is organized in layers. Our kernel implements a minimal but functional TCP/IP stack:
Application: sockets interface
Transport: UDP, TCP (in progress)
Network: IPv4, ICMP, ARP
Link: Ethernet, virtio-net driver
Hardware: Network interface card (virtio-net on QEMU)
36.2 Network Buffer (sk_buff)
All network data flows through a shared buffer structure:
/* Network buffer (simplified sk_buff) */
#define MAX_PACKET_SIZE 2048

/*
 * One packet travelling through the stack.
 *
 * `head` and `tail` are byte offsets into `data`. Protocol layers find the
 * header they are currently working on at `data + head`; the socket layer
 * copies received payload out starting at `data + tail`.
 */
struct sk_buff {
    uint8_t data[MAX_PACKET_SIZE];
    int len;                /* Total packet length */
    int head;               /* Start of protocol headers */
    int tail;               /* Start of payload */
    struct net_device *dev; /* Source/destination device */
    struct sk_buff *next;   /* For queueing */
};

/*
 * Push a protocol header: grow the packet towards the front of the buffer.
 *
 * Moves `head` back by `size` bytes and adds them to `len`.
 *
 * Returns a pointer to the start of the newly exposed header space, so a
 * caller can write `hdr = skb_push(skb, sizeof *hdr)`. Returns NULL and
 * leaves the buffer untouched when `size` is negative or exceeds the
 * headroom still available in front of `head`.
 */
void *skb_push(struct sk_buff *skb, int size) {
    if (size < 0 || size > skb->head) {
        return NULL;
    }
    skb->head -= size;
    skb->len += size;
    return skb->data + skb->head;
}

/*
 * Pull a protocol header (at receive): step over `size` bytes at the front
 * of the packet once the current layer has finished with its header.
 */
void skb_pull(struct sk_buff *skb, int size) {
    skb->head += size;
    skb->len -= size;
}

/*
 * Reserve space for headers: mark the buffer as empty with `size` bytes of
 * headroom, so later skb_push() calls have room to prepend headers.
 */
void skb_reserve(struct sk_buff *skb, int size) {
    skb->head = size;
    skb->tail = size;
    skb->len = 0;
}
36.3 The virtio-net Driver
QEMU's virt machine provides networking via virtio-net, a paravirtualized network device:
/* virtio-net device: per-device driver state filled in by virtio_net_init() */
struct virtio_net {
uint64_t mmio_base; /* MMIO base address, copied from the probed device */
struct virtqueue *rx_vq; /* Receive queue (created with queue index 0) */
struct virtqueue *tx_vq; /* Transmit queue (created with queue index 1) */
uint8_t mac[6]; /* MAC address, copied from the device tree when present */
};
/*
 * Initialize virtio-net.
 *
 * Allocates the driver state for `dev`, records its MMIO base and MAC
 * address, creates the receive (index 0) and transmit (index 1) virtqueues,
 * posts empty packet buffers to the receive queue, and stores the state in
 * `dev->private_data`.
 *
 * Returns 0 on success, or -1 when the driver state, a virtqueue, or the
 * very first receive buffer cannot be obtained.
 */
int virtio_net_init(struct device *dev) {
    enum { QUEUE_SIZE = 256, MAC_LEN = 6 };

    struct virtio_net *net = kmalloc(sizeof(*net));
    if (!net) {
        return -1;
    }
    net->mmio_base = dev->mmio_base;

    /*
     * Get MAC address from device tree. The freshly allocated state is not
     * assumed to be zeroed, so fall back to an all-zero MAC rather than
     * leaving the field uninitialized when no address is provided.
     */
    uint8_t *mac = of_get_mac(dev->of_node);
    for (int i = 0; i < MAC_LEN; i++) {
        net->mac[i] = mac ? mac[i] : 0;
    }

    /* Initialize virtqueues */
    net->rx_vq = virtqueue_init(net->mmio_base, 0, QUEUE_SIZE);
    net->tx_vq = virtqueue_init(net->mmio_base, 1, QUEUE_SIZE);
    if (!net->rx_vq || !net->tx_vq) {
        /* NOTE(review): `net` is not released here because no matching
         * deallocator is visible in this file; add one if available. */
        return -1;
    }

    /* Fill receive queue with empty buffers */
    for (int i = 0; i < QUEUE_SIZE; i++) {
        struct sk_buff *skb = alloc_skb(MAX_PACKET_SIZE);
        if (!skb) {
            if (i == 0) {
                return -1; /* nothing posted: the device could never receive */
            }
            break; /* run with a partially filled receive ring */
        }
        virtqueue_add(net->rx_vq, skb->data, MAX_PACKET_SIZE);
    }

    dev->private_data = net;
    return 0;
}
/*
 * Transmit a packet.
 *
 * Queues the frame held in `skb` on the transmit virtqueue, notifies the
 * device, and releases `skb`. Always returns 0.
 */
int virtio_net_xmit(struct net_device *ndev, struct sk_buff *skb) {
    struct virtio_net *net = ndev->private_data;

    /*
     * The frame starts at `head`, not at data[0]: every layer prepends its
     * header by moving `head` backwards into the reserved headroom, and
     * `len` counts bytes from `head`.
     */
    virtqueue_add(net->tx_vq, skb->data + skb->head, skb->len);

    /* Value 1 matches the queue index the transmit queue was created with. */
    writel(net->mmio_base, VIRTIO_MMIO_QUEUE_NOTIFY, 1);

    /*
     * NOTE(review): the buffer is freed immediately after the notify. If the
     * device reads the buffer asynchronously, the free has to be deferred
     * until the transmit queue reports the buffer as used — confirm against
     * the virtqueue implementation.
     */
    free_skb(skb);
    return 0;
}
/*
 * Receive interrupt handler.
 *
 * Drains every completed buffer from the receive queue. Each packet is
 * tagged with the network device and passed up the stack, and a fresh empty
 * buffer is posted in its place so the receive ring stays full.
 *
 * The replacement is allocated BEFORE the packet is handed up. If that
 * allocation fails, the packet is dropped and its buffer is posted back to
 * the device, so memory pressure costs a packet instead of a NULL
 * dereference or a shrinking receive ring.
 */
void virtio_net_rx(struct virtio_net *net) {
    struct sk_buff *skb;

    while ((skb = virtqueue_get_buf(net->rx_vq)) != NULL) {
        struct sk_buff *fresh = alloc_skb(MAX_PACKET_SIZE);
        if (!fresh) {
            /* Out of buffers: drop this packet and recycle its storage. */
            virtqueue_add(net->rx_vq, skb->data, MAX_PACKET_SIZE);
            continue;
        }

        skb->dev = &net_dev;
        netif_receive_skb(skb); /* Pass up the stack */

        /* Replace with new empty buffer */
        virtqueue_add(net->rx_vq, fresh->data, MAX_PACKET_SIZE);
    }
}
36.4 Ethernet Layer
/* Ethernet header, laid out exactly as on the wire (packed: no padding) */
struct eth_hdr {
uint8_t dst_mac[6]; /* Destination MAC address */
uint8_t src_mac[6]; /* Source MAC address */
uint16_t ethertype; /* Payload protocol; eth_receive() converts it with ntohs() */
} __attribute__((packed));
/* EtherType values that eth_receive() dispatches on */
#define ETH_TYPE_IP 0x0800
#define ETH_TYPE_ARP 0x0806
/*
 * Receive an Ethernet frame.
 *
 * Strips the Ethernet header from `skb` and hands the remainder to the
 * handler for its EtherType (IPv4 or ARP).
 *
 * Frames too short to contain an Ethernet header, and frames carrying an
 * EtherType this stack does not handle, are freed here. The driver does not
 * release a buffer after passing it up, so nothing else would free them.
 */
void eth_receive(struct sk_buff *skb) {
    if (skb->len < (int)sizeof(struct eth_hdr)) {
        free_skb(skb); /* runt frame: header would be read out of bounds */
        return;
    }

    struct eth_hdr *eth = (struct eth_hdr *)(skb->data + skb->head);
    skb_pull(skb, sizeof(struct eth_hdr));

    /* `eth` stays valid after the pull: only the offsets moved. */
    switch (ntohs(eth->ethertype)) {
    case ETH_TYPE_IP:
        ip_receive(skb);
        break;
    case ETH_TYPE_ARP:
        arp_receive(skb);
        break;
    default:
        free_skb(skb); /* unsupported protocol: drop instead of leaking */
        break;
    }
}
36.5 IPv4 Layer
/* IPv4 header without options, laid out as on the wire (packed: no padding) */
struct ip_hdr {
uint8_t version_ihl; /* Version and header length; ip_send() writes 0x45 (IPv4, 20 byte header) */
uint8_t dscp_ecn; /* DSCP / ECN bits */
uint16_t total_length; /* Packet length; ip_send() stores it via htons() */
uint16_t id; /* Identification */
uint16_t flags_frag; /* Flags and fragment offset */
uint8_t ttl; /* Time to live; ip_send() uses 64 */
uint8_t protocol; /* Payload protocol, one of IP_PROTO_* */
uint16_t checksum; /* Header checksum, computed by ip_checksum() */
uint32_t src_ip; /* Source address */
uint32_t dst_ip; /* Destination address */
} __attribute__((packed));
/* Protocol numbers that ip_receive() dispatches on */
#define IP_PROTO_ICMP 1
#define IP_PROTO_UDP 17
/*
 * Receive an IP packet.
 *
 * Validates the IPv4 header at the front of `skb`, trims the buffer to the
 * length the header declares, strips the header (including any options) and
 * hands the payload to the ICMP or UDP handler together with a pointer to
 * the header.
 *
 * Packets that fail validation, or that carry a protocol this stack does
 * not handle, are freed here.
 */
void ip_receive(struct sk_buff *skb) {
    if (skb->len < (int)sizeof(struct ip_hdr)) {
        free_skb(skb); /* too short to hold even a minimal header */
        return;
    }

    struct ip_hdr *ip = (struct ip_hdr *)(skb->data + skb->head);
    int version = ip->version_ihl >> 4;
    int hdr_len = (ip->version_ihl & 0x0F) * 4; /* IHL counts 32-bit words */
    int total_len = ntohs(ip->total_length);

    if (version != 4 ||
        hdr_len < (int)sizeof(struct ip_hdr) ||
        total_len < hdr_len ||
        total_len > skb->len) {
        free_skb(skb); /* malformed or truncated packet */
        return;
    }

    /* Drop link-layer padding so upper layers see only the IP payload. */
    skb->len = total_len;

    /* Pull the real header length, not a fixed 20 bytes, so options are
     * not mistaken for payload. */
    skb_pull(skb, hdr_len);

    switch (ip->protocol) {
    case IP_PROTO_ICMP:
        icmp_receive(skb, ip);
        break;
    case IP_PROTO_UDP:
        udp_receive(skb, ip);
        break;
    default:
        free_skb(skb); /* unsupported protocol: drop instead of leaking */
        break;
    }
}
/*
 * Send an IP packet.
 *
 * Prepends a 20-byte IPv4 header to `skb`, fills in every field, computes
 * the header checksum and passes the packet on for MAC resolution and
 * transmission.
 *
 * `src_ip` and `dst_ip` are stored into the header exactly as given.
 * If `skb` does not have room in front of `head` for the header, the packet
 * is freed and nothing is sent.
 */
void ip_send(struct sk_buff *skb, uint32_t src_ip, uint32_t dst_ip,
             uint8_t protocol) {
    if (skb->head < (int)sizeof(struct ip_hdr)) {
        free_skb(skb); /* no headroom: pushing would underflow the buffer */
        return;
    }

    /* Locate the header via `head` after the push; this does not depend on
     * skb_push() returning a value. */
    skb_push(skb, sizeof(struct ip_hdr));
    struct ip_hdr *ip = (struct ip_hdr *)(skb->data + skb->head);

    ip->version_ihl = 0x45; /* IPv4, 20 byte header */
    ip->dscp_ecn = 0;
    ip->total_length = htons(skb->len);
    ip->id = 0;
    ip->flags_frag = 0;
    ip->ttl = 64;
    ip->protocol = protocol;
    ip->src_ip = src_ip;
    ip->dst_ip = dst_ip;

    /* The checksum field must be zero while the checksum is computed. */
    ip->checksum = 0;
    ip->checksum = ip_checksum(ip, sizeof(struct ip_hdr));

    /* Resolve MAC via ARP and send */
    arp_resolve(dst_ip, skb);
}
36.6 Sockets Interface
User-space programs use the socket API for network communication:
/* Per-socket kernel state, created by sys_socket() and reached through a file descriptor */
struct socket {
int domain; /* AF_INET, AF_UNIX */
int type; /* SOCK_DGRAM, SOCK_STREAM */
int protocol; /* IPPROTO_UDP, IPPROTO_TCP */
uint32_t local_ip; /* Local address, set by sys_bind() */
uint16_t local_port; /* Local port, set by sys_bind() */
uint32_t remote_ip; /* Peer address */
uint16_t remote_port; /* Peer port */
struct sk_buff *recv_queue; /* Received data queue, linked through sk_buff.next */
struct semaphore recv_sem; /* Wait for data */
};
/*
 * Create a socket.
 *
 * Allocates a socket with the given domain, type and protocol, an empty
 * receive queue and zeroed local/remote addresses, then binds it to a new
 * file descriptor.
 *
 * Returns whatever alloc_fd() returns for the new socket, or -1 if the
 * socket itself cannot be allocated.
 */
int sys_socket(int domain, int type, int protocol) {
    struct socket *sock = kmalloc(sizeof(*sock));
    if (!sock) {
        return -1;
    }

    sock->domain = domain;
    sock->type = type;
    sock->protocol = protocol;

    /* sys_sendto() reads the local address even when sys_bind() was never
     * called, so give every address field a defined value. */
    sock->local_ip = 0;
    sock->local_port = 0;
    sock->remote_ip = 0;
    sock->remote_port = 0;

    sock->recv_queue = NULL;
    sem_init(&sock->recv_sem, 0);

    return alloc_fd(sock, FD_TYPE_SOCKET);
}
/*
 * Bind a socket to a local address and port.
 *
 * Records `ip` and `port` on the socket behind `fd` and registers the
 * socket with the UDP layer for that port.
 *
 * Returns 0 on success, or -1 if `fd` has no socket attached.
 */
int sys_bind(int fd, uint32_t ip, uint16_t port) {
    struct socket *sock = get_fd_data(fd);
    if (!sock) {
        /* assumes get_fd_data() yields NULL for an invalid descriptor —
         * TODO confirm */
        return -1;
    }

    sock->local_ip = ip;
    sock->local_port = port;
    udp_bind(port, sock);
    return 0;
}
int sys_sendto(int fd, const void *buf, int len,
uint32_t dst_ip, uint16_t dst_port) {
struct socket *sock = get_fd_data(fd);
struct sk_buff *skb = alloc_skb(len + 64);
memcpy(skb_put(skb, len), buf, len);
udp_send(skb, sock->local_ip, sock->local_port, dst_ip, dst_port);
return len;
}
/*
 * Receive a datagram.
 *
 * Waits on the socket's semaphore until data is available, removes the
 * first packet from the receive queue, copies at most `len` bytes of it
 * into `buf`, and frees the packet. Bytes beyond `len` are discarded with
 * the packet.
 *
 * Returns the number of bytes copied, or -1 if `fd` has no socket attached,
 * `len` is negative, `buf` is NULL while `len` is positive, or the queue is
 * unexpectedly empty after the wait.
 */
int sys_recvfrom(int fd, void *buf, int len) {
    struct socket *sock = get_fd_data(fd);
    if (!sock || len < 0 || (!buf && len > 0)) {
        return -1;
    }

    sem_wait(&sock->recv_sem);

    struct sk_buff *skb = sock->recv_queue;
    if (!skb) {
        return -1; /* woken without a queued packet */
    }
    sock->recv_queue = skb->next;

    int copy_len = min(len, skb->len);
    if (copy_len < 0) {
        copy_len = 0; /* never hand a negative size to memcpy */
    }
    if (copy_len > 0) {
        memcpy(buf, skb->data + skb->tail, copy_len);
    }

    free_skb(skb);
    return copy_len;
}
36.7 Our Implementation
Our networking stack (net/) provides:
- virtio-net driver: packet send/receive via virtqueues
- Ethernet: framing, ARP resolution, MAC address handling
- IPv4: packet send/receive, routing to localhost or gateway
- UDP: connectionless datagram transport
- ICMP: ping (echo request/reply) for testing
- Sockets: AF_INET/SOCK_DGRAM interface for user-space programs
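TCP is under development (it requires connection state machines, retransmission, and windowing). Basic UDP-based services (DHCP client, DNS resolver) are functional. Note that standard HTTP runs over TCP, so the simple HTTP client can only be fully exercised once TCP support is complete.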
36.8 Exercises
Exercise 1: ARP Cache
Implement an ARP cache that maps IP addresses to MAC addresses. Entries should expire after 60 seconds; when a lookup hits an expired entry, send a new ARP request.
Exercise 2: UDP Echo Server
Write a user-space program that creates a UDP socket on port 7 (echo port) and echoes received datagrams back to the sender.
36.9 Summary
Our networking stack implements a layered architecture from the hardware driver (virtio-net) up to the socket API. Packets flow through Ethernet, ARP, IPv4, and UDP layers, each adding or processing headers. The sk_buff structure carries packets between layers without copying data. The socket interface provides the familiar Berkeley sockets API for user-space programs. Basic network services (echo, ping) are functional, with TCP planned for future development.