Linux Kernel Network Programming - struct net_device data-structure - Code-walk, ExampleLinux Kernel Network Programming - struct net_device data-structure - Code-walk, Example

Linux Kernel struct net_device ↗

Here is the copy paste of struct net_device (include/linux/netdevice.h) from the Kernel-source version 6.6 for quick reference:

 *	struct net_device - The DEVICE structure.
 *	Actually, this whole structure is a big mistake.  It mixes I/O
 *	data with strictly "high-level" data, and it has to know about
 *	almost every data structure used in the INET module.
 *	@name:	This is the first field of the "visible" part of this structure
 *		(i.e. as seen by users in the "Space.c" file).  It is the name
 *		of the interface.
 *	@name_node:	Name hashlist node
 *	@ifalias:	SNMP alias
 *	@mem_end:	Shared memory end
 *	@mem_start:	Shared memory start
 *	@base_addr:	Device I/O address
 *	@irq:		Device IRQ number
 *	@state:		Generic network queuing layer state, see netdev_state_t
 *	@dev_list:	The global list of network devices
 *	@napi_list:	List entry used for polling NAPI devices
 *	@unreg_list:	List entry  when we are unregistering the
 *			device; see the function unregister_netdev
 *	@close_list:	List entry used when we are closing the device
 *	@ptype_all:     Device-specific packet handlers for all protocols
 *	@ptype_specific: Device-specific, protocol-specific packet handlers
 *	@adj_list:	Directly linked devices, like slaves for bonding
 *	@features:	Currently active device features
 *	@hw_features:	User-changeable features
 *	@wanted_features:	User-requested features
 *	@vlan_features:		Mask of features inheritable by VLAN devices
 *	@hw_enc_features:	Mask of features inherited by encapsulating devices
 *				This field indicates what encapsulation
 *				offloads the hardware is capable of doing,
 *				and drivers will need to set them appropriately.
 *	@mpls_features:	Mask of features inheritable by MPLS
 *	@gso_partial_features: value(s) from NETIF_F_GSO\*
 *	@ifindex:	interface index
 *	@group:		The group the device belongs to
 *	@stats:		Statistics struct, which was left as a legacy, use
 *			rtnl_link_stats64 instead
 *	@core_stats:	core networking counters,
 *			do not use this in drivers
 *	@carrier_up_count:	Number of times the carrier has been up
 *	@carrier_down_count:	Number of times the carrier has been down
 *	@wireless_handlers:	List of functions to handle Wireless Extensions,
 *				instead of ioctl,
 *				see <net/iw_handler.h> for details.
 *	@wireless_data:	Instance data managed by the core of wireless extensions
 *	@netdev_ops:	Includes several pointers to callbacks,
 *			if one wants to override the ndo_*() functions
 *	@xdp_metadata_ops:	Includes pointers to XDP metadata callbacks.
 *	@ethtool_ops:	Management operations
 *	@l3mdev_ops:	Layer 3 master device operations
 *	@ndisc_ops:	Includes callbacks for different IPv6 neighbour
 *			discovery handling. Necessary for e.g. 6LoWPAN.
 *	@xfrmdev_ops:	Transformation offload operations
 *	@tlsdev_ops:	Transport Layer Security offload operations
 *	@header_ops:	Includes callbacks for creating,parsing,caching,etc
 *			of Layer 2 headers.
 *	@flags:		Interface flags (a la BSD)
 *	@xdp_features:	XDP capability supported by the device
 *	@priv_flags:	Like 'flags' but invisible to userspace,
 *			see if.h for the definitions
 *	@gflags:	Global flags ( kept as legacy )
 *	@padded:	How much padding added by alloc_netdev()
 *	@operstate:	RFC2863 operstate
 *	@link_mode:	Mapping policy to operstate
 *	@if_port:	Selectable AUI, TP, ...
 *	@dma:		DMA channel
 *	@mtu:		Interface MTU value
 *	@min_mtu:	Interface Minimum MTU value
 *	@max_mtu:	Interface Maximum MTU value
 *	@type:		Interface hardware type
 *	@hard_header_len: Maximum hardware header length.
 *	@min_header_len:  Minimum hardware header length
 *	@needed_headroom: Extra headroom the hardware may need, but not in all
 *			  cases can this be guaranteed
 *	@needed_tailroom: Extra tailroom the hardware may need, but not in all
 *			  cases can this be guaranteed. Some cases also use
 *			  LL_MAX_HEADER instead to allocate the skb
 *	interface address info:
 * 	@perm_addr:		Permanent hw address
 * 	@addr_assign_type:	Hw address assignment type
 * 	@addr_len:		Hardware address length
 *	@upper_level:		Maximum depth level of upper devices.
 *	@lower_level:		Maximum depth level of lower devices.
 *	@neigh_priv_len:	Used in neigh_alloc()
 * 	@dev_id:		Used to differentiate devices that share
 * 				the same link layer address
 * 	@dev_port:		Used to differentiate devices that share
 * 				the same function
 *	@addr_list_lock:	XXX: need comments on this one
 *	@name_assign_type:	network interface name assignment type
 *	@uc_promisc:		Counter that indicates promiscuous mode
 *				has been enabled due to the need to listen to
 *				additional unicast addresses in a device that
 *				does not implement ndo_set_rx_mode()
 *	@uc:			unicast mac addresses
 *	@mc:			multicast mac addresses
 *	@dev_addrs:		list of device hw addresses
 *	@queues_kset:		Group of all Kobjects in the Tx and RX queues
 *	@promiscuity:		Number of times the NIC is told to work in
 *				promiscuous mode; if it becomes 0 the NIC will
 *				exit promiscuous mode
 *	@allmulti:		Counter, enables or disables allmulticast mode
 *	@vlan_info:	VLAN info
 *	@dsa_ptr:	dsa specific data
 *	@tipc_ptr:	TIPC specific data
 *	@atalk_ptr:	AppleTalk link
 *	@ip_ptr:	IPv4 specific data
 *	@ip6_ptr:	IPv6 specific data
 *	@ax25_ptr:	AX.25 specific data
 *	@ieee80211_ptr:	IEEE 802.11 specific data, assign before registering
 *	@ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network
 *			 device struct
 *	@mpls_ptr:	mpls_dev struct pointer
 *	@mctp_ptr:	MCTP specific data
 *	@dev_addr:	Hw address (before bcast,
 *			because most packets are unicast)
 *	@_rx:			Array of RX queues
 *	@num_rx_queues:		Number of RX queues
 *				allocated at register_netdev() time
 *	@real_num_rx_queues: 	Number of RX queues currently active in device
 *	@xdp_prog:		XDP sockets filter program pointer
 *	@gro_flush_timeout:	timeout for GRO layer in NAPI
 *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
 *				allow to avoid NIC hard IRQ, on busy queues.
 *	@rx_handler:		handler for received packets
 *	@rx_handler_data: 	XXX: need comments on this one
 *	@tcx_ingress:		BPF & clsact qdisc specific data for ingress processing
 *	@ingress_queue:		XXX: need comments on this one
 *	@nf_hooks_ingress:	netfilter hooks executed for ingress packets
 *	@broadcast:		hw bcast address
 *	@rx_cpu_rmap:	CPU reverse-mapping for RX completion interrupts,
 *			indexed by RX queue number. Assigned by driver.
 *			This must only be set if the ndo_rx_flow_steer
 *			operation is defined
 *	@index_hlist:		Device index hash chain
 *	@_tx:			Array of TX queues
 *	@num_tx_queues:		Number of TX queues allocated at alloc_netdev_mq() time
 *	@real_num_tx_queues: 	Number of TX queues currently active in device
 *	@qdisc:			Root qdisc from userspace point of view
 *	@tx_queue_len:		Max frames per queue allowed
 *	@tx_global_lock: 	XXX: need comments on this one
 *	@xdp_bulkq:		XDP device bulk queue
 *	@xps_maps:		all CPUs/RXQs maps for XPS device
 *	@xps_maps:	XXX: need comments on this one
 *	@tcx_egress:		BPF & clsact qdisc specific data for egress processing
 *	@nf_hooks_egress:	netfilter hooks executed for egress packets
 *	@qdisc_hash:		qdisc hash table
 *	@watchdog_timeo:	Represents the timeout that is used by
 *				the watchdog (see dev_watchdog())
 *	@watchdog_timer:	List of timers
 *	@proto_down_reason:	reason a netdev interface is held down
 *	@pcpu_refcnt:		Number of references to this device
 *	@dev_refcnt:		Number of references to this device
 *	@refcnt_tracker:	Tracker directory for tracked references to this device
 *	@todo_list:		Delayed register/unregister
 *	@link_watch_list:	XXX: need comments on this one
 *	@reg_state:		Register/unregister state machine
 *	@dismantle:		Device is going to be freed
 *	@rtnl_link_state:	This enum represents the phases of creating
 *				a new link
 *	@needs_free_netdev:	Should unregister perform free_netdev?
 *	@priv_destructor:	Called from unregister
 *	@npinfo:		XXX: need comments on this one
 * 	@nd_net:		Network namespace this network device is inside
 * 	@ml_priv:	Mid-layer private
 *	@ml_priv_type:  Mid-layer private type
 * 	@lstats:	Loopback statistics
 * 	@tstats:	Tunnel statistics
 * 	@dstats:	Dummy statistics
 * 	@vstats:	Virtual ethernet statistics
 *	@garp_port:	GARP
 *	@mrp_port:	MRP
 *	@dm_private:	Drop monitor private
 *	@dev:		Class/net/name entry
 *	@sysfs_groups:	Space for optional device, statistics and wireless
 *			sysfs groups
 *	@sysfs_rx_queue_group:	Space for optional per-rx queue attributes
 *	@rtnl_link_ops:	Rtnl_link_ops
 *	@gso_max_size:	Maximum size of generic segmentation offload
 *	@tso_max_size:	Device (as in HW) limit on the max TSO request size
 *	@gso_max_segs:	Maximum number of segments that can be passed to the
 *			NIC for GSO
 *	@tso_max_segs:	Device (as in HW) limit on the max TSO segment count
 * 	@gso_ipv4_max_size:	Maximum size of generic segmentation offload,
 * 				for IPv4.
 *	@dcbnl_ops:	Data Center Bridging netlink ops
 *	@num_tc:	Number of traffic classes in the net device
 *	@tc_to_txq:	XXX: need comments on this one
 *	@prio_tc_map:	XXX: need comments on this one
 *	@fcoe_ddp_xid:	Max exchange id for FCoE LRO by ddp
 *	@priomap:	XXX: need comments on this one
 *	@phydev:	Physical device may attach itself
 *			for hardware timestamping
 *	@sfp_bus:	attached &struct sfp_bus structure.
 *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
 *	@proto_down:	protocol port state information can be sent to the
 *			switch driver and used to set the phys state of the
 *			switch port.
 *	@wol_enabled:	Wake-on-LAN is enabled
 *	@threaded:	napi threaded mode is enabled
 *	@net_notifier_list:	List of per-net netdev notifier block
 *				that follow this device when it is moved
 *				to another network namespace.
 *	@macsec_ops:    MACsec offloading ops
 *	@udp_tunnel_nic_info:	static structure describing the UDP tunnel
 *				offload capabilities of the device
 *	@udp_tunnel_nic:	UDP tunnel offload state
 *	@xdp_state:		stores info on attached XDP BPF programs
 *	@nested_level:	Used as a parameter of spin_lock_nested() of
 *			dev->addr_list_lock.
 *	@unlink_list:	As netif_addr_lock() can be called recursively,
 *			keep a list of interfaces to be deleted.
 *	@gro_max_size:	Maximum size of aggregated packet in generic
 *			receive offload (GRO)
 * 	@gro_ipv4_max_size:	Maximum size of aggregated packet in generic
 * 				receive offload (GRO), for IPv4.
 *	@xdp_zc_max_segs:	Maximum number of segments supported by AF_XDP
 *				zero copy driver
 *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
 *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
 *	@watchdog_dev_tracker:	refcount tracker used by watchdog.
 *	@dev_registered_tracker:	tracker for reference held while
 *					registered
 *	@offload_xstats_l3:	L3 HW stats for this netdevice.
 *	@devlink_port:	Pointer to related devlink port structure.
 *			Assigned by a driver before netdev registration using
 *			SET_NETDEV_DEVLINK_PORT macro. This pointer is static
 *			during the time netdevice is registered.
 *	FIXME: cleanup struct net_device such that network protocol info
 *	moves out.

struct net_device {
	char			name[IFNAMSIZ];
	struct netdev_name_node	*name_node;
	struct dev_ifalias	__rcu *ifalias;
	 *	I/O specific fields
	 *	FIXME: Merge these and struct ifmap into one
	unsigned long		mem_end;
	unsigned long		mem_start;
	unsigned long		base_addr;

	 *	Some hardware also needs these fields (state,dev_list,
	 *	napi_list,unreg_list,close_list) but they are not
	 *	part of the usual set specified in Space.c.

	unsigned long		state;

	struct list_head	dev_list;
	struct list_head	napi_list;
	struct list_head	unreg_list;
	struct list_head	close_list;
	struct list_head	ptype_all;
	struct list_head	ptype_specific;

	struct {
		struct list_head upper;
		struct list_head lower;
	} adj_list;

	/* Read-mostly cache-line for fast-path access */
	unsigned int		flags;
	xdp_features_t		xdp_features;
	unsigned long long	priv_flags;
	const struct net_device_ops *netdev_ops;
	const struct xdp_metadata_ops *xdp_metadata_ops;
	int			ifindex;
	unsigned short		gflags;
	unsigned short		hard_header_len;

	/* Note : dev->mtu is often read without holding a lock.
	 * Writers usually hold RTNL.
	 * It is recommended to use READ_ONCE() to annotate the reads,
	 * and to use WRITE_ONCE() to annotate the writes.
	unsigned int		mtu;
	unsigned short		needed_headroom;
	unsigned short		needed_tailroom;

	netdev_features_t	features;
	netdev_features_t	hw_features;
	netdev_features_t	wanted_features;
	netdev_features_t	vlan_features;
	netdev_features_t	hw_enc_features;
	netdev_features_t	mpls_features;
	netdev_features_t	gso_partial_features;

	unsigned int		min_mtu;
	unsigned int		max_mtu;
	unsigned short		type;
	unsigned char		min_header_len;
	unsigned char		name_assign_type;

	int			group;

	struct net_device_stats	stats; /* not used by modern drivers */

	struct net_device_core_stats __percpu *core_stats;

	/* Stats to monitor link on/off, flapping */
	atomic_t		carrier_up_count;
	atomic_t		carrier_down_count;

	const struct iw_handler_def *wireless_handlers;
	struct iw_public_data	*wireless_data;
	const struct ethtool_ops *ethtool_ops;
	const struct l3mdev_ops	*l3mdev_ops;
	const struct ndisc_ops *ndisc_ops;

	const struct xfrmdev_ops *xfrmdev_ops;

	const struct tlsdev_ops *tlsdev_ops;

	const struct header_ops *header_ops;

	unsigned char		operstate;
	unsigned char		link_mode;

	unsigned char		if_port;
	unsigned char		dma;

	/* Interface address info. */
	unsigned char		perm_addr[MAX_ADDR_LEN];
	unsigned char		addr_assign_type;
	unsigned char		addr_len;
	unsigned char		upper_level;
	unsigned char		lower_level;

	unsigned short		neigh_priv_len;
	unsigned short          dev_id;
	unsigned short          dev_port;
	unsigned short		padded;

	spinlock_t		addr_list_lock;
	int			irq;

	struct netdev_hw_addr_list	uc;
	struct netdev_hw_addr_list	mc;
	struct netdev_hw_addr_list	dev_addrs;

	struct kset		*queues_kset;
	struct list_head	unlink_list;
	unsigned int		promiscuity;
	unsigned int		allmulti;
	bool			uc_promisc;
	unsigned char		nested_level;

	/* Protocol-specific pointers */

	struct in_device __rcu	*ip_ptr;
	struct inet6_dev __rcu	*ip6_ptr;
	struct vlan_info __rcu	*vlan_info;
	struct dsa_port		*dsa_ptr;
	struct tipc_bearer __rcu *tipc_ptr;
	void 			*atalk_ptr;
	void			*ax25_ptr;
	struct wireless_dev	*ieee80211_ptr;
	struct wpan_dev		*ieee802154_ptr;
	struct mpls_dev __rcu	*mpls_ptr;
	struct mctp_dev __rcu	*mctp_ptr;

 * Cache lines mostly used on receive path (including eth_type_trans())
	/* Interface address info used in eth_type_trans() */
	const unsigned char	*dev_addr;

	struct netdev_rx_queue	*_rx;
	unsigned int		num_rx_queues;
	unsigned int		real_num_rx_queues;

	struct bpf_prog __rcu	*xdp_prog;
	unsigned long		gro_flush_timeout;
	int			napi_defer_hard_irqs;
#define GRO_LEGACY_MAX_SIZE	65536u
/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE),
 * and shinfo->gso_segs is a 16bit field.
#define GRO_MAX_SIZE		(8 * 65535u)
	unsigned int		gro_max_size;
	unsigned int		gro_ipv4_max_size;
	unsigned int		xdp_zc_max_segs;
	rx_handler_func_t __rcu	*rx_handler;
	void __rcu		*rx_handler_data;
	struct bpf_mprog_entry __rcu *tcx_ingress;
	struct netdev_queue __rcu *ingress_queue;
	struct nf_hook_entries __rcu *nf_hooks_ingress;

	unsigned char		broadcast[MAX_ADDR_LEN];
	struct cpu_rmap		*rx_cpu_rmap;
	struct hlist_node	index_hlist;

 * Cache lines mostly used on transmit path
	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
	unsigned int		num_tx_queues;
	unsigned int		real_num_tx_queues;
	struct Qdisc __rcu	*qdisc;
	unsigned int		tx_queue_len;
	spinlock_t		tx_global_lock;

	struct xdp_dev_bulk_queue __percpu *xdp_bulkq;

	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
	struct bpf_mprog_entry __rcu *tcx_egress;
	struct nf_hook_entries __rcu *nf_hooks_egress;

	DECLARE_HASHTABLE	(qdisc_hash, 4);
	/* These may be needed for future network-power-down code. */
	struct timer_list	watchdog_timer;
	int			watchdog_timeo;

	u32                     proto_down_reason;

	struct list_head	todo_list;

	int __percpu		*pcpu_refcnt;
	refcount_t		dev_refcnt;
	struct ref_tracker_dir	refcnt_tracker;

	struct list_head	link_watch_list;

	       NETREG_REGISTERED,	/* completed register_netdevice */
	       NETREG_UNREGISTERING,	/* called unregister_netdevice */
	       NETREG_UNREGISTERED,	/* completed unregister todo */
	       NETREG_RELEASED,		/* called free_netdev */
	       NETREG_DUMMY,		/* dummy device for NAPI poll */
	} reg_state:8;

	bool dismantle;

	enum {
	} rtnl_link_state:16;

	bool needs_free_netdev;
	void (*priv_destructor)(struct net_device *dev);

	struct netpoll_info __rcu	*npinfo;

	possible_net_t			nd_net;

	/* mid-layer private */
	void				*ml_priv;
	enum netdev_ml_priv_type	ml_priv_type;

	union {
		struct pcpu_lstats __percpu		*lstats;
		struct pcpu_sw_netstats __percpu	*tstats;
		struct pcpu_dstats __percpu		*dstats;

	struct garp_port __rcu	*garp_port;
	struct mrp_port __rcu	*mrp_port;
	struct dm_hw_stat_delta __rcu *dm_private;
	struct device		dev;
	const struct attribute_group *sysfs_groups[4];
	const struct attribute_group *sysfs_rx_queue_group;

	const struct rtnl_link_ops *rtnl_link_ops;

	/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SEGS		65535u
#define GSO_LEGACY_MAX_SIZE	65536u
/* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE),
 * and shinfo->gso_segs is a 16bit field.
#define GSO_MAX_SIZE		(8 * GSO_MAX_SEGS)

	unsigned int		gso_max_size;
#define TSO_LEGACY_MAX_SIZE	65536
	unsigned int		tso_max_size;
	u16			gso_max_segs;
#define TSO_MAX_SEGS		U16_MAX
	u16			tso_max_segs;
	unsigned int		gso_ipv4_max_size;

	const struct dcbnl_rtnl_ops *dcbnl_ops;
	s16			num_tc;
	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
	u8			prio_tc_map[TC_BITMASK + 1];

	unsigned int		fcoe_ddp_xid;
	struct netprio_map __rcu *priomap;
	struct phy_device	*phydev;
	struct sfp_bus		*sfp_bus;
	struct lock_class_key	*qdisc_tx_busylock;
	bool			proto_down;
	unsigned		wol_enabled:1;
	unsigned		threaded:1;

	struct list_head	net_notifier_list;

	/* MACsec management functions */
	const struct macsec_ops *macsec_ops;
	const struct udp_tunnel_nic_info	*udp_tunnel_nic_info;
	struct udp_tunnel_nic	*udp_tunnel_nic;

	/* protected by rtnl_lock */
	struct bpf_xdp_entity	xdp_state[__MAX_XDP_MODE];

	u8 dev_addr_shadow[MAX_ADDR_LEN];
	netdevice_tracker	linkwatch_dev_tracker;
	netdevice_tracker	watchdog_dev_tracker;
	netdevice_tracker	dev_registered_tracker;
	struct rtnl_hw_stats64	*offload_xstats_l3;

	struct devlink_port	*devlink_port;