ebtables -t broute -I BROUTING
ebtables -t broute -I BROUTING --log --log-prefix 'ctc/ebtable/broute-BROUTING' --log-level debug
此条规则在内核生效点是?
/*
 * Hook description for the broute table: ebt_broute() is attached to the
 * bridge family at NF_BR_PRE_ROUTING with priority NF_BR_PRI_FIRST.
 */
static const struct nf_hook_ops ebt_ops_broute = {
	.pf		= NFPROTO_BRIDGE,
	.hooknum	= NF_BR_PRE_ROUTING,
	.priority	= NF_BR_PRI_FIRST,
	.hook		= ebt_broute,
};

/*
 * Per-network-namespace init: passes broute_table, the hook ops above and
 * &net->xt.broute_table to ebt_register_table() and returns its result.
 */
static int __net_init broute_net_init(struct net *net)
{
	int err;

	err = ebt_register_table(net, &broute_table, &ebt_ops_broute,
				 &net->xt.broute_table);
	return err;
}
/*
 * Bridge Hooks
 *
 * Each directive must sit on its own line: when they share one physical
 * line only the first name is defined and the rest becomes its body.
 */
/* After promisc drops, checksum checks. */
#define NF_BR_PRE_ROUTING	0
/* If the packet is destined for this box. */
#define NF_BR_LOCAL_IN		1
/* If the packet is destined for another interface. */
#define NF_BR_FORWARD		2
/* Packets coming from a local process. */
#define NF_BR_LOCAL_OUT		3
/* Packets about to hit the wire. */
#define NF_BR_POST_ROUTING	4
/* Not really a hook, but used for the ebtables broute table */
#define NF_BR_BROUTING		5
#define NF_BR_NUMHOOKS		6
在NF_BR_PRE_ROUTING 入口的NF_BR_PRI_FIRST 第一个执行;static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
const struct nf_hook_state *s) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct nf_hook_state state; unsigned char *dest; int ret; if (!p || p->state != BR_STATE_FORWARDING) return NF_ACCEPT; nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, s->in, NULL, NULL, s->net, NULL); ret = ebt_do_table(skb, &state, state.net->xt.broute_table); if (ret != NF_DROP) return ret;// 返回如果是ebt_accept对应的nf_accept,则表示继续执行优先级低的规则比如NF_BR_PRI_BRNF,继续bridge //z这一步表示nf_drop 最后达标br_netfilter_broute =1
/* DROP in ebtables -t broute means that the * skb should be routed, not bridged. * This is awkward, but can't be changed for compatibility * reasons. * * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. */ BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1; /* undo PACKET_HOST mangling done in br_input in case the dst * address matches the logical bridge but not the port. */ dest = eth_hdr(skb)->h_dest; if (skb->pkt_type == PACKET_HOST && !ether_addr_equal(skb->dev->dev_addr, dest) && ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_OTHERHOST; return NF_ACCEPT; //此时继续执行 }
ebt_broute 作用
在内核 bridge 子系统中,broute 表是在帧被桥接之前,做“是否应该改为路由”的判断用的。
- 如果匹配上规则并执行 ACCEPT,则继续桥接(L2),不进入 IP 协议栈。
- 如果执行 DROP,则代表“不要桥接”,转入 L3 网络协议栈进行路由处理(例如交给本地 IP 层)。
/*
 * The single chain of the broute table, with default policy EBT_ACCEPT.
 *
 *   EBT_ACCEPT - the frame will be bridged
 *   EBT_DROP   - the frame will be routed
 */
static struct ebt_entries initial_chain = {
	.policy	= EBT_ACCEPT,
	.name	= "BROUTING",
};
实际使用场景
假设你机器上有一张桥接网卡(br0),你希望来自某 IP 的流量不走桥,而是让主机处理:
ebtables -t broute -A BROUTING -p IPv4 --ip-source 192.168.1.100 -j DROP
这表示:源 IP 为 192.168.1.100 的 IPv4 流量不桥接,交给主机路由栈处理。
如果 broute 表返回 EBT_ACCEPT(即继续桥接),则接着执行同一钩子点上排在后面的钩子:
NFPROTO_BRIDGE -> NF_BR_PRE_ROUTING -> NF_BR_PRI_BRNF -> br_nf_pre_routing
static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
}
/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. * Replicate the checks that IPv4 does on packet reception. * Set skb->dev to the bridge device (i.e. parent of the * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; struct net_bridge_port *p; struct net_bridge *br; __u32 len = nf_bridge_encap_header_len(skb); struct brnf_net *brnet; p = br_port_get_rcu(state->in); br = p->br; brnet = net_generic(state->net, brnf_net_id); if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES)) return NF_ACCEPT; //如果这两个都为 false,说明没有配置要把 bridge 流量转交 iptables nf_bridge = nf_bridge_info_get(skb); nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; } //将桥接过来的 IPv4 包“上送”到 netfilter 框架的 PREROUTING 链处理,并交由回调函数 br_nf_pre_routing_finish() 来继续处理流程
return NF_STOLEN;
这一句告诉调用方(遍历钩子的 netfilter 代码):
“我把这个 skb 包交出去了,由 netfilter 异步处理,当前这个钩子函数不再继续处理它。”
也就是说,这个 skb 的“所有权”已经被 Netfilter 接管,由它决定是否丢弃、修改、接受,或者继续桥接。
回调函数:br_nf_pre_routing_finish()
当 PREROUTING 链处理完成后,Netfilter 会异步调用 br_nf_pre_routing_finish() 来继续处理逻辑,它通常会:
- 检查是否有 DNAT 修改目标地址
- 判断数据包是要送入本地还是桥接出去
- 根据 netfilter 处理结果决定是否继续桥接转发或丢弃
Netfilter 桥接和路由冲突问题
在开启 bridge-nf-call-iptables=1 后,桥接(L2)流量进入了 Netfilter 的 L3 处理流程(如 iptables PREROUTING),此时可以修改目标 IP(即 DNAT)或目标端口(REDIRECT)等。
但如果你在 PREROUTING 链上对数据包执行了 DNAT,它本来应该继续桥接转发,但现在由于 IP 发生变化,你可能希望它不再桥接,而是送进本地协议栈进行路由处理。
也就是
1、调用br_handle_frame_finish 进行bridge上二层转发
2、neigh = dst_neigh_lookup_skb(dst, skb);neigh->output(neigh, skb);
br_handle_frame
br_handle_frame 为 bridge 报文的 input 入口。
详细代码就不看了,就是二层交换机的处理逻辑;但是有个特殊的地方,就是对 br_netfilter_broute 标志位的处理:
static int nf_hook_bridge_pre(struct sk_buff *skb, struct sk_buff **pskb) { nf_hook_state_init(&state, NF_BR_PRE_ROUTING, NFPROTO_BRIDGE, skb->dev, NULL, NULL, net, br_handle_frame_finish); for (i = 0; i < e->num_hook_entries; i++) { verdict = nf_hook_entry_hookfn(&e->hooks[i], skb, &state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: if (BR_INPUT_SKB_CB(skb)->br_netfilter_broute) { *pskb = skb; return RX_HANDLER_PASS; // 返回 RX_HANDLER_PASS,让该包“绕过桥接”进入协议栈处理(如 IP 层) } break; case NF_DROP: kfree_skb(skb); return RX_HANDLER_CONSUMED; case NF_QUEUE: ret = nf_queue(skb, &state, i, verdict); if (ret == 1) continue; return RX_HANDLER_CONSUMED; default: /* STOLEN */ return RX_HANDLER_CONSUMED; } } br_handle_frame_finish(dev_net(skb->dev), NULL, skb); }
这段代码做的是:
- 如果 Netfilter 的 verdict 是 NF_ACCEPT
- 并且 skb 的 br_netfilter_broute 标志为 1(说明 broute 表对该帧返回了 DROP,ebt_broute 已打上该标志)
那么就:
✅ 返回 RX_HANDLER_PASS,让该包“绕过桥接”进入协议栈处理(如 IP 层)
/*
 * Abridged excerpt of the core receive path, reduced to how the verdict of
 * a device's rx_handler is dispatched.
 *
 * NOTE(review): in this excerpt pt_prev and orig_dev are never assigned
 * before use, ptype, pfmemalloc, ppt_prev and deliver_exact are not
 * otherwise used, and the another_round label is not shown - presumably
 * all elided from the full function.
 *
 * Returns ret: initially NET_RX_DROP, replaced by deliver_skb()'s result
 * when pt_prev is set, and by NET_RX_SUCCESS when the rx_handler reports
 * RX_HANDLER_CONSUMED.  *pskb is set to skb on exit.
 */
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
				    struct packet_type **ppt_prev)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct sk_buff *skb = *pskb;
	struct net_device *orig_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	net_timestamp_check(!netdev_tstamp_prequeue, skb);

	/* Look up the handler installed on the receiving device, if any. */
	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			/* Report success and skip the protocol delivery below. */
			ret = NET_RX_SUCCESS;
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* no break: execution continues into RX_HANDLER_PASS */
		case RX_HANDLER_PASS:
			/* Leave the switch; delivery by protocol type follows. */
			break;
		default:
			BUG();
		}
	}

	/* Deliver according to skb->protocol using the device's ptype_specific list. */
	type = skb->protocol;
	deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
			       &orig_dev->ptype_specific);
out:
	/* The invariant here is that if *ppt_prev is not NULL
	 * then skb should also be non-NULL.
	 *
	 * Apparently *ppt_prev assignment above holds this invariant due to
	 * skb dereferencing near it.
	 */
	*pskb = skb;
	return ret;
}
可以看到在收包入口处:
1、如果是bridge则调用rx_handler 处理,如果返回的是RX_HANDLER_PASS 则进入协议栈比如ip层处理。
2、对于bridge的rx_handler 只会返回RX_HANDLER_CONSUMED 或者RX_HANDLER_PASS
static unsigned int ebt_broute(void *priv, struct sk_buff *skb, const struct nf_hook_state *s){struct net_bridge_port *p = br_port_get_rcu(skb->dev);struct nf_hook_state state;unsigned char *dest;int ret;
if (!p || p->state != BR_STATE_FORWARDING)return NF_ACCEPT;
nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, s->in, NULL, NULL, s->net, NULL);
ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
if (ret != NF_DROP)return ret;
/* DROP in ebtables -t broute means that the * skb should be routed, not bridged. * This is awkward, but can't be changed for compatibility * reasons. * * We map DROP to ACCEPT and set the ->br_netfilter_broute flag. */BR_INPUT_SKB_CB(skb)->br_netfilter_broute = 1;
/* undo PACKET_HOST mangling done in br_input in case the dst * address matches the logical bridge but not the port. */dest = eth_hdr(skb)->h_dest;if (skb->pkt_type == PACKET_HOST && !ether_addr_equal(skb->dev->dev_addr, dest) && ether_addr_equal(p->br->dev->dev_addr, dest))skb->pkt_type = PACKET_OTHERHOST;
return NF_ACCEPT;}

浙公网安备 33010602011771号