commit 322c15598a483ba80d2ba3ced9a62f9e7a9a14a9 Author: Ilya Maximets Date: Fri Feb 17 21:09:59 2023 +0100 classifier: Fix missing masks on a final stage with ports trie. Flow lookup doesn't include masks of the final stage in a resulting flow wildcards in case that stage had L4 ports match. Only the result of ports trie lookup is added to the mask. It might be sufficient in many cases, but it's not correct, because ports trie is not how we decided that the packet didn't match in this subtable. In fact, we used a full subtable mask in order to determine that, so all the subtable mask bits has to be added. Ports trie can still be used to adjust ports' mask, but it is not sufficient to determine that the packet didn't match. Assuming we have following 2 OpenFlow rules on the bridge: table=0, priority=10,tcp,tp_dst=80,tcp_flags=+psh actions=drop table=0, priority=0 actions=output(1) The first high priority rule supposed to drop all the TCP data traffic sent on port 80. The handshake, however, is allowed for forwarding. Both 'tcp_flags' and 'tp_dst' are on the final stage in the flow. Since the stage mask from that stage is not incorporated into the flow wildcards and only ports mask is getting updated, we have the following megaflow for the SYN packet that has no match on 'tcp_flags': $ ovs-appctl ofproto/trace br0 "in_port=br0,tcp,tp_dst=80,tcp_flags=syn" Megaflow: recirc_id=0,eth,tcp,in_port=LOCAL,nw_frag=no,tp_dst=80 Datapath actions: 1 If this flow is getting installed into datapath flow table, all the packets for port 80, regardless of TCP flags, will be forwarded. Incorporating all the looked at bits from the final stage into the stages map in order to get all the necessary wildcards. Ports mask has to be updated as a last step, because it doesn't cover the full 64-bit slot in the flowmap. With this change, in the example above, OVS is producing correct flow wildcards including match on TCP flags: Megaflow: recirc_id=0,eth,tcp,in_port=LOCAL,nw_frag=no,tp_dst=80,tcp_flags=-psh Datapath actions: 1 This way only -psh packets will be forwarded, as expected. This issue affects all other fields on stage 4, not only TCP flags. Tests included to cover tcp_flags, nd_target and ct_tp_src/dst. First two are frequently used, ct ones are sharing the same flowmap slot with L4 ports, so important to test. Before the pre-computation of stage masks, flow wildcards were updated during lookup, so there was no issue. The bits of the final stage was lost with introduction of 'stages_map'. Recent adjustment of segment boundaries exposed 'tcp_flags' to the issue. Reported-at: https://github.com/openvswitch/ovs-issues/issues/272 Fixes: ca44218515f0 ("classifier: Adjust segment boundary to execute prerequisite processing.") Fixes: fa2fdbf8d0c1 ("classifier: Pre-compute stage masks.") Acked-by: Aaron Conole Signed-off-by: Ilya Maximets diff --git a/lib/classifier.c b/lib/classifier.c index c4790ee6b..f6a86b662 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -1695,6 +1695,8 @@ find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, const struct cls_match *rule = NULL; struct flowmap stages_map = FLOWMAP_EMPTY_INITIALIZER; unsigned int mask_offset = 0; + bool adjust_ports_mask = false; + ovs_be32 ports_mask; int i; /* Try to finish early by checking fields in segments. */ @@ -1722,6 +1724,9 @@ find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, subtable->index_maps[i], flow, wc)) { goto no_match; } + /* Accumulate the map used so far. */ + stages_map = flowmap_or(stages_map, subtable->index_maps[i]); + hash = flow_hash_in_minimask_range(flow, &subtable->mask, subtable->index_maps[i], &mask_offset, &basis); @@ -1731,14 +1736,16 @@ find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, * unwildcarding all the ports bits, use the ports trie to figure out a * smaller set of bits to unwildcard. */ unsigned int mbits; - ovs_be32 value, plens, mask; + ovs_be32 value, plens; - mask = miniflow_get_ports(&subtable->mask.masks); - value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; + ports_mask = miniflow_get_ports(&subtable->mask.masks); + value = ((OVS_FORCE ovs_be32 *) flow)[TP_PORTS_OFS32] & ports_mask; mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32); - ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= - mask & be32_prefix_mask(mbits); + ports_mask &= be32_prefix_mask(mbits); + ports_mask |= ((OVS_FORCE ovs_be32 *) &wc->masks)[TP_PORTS_OFS32]; + + adjust_ports_mask = true; goto no_match; } @@ -1751,6 +1758,14 @@ no_match: /* Unwildcard the bits in stages so far, as they were used in determining * there is no match. */ flow_wildcards_fold_minimask_in_map(wc, &subtable->mask, stages_map); + if (adjust_ports_mask) { + /* This has to be done after updating flow wildcards to overwrite + * the ports mask back. We can't simply disable the corresponding bit + * in the stages map, because it has 64-bit resolution, i.e. one + * bit covers not only tp_src/dst, but also ct_tp_src/dst, which are + * not covered by the trie. */ + ((OVS_FORCE ovs_be32 *) &wc->masks)[TP_PORTS_OFS32] = ports_mask; + } return NULL; } diff --git a/tests/classifier.at b/tests/classifier.at index f652b5983..de2705653 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -65,6 +65,94 @@ Datapath actions: 2 OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([flow classifier - lookup segmentation - final stage]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 3 +AT_DATA([flows.txt], [dnl +table=0 in_port=1 priority=33,tcp,tp_dst=80,tcp_flags=+psh,action=output(2) +table=0 in_port=1 priority=0,ip,action=drop +table=0 in_port=2 priority=16,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1 ,action=output(1) +table=0 in_port=2 priority=0,ip,action=drop +table=0 in_port=3 action=resubmit(,1) +table=1 in_port=3 priority=45,ct_state=+trk+rpl,ct_nw_proto=6,ct_tp_src=3/0x1,tcp,tp_dst=80,tcp_flags=+psh,action=output(2) +table=1 in_port=3 priority=10,ip,action=drop +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=syn'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=-psh +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=syn|ack'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=-psh +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=ack|psh'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=+psh +Datapath actions: 2 +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=-psh +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=0x40/0xfff0,tcp_flags=-psh +Datapath actions: drop +]) + +dnl Having both the port and the tcp flags in the resulting megaflow below +dnl is redundant, but that is how ports trie logic is implemented. +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=81'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=81,tcp_flags=-psh +Datapath actions: drop +]) + +dnl nd_target is redundant in the megaflow below and it is also not relevant +dnl for an icmp reply. Datapath may discard that match, but it is OK as long +dnl as we have prerequisites (icmp_type) in the match as well. +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=128,icmpv6_code=0"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x80/0xfc,nd_target=:: +Datapath actions: drop +]) + +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=135,icmpv6_code=0"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x87/0xff,icmp_code=0x0/0xff,nd_target=:: +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=135,icmpv6_code=0,nd_target=1000::1"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x87/0xff,icmp_code=0x0/0xff,nd_target=1000::1 +Datapath actions: 1 +]) +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=135,icmpv6_code=0,nd_target=1000::2"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x87/0xff,icmp_code=0x0/0xff,nd_target=1000::2 +Datapath actions: drop +]) + +dnl Check that ports' mask doesn't affect ct ports. +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=3,ct_state=trk|rpl,ct_nw_proto=6,ct_tp_src=3,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=psh'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,ct_state=+rpl+trk,ct_nw_proto=6,ct_tp_src=0x1/0x1,eth,tcp,in_port=3,nw_frag=no,tp_dst=80,tcp_flags=+psh +Datapath actions: 2 +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=3,ct_state=trk|rpl,ct_nw_proto=6,ct_tp_src=3,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79,tcp_flags=psh'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,ct_state=+rpl+trk,ct_nw_proto=6,ct_tp_src=0x1/0x1,eth,tcp,in_port=3,nw_frag=no,tp_dst=0x40/0xfff0,tcp_flags=+psh +Datapath actions: drop +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([flow classifier prefix lookup]) AT_SETUP([flow classifier - prefix lookup]) OVS_VSWITCHD_START