__attribute__((section("calico_connect_v4"))) //section 1:在建立connection时,做nat转发,将请求转发至后端的pod intcali_ctlb_v4(struct bpf_sock_addr *ctx) { CALI_DEBUG("calico_connect_v4\n"); /* do not process anything non-TCP or non-UDP, but do not block it, will be * dealt with somewhere else. */ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) { CALI_INFO("unexpected sock type %d\n", ctx->type); goto out; } uint8_t ip_proto; switch (ctx->type) { case SOCK_STREAM: CALI_DEBUG("SOCK_STREAM -> assuming TCP\n"); ip_proto = IPPROTO_TCP; break; case SOCK_DGRAM: CALI_DEBUG("SOCK_DGRAM -> assuming UDP\n"); ip_proto = IPPROTO_UDP; break; default: CALI_DEBUG("Unknown socket type: %d\n", (int)ctx->type); goto out; } do_nat_common(ctx, ip_proto); out: return1; } __attribute__((section("calico_sendmsg_v4"))) //section2 : sendmsg只处理udp相关的发包操作,tcp忽略,因为tcp为面向连接的传输 intcali_ctlb_sendmsg_v4(struct bpf_sock_addr *ctx) { CALI_DEBUG("sendmsg_v4 %x:%d\n", be32_to_host(ctx->user_ip4), be32_to_host(ctx->user_port)>>16); if (ctx->type != SOCK_DGRAM) { CALI_INFO("unexpected sock type %d\n", ctx->type); goto out; } do_nat_common(ctx, IPPROTO_UDP); out: return1; } __attribute__((section("calico_recvmsg_v4"))) // section3: recvmsg也只处理udp相关的收包操作,tcp忽略。因为tcp为面向连接的传输 intcali_ctlb_recvmsg_v4(struct bpf_sock_addr *ctx) { CALI_DEBUG("recvmsg_v4 %x:%d\n", be32_to_host(ctx->user_ip4), ctx_port_to_host(ctx->user_port)); if (ctx->type != SOCK_DGRAM) { CALI_INFO("unexpected sock type %d\n", ctx->type); goto out; } uint64_t cookie = bpf_get_socket_cookie(ctx); CALI_DEBUG("Lookup: ip=%x port=%d(BE) cookie=%x",ctx->user_ip4, ctx->user_port, cookie); structsendrecv4_keykey = { .ip = ctx->user_ip4, .port = ctx->user_port, .cookie = cookie, }; structsendrecv4_val *revnat = cali_v4_srmsg_lookup_elem(&key); if (revnat == NULL) { CALI_DEBUG("revnat miss for %x:%d\n", be32_to_host(ctx->user_ip4), ctx_port_to_host(ctx->user_port)); /* we are past policy and the packet was allowed. Either the * mapping does not exist anymore and if the app cares, it * should check the addresses. It is more likely a packet sent * to server from outside and no mapping is expected. */ goto out; } ctx->user_ip4 = revnat->ip; ctx->user_port = revnat->port; CALI_DEBUG("recvmsg_v4 rev nat to %x:%d\n", be32_to_host(ctx->user_ip4), ctx_port_to_host(ctx->user_port)); out: return1; }
static CALI_BPF_INLINE voiddo_nat_common(struct bpf_sock_addr *ctx, uint8_t proto) { /* We do not know what the source address is yet, we only know that it * is the localhost, so we might just use 0.0.0.0. That would not * conflict with traffic from elsewhere. * * XXX it means that all workloads that use the cgroup hook have the * XXX same affinity, which (a) is sub-optimal and (b) leaks info between * XXX workloads. */ nat_lookup_result res = NAT_LOOKUP_ALLOW; uint16_t dport_he = (uint16_t)(be32_to_host(ctx->user_port)>>16); structcalico_nat_dest *nat_dest; nat_dest = calico_v4_nat_lookup(0, ctx->user_ip4, proto, dport_he, &res); //从map中查找k8s service相关的信息 if (!nat_dest) { CALI_INFO("NAT miss.\n"); goto out; } uint32_t dport_be = host_to_ctx_port(nat_dest->port); ctx->user_ip4 = nat_dest->addr; //修改socket地址的目的地址为后端pod ctx->user_port = dport_be; out: return; }