/* * Internet Control Message Protocol (ICMPv6) * Linux INET6 implementation * * Authors: * Pedro Roque * * $Id: icmpv6.c,v 1.3 2009/02/24 01:21:16 sthibaul Exp $ * * Based on net/ipv4/icmp.c * * RFC 1885 * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ /* * Changes: * * Andi Kleen : exception handling * Andi Kleen add rate limits. never reply to a icmp. * add more length checks and other fixes. */ #define __NO_VERSION__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct icmpv6_mib icmpv6_statistics; /* * ICMP socket for flow control. */ struct socket *icmpv6_socket; int icmpv6_rcv(struct sk_buff *skb, unsigned long len); static struct inet6_protocol icmpv6_protocol = { icmpv6_rcv, /* handler */ NULL, /* error control */ NULL, /* next */ IPPROTO_ICMPV6, /* protocol ID */ 0, /* copy */ NULL, /* data */ "ICMPv6" /* name */ }; struct icmpv6_msg { struct icmp6hdr icmph; __u8 *data; struct in6_addr *daddr; int len; __u32 csum; }; /* * getfrag callback */ static int icmpv6_getfrag(const void *data, struct in6_addr *saddr, char *buff, unsigned int offset, unsigned int len) { struct icmpv6_msg *msg = (struct icmpv6_msg *) data; struct icmp6hdr *icmph; __u32 csum; /* * in theory offset must be 0 since we never send more * than IPV6_MIN_MTU bytes on an error or more than the path mtu * on an echo reply. (those are the rules on RFC 1883) * * Luckily, this statement is obsolete after * draft-ietf-ipngwg-icmp-v2-00 --ANK (980730) */ if (offset) { csum = csum_partial_copy((void *) msg->data + offset - sizeof(struct icmp6hdr), buff, len, msg->csum); msg->csum = csum; return 0; } csum = csum_partial_copy((void *) &msg->icmph, buff, sizeof(struct icmp6hdr), msg->csum); csum = csum_partial_copy((void *) msg->data, buff + sizeof(struct icmp6hdr), len - sizeof(struct icmp6hdr), csum); icmph = (struct icmp6hdr *) buff; icmph->icmp6_cksum = csum_ipv6_magic(saddr, msg->daddr, msg->len, IPPROTO_ICMPV6, csum); return 0; } /* * Slightly more convenient version of icmpv6_send. */ void icmpv6_param_prob(struct sk_buff *skb, int code, void *pos) { int offset = (u8*)pos - (u8*)skb->nh.ipv6h; icmpv6_send(skb, ICMPV6_PARAMPROB, code, offset, skb->dev); kfree_skb(skb); } /* * Figure out, may we reply to this packet with icmp error. * * We do not reply, if: * - it was icmp error message. * - it is truncated, so that it is known, that protocol is ICMPV6 * (i.e. in the middle of some exthdr) * - it is not the first fragment. BTW IPv6 specs say nothing about * this case, but it is clear, that our reply would be useless * for sender. * * --ANK (980726) */ static int is_ineligible(struct ipv6hdr *hdr, int len) { u8 *ptr; __u8 nexthdr = hdr->nexthdr; if (len < (int)sizeof(*hdr)) return 1; ptr = ipv6_skip_exthdr((struct ipv6_opt_hdr *)(hdr+1), &nexthdr, len - sizeof(*hdr)); if (!ptr) return 0; if (nexthdr == IPPROTO_ICMPV6) { struct icmp6hdr *ihdr = (struct icmp6hdr *)ptr; return (ptr - (u8*)hdr) > len || !(ihdr->icmp6_type & 0x80); } return nexthdr == NEXTHDR_FRAGMENT; } int sysctl_icmpv6_time = 1*HZ; /* * Check the ICMP output rate limit */ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, struct flowi *fl) { struct dst_entry *dst; int res = 0; /* Informational messages are not limited. */ if (type & 0x80) return 1; /* Do not limit pmtu discovery, it would break it. */ if (type == ICMPV6_PKT_TOOBIG) return 1; /* * Look up the output route. * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(sk, fl); if (dst->error) { ipv6_statistics.Ip6OutNoRoutes++; } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = 1; } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = sysctl_icmpv6_time; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); res = xrlim_allow(dst, tmo); } dst_release(dst); return res; } /* * an inline helper for the "simple" if statement below * checks if parameter problem report is caused by an * unrecognized IPv6 option that has the Option Type * highest-order two bits set to 10 */ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) { u8 *buff = skb->nh.raw; return ( ( *(buff + offset) & 0xC0 ) == 0x80 ); } /* * Send an ICMP message in response to a packet in error */ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct device *dev) { struct ipv6hdr *hdr = skb->nh.ipv6h; struct sock *sk = icmpv6_socket->sk; struct in6_addr *saddr = NULL; int iif = 0; struct icmpv6_msg msg; struct flowi fl; int addr_type = 0; int len; /* * sanity check pointer in case of parameter problem */ if (type == ICMPV6_PARAMPROB && (info > (skb->tail - ((unsigned char *) hdr)))) { printk(KERN_DEBUG "icmpv6_send: bug! pointer > skb\n"); return; } /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) * Rule (e.1) is enforced by not using icmpv6_send * in any code that processes icmp errors. */ addr_type = ipv6_addr_type(&hdr->daddr); if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) saddr = &hdr->daddr; /* * Dest addr check */ if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) return; saddr = NULL; } addr_type = ipv6_addr_type(&hdr->saddr); /* * Source addr check */ if (addr_type & IPV6_ADDR_LINKLOCAL) iif = skb->dev->ifindex; /* * Must not send if we know that source is Anycast also. * for now we don't know that. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); return; } /* * Never answer to a ICMP packet. */ if (is_ineligible(hdr, (u8*)skb->tail - (u8*)hdr)) { if (net_ratelimit()) printk(KERN_DEBUG "icmpv6_send: no reply to icmp error/fragment\n"); return; } fl.proto = IPPROTO_ICMPV6; fl.nl_u.ip6_u.daddr = &hdr->saddr; fl.nl_u.ip6_u.saddr = saddr; fl.oif = iif; fl.fl6_flowlabel = 0; fl.uli_u.icmpt.type = type; fl.uli_u.icmpt.code = code; if (!icmpv6_xrlim_allow(sk, type, &fl)) return; /* * ok. kick it. checksum will be provided by the * getfrag_t callback. */ msg.icmph.icmp6_type = type; msg.icmph.icmp6_code = code; msg.icmph.icmp6_cksum = 0; msg.icmph.icmp6_pointer = htonl(info); msg.data = skb->nh.raw; msg.csum = 0; msg.daddr = &hdr->saddr; len = min((skb->tail - ((unsigned char *) hdr)) + sizeof(struct icmp6hdr), IPV6_MIN_MTU - sizeof(struct ipv6hdr)); if (len < 0) { printk(KERN_DEBUG "icmp: len problem\n"); return; } msg.len = len; ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, MSG_DONTWAIT); if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) (&icmpv6_statistics.Icmp6OutDestUnreachs)[type-1]++; icmpv6_statistics.Icmp6OutMsgs++; } static void icmpv6_echo_reply(struct sk_buff *skb) { struct sock *sk = icmpv6_socket->sk; struct ipv6hdr *hdr = skb->nh.ipv6h; struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; struct in6_addr *saddr; struct icmpv6_msg msg; struct flowi fl; unsigned char *data; int len; data = (char *) (icmph + 1); saddr = &hdr->daddr; if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST) saddr = NULL; len = skb->tail - data; len += sizeof(struct icmp6hdr); msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY; msg.icmph.icmp6_code = 0; msg.icmph.icmp6_cksum = 0; msg.icmph.icmp6_identifier = icmph->icmp6_identifier; msg.icmph.icmp6_sequence = icmph->icmp6_sequence; msg.data = data; msg.csum = 0; msg.len = len; msg.daddr = &hdr->saddr; fl.proto = IPPROTO_ICMPV6; fl.nl_u.ip6_u.daddr = &hdr->saddr; fl.nl_u.ip6_u.saddr = saddr; fl.oif = skb->dev->ifindex; fl.fl6_flowlabel = 0; fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY; fl.uli_u.icmpt.code = 0; ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1, MSG_DONTWAIT); icmpv6_statistics.Icmp6OutEchoReplies++; icmpv6_statistics.Icmp6OutMsgs++; } static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info, unsigned char *buff, int len) { struct in6_addr *saddr = &skb->nh.ipv6h->saddr; struct in6_addr *daddr = &skb->nh.ipv6h->daddr; struct ipv6hdr *hdr = (struct ipv6hdr *) buff; struct inet6_protocol *ipprot; struct sock *sk; u8 *pb; int hash; u8 nexthdr; nexthdr = hdr->nexthdr; len -= sizeof(struct ipv6hdr); if (len < 0) return; /* now skip over extension headers */ pb = ipv6_skip_exthdr((struct ipv6_opt_hdr *) (hdr + 1), &nexthdr, len); if (!pb) return; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed pmtu discovery. Corresponding argument (opt) to notifiers is already added. --ANK (980726) */ hash = nexthdr & (MAX_INET_PROTOS - 1); for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; ipprot != NULL; ipprot=(struct inet6_protocol *)ipprot->next) { if (ipprot->protocol != nexthdr) continue; if (ipprot->err_handler) ipprot->err_handler(skb, hdr, NULL, type, code, pb, info); } sk = raw_v6_htable[hash]; if (sk == NULL) return; while((sk = raw_v6_lookup(sk, nexthdr, daddr, saddr))) { rawv6_err(sk, skb, hdr, NULL, type, code, pb, info); sk = sk->next; } } /* * Handle icmp messages */ int icmpv6_rcv(struct sk_buff *skb, unsigned long len) { struct device *dev = skb->dev; struct in6_addr *saddr = &skb->nh.ipv6h->saddr; struct in6_addr *daddr = &skb->nh.ipv6h->daddr; struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr = (struct icmp6hdr *) skb->h.raw; int ulen; int type; icmpv6_statistics.Icmp6InMsgs++; if (len < sizeof(struct icmp6hdr)) goto discard_it; /* Perform checksum. */ switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = csum_partial((char *)hdr, len, 0); case CHECKSUM_HW: if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, skb->csum)) { printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", ntohs(saddr->__in6_u.__u6_addr16[0]), ntohs(saddr->__in6_u.__u6_addr16[1]), ntohs(saddr->__in6_u.__u6_addr16[2]), ntohs(saddr->__in6_u.__u6_addr16[3]), ntohs(saddr->__in6_u.__u6_addr16[4]), ntohs(saddr->__in6_u.__u6_addr16[5]), ntohs(saddr->__in6_u.__u6_addr16[6]), ntohs(saddr->__in6_u.__u6_addr16[7]), ntohs(daddr->__in6_u.__u6_addr16[0]), ntohs(daddr->__in6_u.__u6_addr16[1]), ntohs(daddr->__in6_u.__u6_addr16[2]), ntohs(daddr->__in6_u.__u6_addr16[3]), ntohs(daddr->__in6_u.__u6_addr16[4]), ntohs(daddr->__in6_u.__u6_addr16[5]), ntohs(daddr->__in6_u.__u6_addr16[6]), ntohs(daddr->__in6_u.__u6_addr16[7])); goto discard_it; } default: /* CHECKSUM_UNNECESSARY */ break; }; /* * length of original packet carried in skb */ ulen = skb->tail - (unsigned char *) (hdr + 1); type = hdr->icmp6_type; if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB) (&icmpv6_statistics.Icmp6InDestUnreachs)[type-ICMPV6_DEST_UNREACH]++; else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT) (&icmpv6_statistics.Icmp6InEchos)[type-ICMPV6_ECHO_REQUEST]++; switch (type) { case ICMPV6_ECHO_REQUEST: icmpv6_echo_reply(skb); break; case ICMPV6_ECHO_REPLY: /* we coulnd't care less */ break; case ICMPV6_PKT_TOOBIG: /* BUGGG_FUTURE: if packet contains rthdr, we cannot update standard destination cache. Seems, only "advanced" destination cache will allow to solve this problem --ANK (980726) */ orig_hdr = (struct ipv6hdr *) (hdr + 1); if (ulen >= sizeof(struct ipv6hdr)) rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, ntohl(hdr->icmp6_mtu)); /* * Drop through to notify */ case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu, (char *) (hdr + 1), ulen); break; case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: ndisc_rcv(skb, len); break; case ICMPV6_MGM_QUERY: igmp6_event_query(skb, hdr, len); break; case ICMPV6_MGM_REPORT: igmp6_event_report(skb, hdr, len); break; case ICMPV6_MGM_REDUCTION: break; default: if (net_ratelimit()) printk(KERN_DEBUG "icmpv6: msg of unknown type\n"); /* informational */ if (type & 0x80) break; /* * error of unknown type. * must pass to upper level */ icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu, (char *) (hdr + 1), ulen); }; kfree_skb(skb); return 0; discard_it: icmpv6_statistics.Icmp6InErrors++; kfree_skb(skb); return 0; } int __init icmpv6_init(struct net_proto_family *ops) { struct sock *sk; int err; icmpv6_socket = sock_alloc(); if (icmpv6_socket == NULL) { printk(KERN_ERR "Failed to create the ICMP6 control socket.\n"); return -1; } #ifndef _HURD_ icmpv6_socket->inode->i_uid = 0; icmpv6_socket->inode->i_gid = 0; #endif icmpv6_socket->type = SOCK_RAW; if ((err = ops->create(icmpv6_socket, IPPROTO_ICMPV6)) < 0) { printk(KERN_ERR "Failed to initialize the ICMP6 control socket (err %d).\n", err); sock_release(icmpv6_socket); icmpv6_socket = NULL; /* for safety */ return err; } sk = icmpv6_socket->sk; sk->allocation = GFP_ATOMIC; sk->num = 256; /* Don't receive any data */ inet6_add_protocol(&icmpv6_protocol); return 0; } void icmpv6_cleanup(void) { sock_release(icmpv6_socket); icmpv6_socket = NULL; /* For safety. */ inet6_del_protocol(&icmpv6_protocol); } static struct icmp6_err { int err; int fatal; } tab_unreach[] = { { ENETUNREACH, 0}, /* NOROUTE */ { EACCES, 1}, /* ADM_PROHIBITED */ { EHOSTUNREACH, 0}, /* Was NOT_NEIGHBOUR, now reserved */ { EHOSTUNREACH, 0}, /* ADDR_UNREACH */ { ECONNREFUSED, 1}, /* PORT_UNREACH */ }; int icmpv6_err_convert(int type, int code, int *err) { int fatal = 0; *err = EPROTO; switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; if (code <= ICMPV6_PORT_UNREACH) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } break; case ICMPV6_PKT_TOOBIG: *err = EMSGSIZE; break; case ICMPV6_PARAMPROB: *err = EPROTO; fatal = 1; break; case ICMPV6_TIME_EXCEED: *err = EHOSTUNREACH; break; }; return fatal; }