diff --git a/debian/arch/amd64/config.amd64-vyatta b/debian/arch/amd64/config.amd64-vyatta index 7a7a7ff..4eeb6e6 100644 --- a/debian/arch/amd64/config.amd64-vyatta +++ b/debian/arch/amd64/config.amd64-vyatta @@ -698,6 +698,7 @@ CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y +CONFIG_IPV6_MAP=m # CONFIG_NETLABEL is not set CONFIG_NETWORK_SECMARK=y CONFIG_NETWORK_PHY_TIMESTAMPING=y diff --git a/debian/arch/amd64/config.amd64-vyatta-virt b/debian/arch/amd64/config.amd64-vyatta-virt index 7a7a7ff..4eeb6e6 100644 --- a/debian/arch/amd64/config.amd64-vyatta-virt +++ b/debian/arch/amd64/config.amd64-vyatta-virt @@ -698,6 +698,7 @@ CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y +CONFIG_IPV6_MAP=m # CONFIG_NETLABEL is not set CONFIG_NETWORK_SECMARK=y CONFIG_NETWORK_PHY_TIMESTAMPING=y diff --git a/debian/arch/i386/config.586-vyatta b/debian/arch/i386/config.586-vyatta index 057a68c..a1e507a 100644 --- a/debian/arch/i386/config.586-vyatta +++ b/debian/arch/i386/config.586-vyatta @@ -725,6 +725,7 @@ CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y +CONFIG_IPV6_MAP=m # CONFIG_NETLABEL is not set CONFIG_NETWORK_SECMARK=y CONFIG_NETWORK_PHY_TIMESTAMPING=y diff --git a/debian/arch/i386/config.586-vyatta-virt b/debian/arch/i386/config.586-vyatta-virt index 20288b5..422aa25 100644 --- a/debian/arch/i386/config.586-vyatta-virt +++ b/debian/arch/i386/config.586-vyatta-virt @@ -730,6 +730,7 @@ CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y +CONFIG_IPV6_MAP=m # CONFIG_NETLABEL is not set CONFIG_NETWORK_SECMARK=y CONFIG_NETWORK_PHY_TIMESTAMPING=y diff --git a/include/linux/if.h b/include/linux/if.h index 06b6ef6..4189713b 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -80,6 +80,7 @@ * skbs on transmit */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ #define IFF_TEAM_PORT 0x40000 /* device used as team port */ +#define IFF_MAP 0x80000 /* MAP device */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_map.h b/include/linux/if_map.h index e69de29..b1569d2 100644 --- a/include/linux/if_map.h +++ b/include/linux/if_map.h @@ -0,0 +1,121 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff. 
If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ + +#ifndef _IF_MAP_H_ +#define _IF_MAP_H_ + +#include +#include + +#define SIOCGETMAP (SIOCDEVPRIVATE + 0) +#define SIOCADDMAP (SIOCDEVPRIVATE + 1) +#define SIOCDELMAP (SIOCDEVPRIVATE + 2) +#define SIOCCHGMAP (SIOCDEVPRIVATE + 3) + +#define SIOCGETMAPRULES (SIOCDEVPRIVATE + 4) +#define SIOCADDMAPRULES (SIOCDEVPRIVATE + 5) +#define SIOCDELMAPRULES (SIOCDEVPRIVATE + 6) +#define SIOCCHGMAPRULES (SIOCDEVPRIVATE + 7) + +#define SIOCGETMAPCURRNUM (SIOCDEVPRIVATE + 8) +#define SIOCGETMAPCURR (SIOCDEVPRIVATE + 9) +#define SIOCGETMAPNAPTNUM (SIOCDEVPRIVATE + 10) +#define SIOCGETMAPNAPT (SIOCDEVPRIVATE + 11) + +#define SIOCGETMAPPOOLS (SIOCDEVPRIVATE + 12) +#define SIOCADDMAPPOOLS (SIOCDEVPRIVATE + 13) +#define SIOCDELMAPPOOLS (SIOCDEVPRIVATE + 14) +#define SIOCCHGMAPPOOLS (SIOCDEVPRIVATE + 15) + +#define MAP_ROLE_BR (1 << 0) +#define MAP_ROLE_CE (1 << 1) + +#define MAP_FORWARDING_MODE_T (1 << 0) +#define MAP_FORWARDING_MODE_E (1 << 1) + +#define MAP_FORWARDING_RULE_T (1 << 0) +#define MAP_FORWARDING_RULE_F (1 << 1) + +#define MAP_NAPT_ALWAYS_T (1 << 0) +#define MAP_NAPT_ALWAYS_F (1 << 1) + +#define MAP_NAPT_FORCE_RECYCLE_T (1 << 0) +#define MAP_NAPT_FORCE_RECYCLE_F (1 << 1) + +#define MAP_IPV4_FRAG_INNER_T (1 << 0) +#define MAP_IPV4_FRAG_INNER_F (1 << 1) + +struct map_rule_parm { + struct in6_addr ipv6_prefix; + __u8 ipv6_prefix_length; + __be32 ipv4_prefix; + __u8 ipv4_prefix_length; + __u16 psid_prefix; + __u8 psid_prefix_length; + __u8 ea_length; + __u8 psid_offset; + __u8 forwarding_mode; + __u8 forwarding_rule; +}; + +struct map_pool_parm { + __be32 pool_prefix; + __u8 pool_prefix_length; +}; + +struct map_parm { + char name[IFNAMSIZ]; + int tunnel_source; + struct in6_addr br_address; + __u8 br_address_length; + __u8 role; + __u8 default_forwarding_mode; + __u8 default_forwarding_rule; + int ipv6_fragment_size; + __u8 ipv4_fragment_inner; + __u8 napt_always; + __u8 napt_force_recycle; + unsigned long rule_num; + unsigned long pool_num; + struct map_rule_parm rule[0]; + struct map_pool_parm pool[0]; +}; + +struct map_napt_node_parm { + __be32 raddr, laddr, maddr; + __be16 rport, lport, mport; + struct in6_addr laddr6; + __u8 proto; + __u8 flags; + struct timespec last_used; +}; + +struct map_napt_parm { + struct timespec current_time; + unsigned long napt_node_num; + struct map_napt_node_parm napt_node[0]; +}; + +struct map_napt_block { + __u16 min, max; +}; + +struct map_current_parm { + int has_bmr; + struct map_rule_parm bmrp; + struct in6_addr map_ipv6_address; + __u8 map_ipv6_address_length; + __be32 laddr4; + __u16 psid; + int psid_length; + int port_range_length; + struct map_napt_block port_range[0]; +}; + +#endif /* _IF_MAP_H_ */ diff --git a/include/net/ip.h b/include/net/ip.h index 83ed6a2..cb73d78 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -99,7 +99,7 @@ extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); extern int ip_mc_output(struct sk_buff *skb); -extern int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +extern int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *, void *arg), void *arg); extern int ip_do_nat(struct sk_buff *skb); extern void ip_send_check(struct iphdr *ip); extern int __ip_local_out(struct sk_buff *skb); @@ -409,6 +409,8 @@ enum 
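/*
 * The ip_fragment() change above threads a caller-supplied context into
 * the per-fragment output callback. A minimal usage sketch (the names
 * frag_ctx and my_output are illustrative, not part of the patch):
 *
 *	struct frag_ctx { struct dst_entry *dst; };
 *
 *	static int my_output(struct sk_buff *skb, void *arg)
 *	{
 *		struct frag_ctx *ctx = arg;	-- same ctx for every fragment
 *		...
 *		return 0;
 *	}
 *
 *	err = ip_fragment(skb, my_output, &ctx);
 *
 * Existing callers that need no context (the bridge code and
 * ip_finish_output below) simply pass NULL.
 */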
ip_defrag_users { IP_DEFRAG_VS_FWD, IP_DEFRAG_AF_PACKET, IP_DEFRAG_MACVLAN, + IP_DEFRAG_MAP46, + IP_DEFRAG_MAP64, }; int ip_defrag(struct sk_buff *skb, u32 user); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2ad92ca..f39880f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -83,6 +83,7 @@ extern int ipv6_route_ioctl(struct net *net, extern int ip6_route_add(struct fib6_config *cfg); extern int ip6_ins_rt(struct rt6_info *); extern int ip6_del_rt(struct rt6_info *); +extern int ip6_route_del(struct fib6_config *cfg); extern int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, diff --git a/include/net/map.h b/include/net/map.h index e69de29..c889a5f 100644 --- a/include/net/map.h +++ b/include/net/map.h @@ -0,0 +1,364 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff. If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ + +#ifndef _MAP_H_ +#define _MAP_H_ + +#include +#include +#include +#include +#include + +#define MAP_NAPT_HASH_LOOKUP_SIZE (1<<9) +#define MAP_NAPT_HASH_CREATE_SIZE (1<<7) +#define MAP_NAPT_EXPIRES_TCP (3*24*60*60*HZ) +#define MAP_NAPT_EXPIRES_OTHER (3*60*60*HZ) +#define MAP_NAPT_GC_THRESHOLD (60*HZ) + +#define MAP_DEFRAG6_HASH_SIZE (1<<7) +#define MAP_DEFRAG6_EXPIRES (60*HZ) + +#define MAP_NAPT_F_O_SYN (1<<0) +#define MAP_NAPT_F_I_SYN_ACK (1<<1) +#define MAP_NAPT_F_O_ACK (1<<2) +#define MAP_NAPT_F_O_FIN (1<<3) +#define MAP_NAPT_F_I_FIN_ACK (1<<4) +#define MAP_NAPT_F_I_FIN (1<<5) +#define MAP_NAPT_F_O_FIN_ACK (1<<6) +#define MAP_NAPT_F_RST (1<<7) +#define MAP_NAPT_F_FIN (MAP_NAPT_F_I_FIN | \ + MAP_NAPT_F_I_FIN_ACK | \ + MAP_NAPT_F_O_FIN | \ + MAP_NAPT_F_O_FIN_ACK) +#define MAP_NAPT_F_EST (MAP_NAPT_F_O_SYN | \ + MAP_NAPT_F_I_SYN_ACK | \ + MAP_NAPT_F_O_ACK) + +#define MAP_NAPT_TCP_F_SYN (1<<0) +#define MAP_NAPT_TCP_F_ACK (1<<1) +#define MAP_NAPT_TCP_F_FIN (1<<2) +#define MAP_NAPT_TCP_F_RST (1<<3) + +struct map_defrag6_node { + struct hlist_node dn_hash; + struct list_head dn_list; + struct hlist_node dn_pending; + struct sk_buff *skb; + struct in6_addr *saddr; + struct in6_addr *daddr; + __be32 id; + __be16 payload_len; + __be16 frag_off; + u32 h; + unsigned long received; +}; + +/* +struct map_napt_node_parm { + __be32 raddr, laddr, maddr; + __be16 rport, lport, mport; + struct in6_addr laddr6; + __u8 proto; + __u8 flags; + struct timespec last_used; +}; + +struct map_napt_parm { + struct timespec current_time; + unsigned long napt_node_num; + struct map_napt_node_parm napt_node[0]; +}; + +struct map_napt_block { + __u16 min, max; +}; +*/ + +struct map_napt_node { + struct hlist_node nn_hash_lup0, nn_hash_lup1, nn_hash_crat; + struct list_head nn_list, nn_gc_list; + __be32 raddr, laddr, maddr; + __be16 rport, lport, mport; + struct in6_addr laddr6; + __u8 proto; + __u8 flags; + unsigned long last_used; +}; + +struct mrtree_node { + __u32 val[4]; + int len; + struct map_rule *mr; + struct mrtree_node *children[2]; + struct mrtree_node *parent; +}; + +/* +struct map_rule_parm { + struct in6_addr ipv6_prefix; + __u8 ipv6_prefix_length; + __be32 ipv4_prefix; + __u8 ipv4_prefix_length; + __u16 psid_prefix; + __u8 psid_prefix_length; + __u8 ea_length; + __u8 psid_offset; + __u8 forwarding_mode; + __u8 
forwarding_rule; +}; +*/ + +struct map_rule { + struct list_head list; + struct mrtree_node *mrtn_ipv6addr; + struct mrtree_node *mrtn_ipv4addrport; + struct map_rule_parm p; +}; + +/* +struct map_pool_parm { + __be32 pool_prefix; + __u8 pool_prefix_length; +}; +*/ + +struct map_pool { + struct list_head list; + struct map_pool_parm p; +}; + +/* +struct map_parm { + char name[IFNAMSIZ]; + int tunnel_source; + struct in6_addr br_address; + __u8 br_address_length; + __u8 role; + __u8 default_forwarding_mode; + __u8 default_forwarding_rule; + int ipv6_fragment_size; + __u8 ipv4_fragment_inner; + __u8 napt_always; + __u8 napt_force_recycle; + unsigned long rule_num; + unsigned long pool_num; + struct map_rule_parm rule[0]; + struct map_pool_parm pool[0]; +}; +*/ + +struct map { + struct list_head list; + struct map_parm p; + struct net_device *dev; + struct list_head rule_list; + struct mrtree_node *mrtn_root_ipv6addr; + struct mrtree_node *mrtn_root_ipv4addrport; + rwlock_t rule_lock; + struct list_head pool_list; + rwlock_t pool_lock; + struct map_rule *bmr; + struct in6_addr map_ipv6_address; + __u8 map_ipv6_address_length; + __be32 laddr4; + __u16 psid; + int psid_length; + struct map_napt_block *port_range; + rwlock_t port_range_lock; + int port_range_length; + // int ipv6_fragment_size; + // int ipv4_fragment_size; + struct hlist_head napt_hash_lup0[MAP_NAPT_HASH_LOOKUP_SIZE]; + struct hlist_head napt_hash_lup1[MAP_NAPT_HASH_LOOKUP_SIZE]; + struct hlist_head napt_hash_crat[MAP_NAPT_HASH_CREATE_SIZE]; + struct list_head napt_list; + struct list_head napt_gc_list; + rwlock_t napt_lock; + unsigned long napt_last_gc; + struct hlist_head defrag6_hash[MAP_DEFRAG6_HASH_SIZE]; + struct list_head defrag6_list; + rwlock_t defrag6_lock; + unsigned long defrag6_last_gc; + int psid_offset_nums[17]; + rwlock_t psid_offset_nums_lock; +}; + +/* +struct map_current_parm { + int has_bmr; + struct map_rule_parm bmrp; + struct in6_addr map_ipv6_address; + __u8 map_ipv6_address_length; + __be32 laddr4; + __u16 psid; + int psid_length; + int port_range_length; + struct map_napt_block port_range[0]; +}; +*/ + +struct map_net { + struct net_device *map_fb_dev; + struct list_head map_list; + rwlock_t map_list_lock; +}; + +int map_gen_addr6(struct in6_addr *addr6, __be32 addr4, __be16 port4, struct map_rule *mr, int trans); +int map_get_addrport(struct iphdr *iph, __be32 *saddr4, __be32 *daddr4, __be16 *sport4, __be16 *dport4, __u8 *proto, int *icmperr); +int map_get_map_ipv6_address(struct map_rule *mr, struct in6_addr *ipv6addr, struct in6_addr *map_ipv6_address); + +struct map_rule *map_rule_find_by_ipv6addr(struct map *m, struct in6_addr *ipv6addr); +struct map_rule *map_rule_find_by_ipv4addrport(struct map *m, __be32* ipv4addr, __be16* port, int fro); +int map_rule_free(struct map *m, struct map_rule *mr); +int map_rule_add(struct map *m, struct map_rule_parm *mrp); +int map_rule_change(struct map *m, struct map_rule_parm *mrp); +int map_rule_delete(struct map *m, struct map_rule_parm *mrp); +void mrtree_node_dump(struct mrtree_node *root); +int map_rule_init(void); +void map_rule_exit(void); + +int map_trans_validate_src(struct sk_buff *skb, struct map *m, __be32 *saddr4, int *fb); +int map_trans_validate_dst(struct sk_buff *skb, struct map *m, __be32 *daddr4); +int map_trans_forward_v6v4(struct sk_buff *skb, struct map *m, __be32 *saddr4, __be32 *daddr4, int fb, int frag); +int map_trans_forward_v4v6(struct sk_buff *skb, struct map *m, struct map_rule *mr, int fb, int df); + +int 
map_encap_validate_src(struct sk_buff *skb, struct map *m, __be32 *saddr4, int *fb); +int map_encap_validate_dst(struct sk_buff *skb, struct map *m, __be32 *daddr4); +int map_encap_forward_v6v4(struct sk_buff *skb, struct map *m, __be32 *saddr4, __be32 *daddr4, int fb); +int map_encap_forward_v4v6(struct sk_buff *skb, struct map *m, struct map_rule *mr, int fb); + +int map_napt_hairpin(struct sk_buff *skb, struct map *m, __be32 *daddrp, __be16 *dportp, struct in6_addr *saddr6, int fb); +void map_napt_nn_gc(struct map *m); +int map_napt(struct iphdr *iph, int dir, struct map *m, __be32 **saddrpp, __be16 **sportpp, __sum16 **checkpp, struct in6_addr *saddr6, int fb); +int map_napt_init(void); +void map_napt_exit(void); + +struct sk_buff *map_defrag6(struct sk_buff *skb, struct map *m); +int map_defrag6_init(void); +void map_defrag6_exit(void); + +void map_napt_debug_pool(struct map *m); + +static inline void map_debug_print_skb(const char *func, struct sk_buff *skb) +{ + struct iphdr *iph = NULL; + struct ipv6hdr *ipv6h = NULL; + struct tcphdr *tcph = NULL; + struct udphdr *udph = NULL; + struct icmphdr *icmph = NULL; + struct icmp6hdr *icmp6h = NULL; + __u8 nexthdr; + u8 *ptr; + + if (!skb) { + printk(KERN_NOTICE "%s: skb == NULL\n", func); + return; + } + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: + iph = ip_hdr(skb); + printk(KERN_NOTICE "%s: ipv4 src:%d.%d.%d.%d dst:%d.%d.%d.%d\n", func, + ((ntohl(iph->saddr) >> 24) & 0xff), + ((ntohl(iph->saddr) >> 16) & 0xff), + ((ntohl(iph->saddr) >> 8) & 0xff), + ((ntohl(iph->saddr)) & 0xff), + ((ntohl(iph->daddr) >> 24) & 0xff), + ((ntohl(iph->daddr) >> 16) & 0xff), + ((ntohl(iph->daddr) >> 8) & 0xff), + ((ntohl(iph->daddr)) & 0xff)); + switch (iph->protocol) { + case IPPROTO_TCP: + tcph = (struct tcphdr *)(((u8 *)iph) + iph->ihl * 4); + break; + case IPPROTO_UDP: + udph = (struct udphdr *)(((u8 *)iph) + iph->ihl * 4); + break; + case IPPROTO_ICMP: + icmph = (struct icmphdr *)(((u8 *)iph) + iph->ihl * 4); + break; + default: + printk(KERN_NOTICE "%s: unknown transport\n", func); + return; + } + break; + case ETH_P_IPV6: + ipv6h = ipv6_hdr(skb); + printk(KERN_NOTICE "%s: ipv6 src:%08x%08x%08x%08x dst:%08x%08x%08x%08x\n", func, + (ntohl(ipv6h->saddr.s6_addr32[0])), + (ntohl(ipv6h->saddr.s6_addr32[1])), + (ntohl(ipv6h->saddr.s6_addr32[2])), + (ntohl(ipv6h->saddr.s6_addr32[3])), + (ntohl(ipv6h->daddr.s6_addr32[0])), + (ntohl(ipv6h->daddr.s6_addr32[1])), + (ntohl(ipv6h->daddr.s6_addr32[2])), + (ntohl(ipv6h->daddr.s6_addr32[3]))); + ptr = (u8 *)ipv6h; + nexthdr = ipv6h->nexthdr; + ptr += sizeof(struct ipv6hdr); + if (nexthdr == IPPROTO_FRAGMENT) { + printk(KERN_NOTICE "%s: IPPROTO_FRAGMENT\n", func); + nexthdr = ((struct frag_hdr*)ptr)->nexthdr; + ptr += sizeof(struct frag_hdr); + } + switch (nexthdr) { + case IPPROTO_TCP: + tcph = (struct tcphdr *)(((u8 *)ipv6h) + sizeof(struct ipv6hdr)); + break; + case IPPROTO_UDP: + udph = (struct udphdr *)(((u8 *)ipv6h) + sizeof(struct ipv6hdr)); + break; + case IPPROTO_ICMPV6: + icmp6h = (struct icmp6hdr *)(((u8 *)ipv6h) + sizeof(struct ipv6hdr)); + break; + default: + printk(KERN_NOTICE "%s: unknown transport\n", func); + return; + } + break; + default: + printk(KERN_NOTICE "%s: skb->protocol unknown\n", func); + return; + } + + if (tcph) { + printk(KERN_NOTICE "%s: tcp src:%d(0x%04x) dst:%d(0x%04x)\n", func, + ntohs(tcph->source), + ntohs(tcph->source), + ntohs(tcph->dest), + ntohs(tcph->dest)); + } + + if (udph) { + printk(KERN_NOTICE "%s: udp src:%d(0x%04x) dst:%d(0x%04x)\n", func, + ntohs(udph->source), 
+ ntohs(udph->source), + ntohs(udph->dest), + ntohs(udph->dest)); + } + + if (icmph) { + printk(KERN_NOTICE "%s: icmp type:%d code:%d id:%d(0x%04x)\n", func, + icmph->type, + icmph->code, + ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.id)); + } + + if (icmp6h) { + printk(KERN_NOTICE "%s: icmpv6 type:%d code:%d id:%d(0x%04x)\n", func, + icmp6h->icmp6_type, + icmp6h->icmp6_code, + ntohs(icmp6h->icmp6_dataun.u_echo.identifier), + ntohs(icmp6h->icmp6_dataun.u_echo.identifier)); + } +} + +#endif /* _MAP_H_ */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 61f6534..caa7784 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -39,7 +39,7 @@ static inline unsigned packet_length(const struct sk_buff *skb) return skb->len - (skb->protocol == htons(ETH_P_8021Q) ? VLAN_HLEN : 0); } -int br_dev_queue_push_xmit(struct sk_buff *skb) +int br_dev_queue_push_xmit(struct sk_buff *skb, void *arg) { /* ip_fragment doesn't copy the MAC header */ if (nf_bridge_maybe_copy_header(skb) || diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index dec4f38..ba0fa93 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -830,16 +830,16 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; - ret = ip_fragment(skb, br_dev_queue_push_xmit); + ret = ip_fragment(skb, br_dev_queue_push_xmit, NULL); } else - ret = br_dev_queue_push_xmit(skb); + ret = br_dev_queue_push_xmit(skb, NULL); return ret; } #else static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(skb, NULL); } #endif diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e1d8822..0b6cff8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -366,7 +366,7 @@ extern int br_fdb_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); /* br_forward.c */ extern void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -extern int br_dev_queue_push_xmit(struct sk_buff *skb); +extern int br_dev_queue_push_xmit(struct sk_buff *skb, void *arg); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 1f7e42c..5d1797d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2579,7 +2579,7 @@ int dev_queue_xmit(struct sk_buff *skb) * unfortunately */ recursion_alert: - if (net_ratelimit()) + if (net_ratelimit() && !(dev->priv_flags & IFF_MAP)) printk(KERN_CRIT "Dead loop on virtual device " "%s, fix it urgently!\n", dev->name); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ff302bd..966eaf2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -177,7 +177,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, } EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); -static inline int ip_finish_output2(struct sk_buff *skb) +static inline int ip_finish_output2(struct sk_buff *skb, void *arg) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; @@ -239,9 +239,9 @@ static int ip_finish_output(struct sk_buff *skb) } #endif if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) - return ip_fragment(skb, ip_finish_output2); + return ip_fragment(skb, ip_finish_output2, NULL); else - return ip_finish_output2(skb); + return ip_finish_output2(skb, NULL); } int ip_mc_output(struct sk_buff *skb) @@ -457,7 +457,7 @@ static 
void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *, void *arg), void *arg) { struct iphdr *iph; int ptr; @@ -567,7 +567,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip_send_check(iph); } - err = output(skb); + err = output(skb, arg); if (!err) IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); @@ -704,7 +704,7 @@ slow_path: ip_send_check(iph); - err = output(skb2, arg); + err = output(skb2, arg); if (err) goto fail; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 36d7437..66b974c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -250,4 +250,10 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. +config IPV6_MAP + tristate "IPv6: MAP driver (draft-ietf-softwire-map)" + select INET6_TUNNEL + ---help--- + Support for MAP (draft-ietf-softwire-map). + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934a..a51691a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -37,6 +37,9 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o +obj-$(CONFIG_IPV6_MAP) += map.o +map-objs := map_main.o map_rule.o map_encap.o map_trans.o map_napt.o map_defrag6.o + obj-y += addrconf_core.o exthdrs_core.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1342c76..d2bfca7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -901,6 +901,7 @@ fail: kfree_skb(skb); return err; } +EXPORT_SYMBOL(ip6_fragment); static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *fl_addr, diff --git a/net/ipv6/map_defrag6.c b/net/ipv6/map_defrag6.c index e69de29..601f8bb 100644 --- a/net/ipv6/map_defrag6.c +++ b/net/ipv6/map_defrag6.c @@ -0,0 +1,372 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff.
If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ +/* + * MAP IPv6 packet defragmentation (reassembly) function + * + * Authors: + * Masakazu Asama + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *dn_kmem __read_mostly; +static int dn_kmem_alloced = 0; + +static inline u32 +map_defrag6_dn_hash(struct in6_addr *saddr, struct in6_addr *daddr, __be32 id) +{ + /* XXX: think this hash over properly later */ + u32 h = id + + saddr->s6_addr32[0] + + saddr->s6_addr32[1] + + saddr->s6_addr32[2] + + saddr->s6_addr32[3] + + daddr->s6_addr32[0] + + daddr->s6_addr32[1] + + daddr->s6_addr32[2] + + daddr->s6_addr32[3]; + h ^= (h >> 20); + h ^= (h >> 10); + h ^= (h >> 5); + h &= (MAP_DEFRAG6_HASH_SIZE - 1); + /* + printk(KERN_NOTICE "map_defrag6_dn_hash: %08x\n", h); + */ + return h; +} + +static int +map_defrag6_collect(struct hlist_head *dnlist, struct in6_addr *saddr, + struct in6_addr *daddr, __be32 id, struct map *m) +{ + struct map_defrag6_node *dn, *dntmp; + struct hlist_node *node, *nodetmp; + u32 h = map_defrag6_dn_hash(saddr, daddr, id); + + hlist_for_each_entry (dn, node, &m->defrag6_hash[h], dn_hash) { + if (ipv6_addr_equal(saddr, dn->saddr) && + ipv6_addr_equal(daddr, dn->daddr) && + id == dn->id) { + /* + printk(KERN_NOTICE "map_defrag6_collect: match: " + "%08x%08x%08x%08x %08x%08x%08x%08x %08x " + "%d:%d %d\n", + ntohl(dn->saddr->s6_addr32[0]), + ntohl(dn->saddr->s6_addr32[1]), + ntohl(dn->saddr->s6_addr32[2]), + ntohl(dn->saddr->s6_addr32[3]), + ntohl(dn->daddr->s6_addr32[0]), + ntohl(dn->daddr->s6_addr32[1]), + ntohl(dn->daddr->s6_addr32[2]), + ntohl(dn->daddr->s6_addr32[3]), + ntohl(dn->id), + (ntohs(dn->frag_off) & 0xfff8), + ntohs(dn->payload_len), + (ntohs(dn->frag_off) & 0x7)); + */ + dntmp = NULL; + /* keep dnlist sorted by fragment offset */ + hlist_for_each_entry (dntmp, nodetmp, dnlist, + dn_pending) { + if ((ntohs(dn->frag_off) & 0xfff8) < + (ntohs(dntmp->frag_off) & 0xfff8)) { + break; + } + } + if (dntmp) { + /* + printk(KERN_NOTICE "map_defrag6_collect: dntmp: " + "%08x%08x%08x%08x %08x%08x%08x%08x %08x " + "%d:%d %d\n", + ntohl(dntmp->saddr->s6_addr32[0]), + ntohl(dntmp->saddr->s6_addr32[1]), + ntohl(dntmp->saddr->s6_addr32[2]), + ntohl(dntmp->saddr->s6_addr32[3]), + ntohl(dntmp->daddr->s6_addr32[0]), + ntohl(dntmp->daddr->s6_addr32[1]), + ntohl(dntmp->daddr->s6_addr32[2]), + ntohl(dntmp->daddr->s6_addr32[3]), + ntohl(dntmp->id), + (ntohs(dntmp->frag_off) & 0xfff8), + ntohs(dntmp->payload_len), + (ntohs(dntmp->frag_off) & 0x7)); + */ + if ((ntohs(dn->frag_off) & 0xfff8) < + (ntohs(dntmp->frag_off) & 0xfff8)) + hlist_add_before(&dn->dn_pending, + &dntmp->dn_pending); + else + hlist_add_after(&dntmp->dn_pending, + &dn->dn_pending); + } else + hlist_add_head(&dn->dn_pending, dnlist); + } + } + + /* + { + int i = 1; + hlist_for_each_entry (dntmp, nodetmp, dnlist, dn_pending) { + printk(KERN_NOTICE "map_defrag6_collect: %2d : " + "%08x%08x%08x%08x %08x%08x%08x%08x %08x " + "%d:%d %d\n", + i, + ntohl(dntmp->saddr->s6_addr32[0]), + ntohl(dntmp->saddr->s6_addr32[1]), + ntohl(dntmp->saddr->s6_addr32[2]), + ntohl(dntmp->saddr->s6_addr32[3]), + ntohl(dntmp->daddr->s6_addr32[0]), + ntohl(dntmp->daddr->s6_addr32[1]), +
ntohl(dntmp->daddr->s6_addr32[2]), + ntohl(dntmp->daddr->s6_addr32[3]), + ntohl(dntmp->id), + (ntohs(dntmp->frag_off) & 0xfff8), + ntohs(dntmp->payload_len), + (ntohs(dntmp->frag_off) & 0x7)); + ++i; + } + } + */ + + return 0; +} + +static inline int +map_defrag6_dn_expired(struct map_defrag6_node *dn) +{ + return time_is_before_jiffies(dn->received + MAP_DEFRAG6_EXPIRES); +} + +static inline void +map_defrag6_dn_destroy(struct map_defrag6_node *dn) +{ + hlist_del(&dn->dn_hash); + list_del(&dn->dn_list); + kfree_skb(dn->skb); + kmem_cache_free(dn_kmem, dn); + --dn_kmem_alloced; +} + +static inline void +map_defrag6_dn_gc(struct map *m) +{ + struct map_defrag6_node *dn, *dn_node; + list_for_each_entry_safe (dn, dn_node, &m->defrag6_list, dn_list) { + if (!map_defrag6_dn_expired(dn)) + break; + map_defrag6_dn_destroy(dn); + } + m->defrag6_last_gc = jiffies; +} + +static struct map_defrag6_node * +map_defrag6_dn_create(struct sk_buff *skb, struct map *m) +{ + struct map_defrag6_node *dn; + struct ipv6hdr *ipv6h; + struct frag_hdr *fragh; + + ipv6h = ipv6_hdr(skb); + fragh = (struct frag_hdr *)(ipv6h + 1); + + dn = kmem_cache_alloc(dn_kmem, GFP_KERNEL); + if (!dn) + return NULL; + + map_defrag6_dn_gc(m); + ++dn_kmem_alloced; + dn->skb = skb; + dn->saddr = &ipv6h->saddr; + dn->daddr = &ipv6h->daddr; + dn->id = fragh->identification; + dn->payload_len = ipv6h->payload_len; + dn->frag_off = fragh->frag_off; + dn->h = map_defrag6_dn_hash(dn->saddr, dn->daddr, dn->id); + dn->received = jiffies; + hlist_add_head(&dn->dn_hash, &m->defrag6_hash[dn->h]); + list_add_tail(&dn->dn_list, &m->defrag6_list); + INIT_HLIST_NODE(&dn->dn_pending); + + return dn; +} + +static int +map_defrag6_complete(struct hlist_head *dnlist) +{ + struct map_defrag6_node *dn; + struct hlist_node *node; + int frag_off, total_len = 0; + + hlist_for_each_entry (dn, node, dnlist, dn_pending) { + frag_off = ntohs(dn->frag_off) & 0xfff8; + if (frag_off != total_len) + return 0; + total_len += ntohs(dn->payload_len) - sizeof(struct frag_hdr); + } + + if (dn->frag_off & htons(IP6_MF)) + return 0; + + return total_len; +} + +static struct sk_buff * +map_defrag6_rebuild_skb(struct hlist_head *dnlist, int total_len) +{ + struct map_defrag6_node *dn; + struct hlist_node *node, *n; + struct sk_buff *nskb = NULL; + struct ipv6hdr *ipv6h = NULL; + struct frag_hdr *fragh = NULL; + void *ptr = NULL; + int len; + int offset; + + nskb = alloc_skb(total_len + sizeof(struct ipv6hdr) + LL_MAX_HEADER, + GFP_ATOMIC); + if (!nskb) { + printk(KERN_NOTICE "map_defrag6_rebuild_skb: " + "alloc_skb failed.\n"); + goto err; + } + skb_reserve(nskb, LL_MAX_HEADER); + skb_put(nskb, total_len + sizeof(struct ipv6hdr)); + + dn = hlist_entry(dnlist->first, struct map_defrag6_node, dn_pending); + nskb->dev = dn->skb->dev; + ipv6h = ipv6_hdr(dn->skb); + fragh = (struct frag_hdr *)(ipv6h + 1); + ptr = ipv6h; + skb_copy_to_linear_data(nskb, ptr, sizeof(struct ipv6hdr)); + nskb->protocol = htons(ETH_P_IPV6); + skb_reset_network_header(nskb); + ipv6h = ipv6_hdr(nskb); + ipv6h->nexthdr = fragh->nexthdr; + ipv6h->payload_len = htons(total_len); + hlist_for_each_entry_safe (dn, node, n, dnlist, dn_pending) { + ptr = ipv6_hdr(dn->skb); + ptr += sizeof(struct ipv6hdr) + sizeof(struct frag_hdr); + len = ntohs(dn->payload_len) - sizeof(struct frag_hdr); + offset = (ntohs(dn->frag_off) & 0xfff8) + + sizeof(struct ipv6hdr); + skb_copy_to_linear_data_offset(nskb, offset, ptr, len); + } + +err: + hlist_for_each_entry_safe (dn, node, n, dnlist, dn_pending) { + 
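/*
 * Note: this cleanup runs on the success path as well as on allocation
 * failure; once collected, fragments are always consumed here after
 * their data has (or has not) been copied into nskb.
 */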
hlist_del_init(&dn->dn_pending); + map_defrag6_dn_destroy(dn); + } + + return nskb; +} + +struct sk_buff * +map_defrag6(struct sk_buff *skb, struct map *m) +{ + struct ipv6hdr *ipv6h; + struct frag_hdr *fragh; + struct map_defrag6_node *dn; + struct hlist_node *node, *n; + struct hlist_head dnlist; + int total_len; + + /* + printk(KERN_NOTICE "map_defrag6:\n"); + */ + + ipv6h = ipv6_hdr(skb); + fragh = (struct frag_hdr *)(ipv6h + 1); + + if (ipv6h->nexthdr != IPPROTO_FRAGMENT) { + /* + printk(KERN_NOTICE "map_defrag6: " + "ipv6h->nexthdr != IPPROTO_FRAGMENT\n"); + */ + return skb; + } + + write_lock_bh(&m->defrag6_lock); + + dn = map_defrag6_dn_create(skb, m); + if (!dn) { + printk(KERN_NOTICE "map_defrag6: " + "map_defrag6_dn_create failed.\n"); + write_unlock_bh(&m->defrag6_lock); + goto err_kfree_skb; + } + + INIT_HLIST_HEAD(&dnlist); + map_defrag6_collect(&dnlist, &ipv6h->saddr, &ipv6h->daddr, + fragh->identification, m); + + total_len = map_defrag6_complete(&dnlist); + if (total_len > 0) { + skb = map_defrag6_rebuild_skb(&dnlist, total_len); + } else { + hlist_for_each_entry_safe (dn, node, n, &dnlist, dn_pending) { + hlist_del_init(&dn->dn_pending); + } + skb = NULL; + } + + write_unlock_bh(&m->defrag6_lock); + + return skb; + +err_kfree_skb: + kfree_skb(skb); + + return NULL; +} + +int +map_defrag6_init(void) +{ + dn_kmem = kmem_cache_create("map_defrag6_node", + sizeof(struct map_defrag6_node), 0, SLAB_HWCACHE_ALIGN, NULL); + if (!dn_kmem) + return -1; + + return 0; +} + +void +map_defrag6_exit(void) +{ + kmem_cache_destroy(dn_kmem); +} diff --git a/net/ipv6/map_encap.c b/net/ipv6/map_encap.c index e69de29..931ae72 100644 --- a/net/ipv6/map_encap.c +++ b/net/ipv6/map_encap.c @@ -0,0 +1,569 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff. 
If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ +/* + * MAP-E function + * + * Authors: + * Masakazu Asama + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* XXX: */ + +int +map_encap_validate_src(struct sk_buff *skb, struct map *m, __be32 *saddr4, int *fb) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct map_rule *mr; + u8 *ptr; + struct iphdr *iph, *icmpiph; + struct tcphdr *tcph, *icmptcph; + struct udphdr *udph, *icmpudph; + struct icmphdr *icmph, *icmpicmph; + __u8 proto; + __be32 saddr; + __be16 sport; + struct in6_addr addr6; + int err = 0; + + proto = ipv6h->nexthdr; + ptr = (u8 *)ipv6h; + ptr += sizeof(struct ipv6hdr); + if (proto == IPPROTO_FRAGMENT) { + proto = ((struct frag_hdr *)ptr)->nexthdr; + ptr += sizeof(struct frag_hdr); + } + + if (proto != IPPROTO_IPIP) { + printk(KERN_NOTICE "map_encap_validate_src: " + "is this encaped?\n"); + err = -1; + goto err; + } + + iph = (struct iphdr*)ptr; + + if (m->p.role == MAP_ROLE_CE && + ipv6_addr_equal(&ipv6h->saddr, &m->p.br_address)) { + *saddr4 = iph->saddr; + return 0; + } + + saddr = iph->saddr; + ptr += iph->ihl * 4; + + switch (iph->protocol) { + case IPPROTO_ICMP: + icmph = (struct icmphdr *)ptr; + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ptr = (u8 *)icmph; + ptr += sizeof(struct icmphdr); + icmpiph = (struct iphdr*)ptr; + saddr = icmpiph->daddr; + ptr += icmpiph->ihl * 4; + switch (icmpiph->protocol) { + case IPPROTO_TCP: + icmptcph = (struct tcphdr *)ptr; + sport = icmptcph->dest; + break; + case IPPROTO_UDP: + icmpudph = (struct udphdr *)ptr; + sport = icmpudph->dest; + break; + case IPPROTO_ICMP: + icmpicmph = (struct icmphdr *)ptr; + sport = icmpicmph->un.echo.id; + break; + default: + printk(KERN_NOTICE "map_encap_validate_src: " + "unknown proto encaped in icmp error.\n"); + err = -1; + goto err; + } + break; + default: + sport = icmph->un.echo.id; + break; + } + break; + case IPPROTO_TCP: + tcph = (struct tcphdr *)ptr; + sport = tcph->source; + break; + case IPPROTO_UDP: + udph = (struct udphdr *)ptr; + sport = udph->source; + break; + default: + printk(KERN_NOTICE "map_encap_validate_src: " + "unknown encaped.\n"); + err = -1; + goto err; + } + + mr = map_rule_find_by_ipv6addr(m, &ipv6h->saddr); + if (!mr) { + if (m->p.role == MAP_ROLE_BR) { + *fb = 1; + goto fallback; + } else { + err = -1; + goto err; + } + } + + if (map_gen_addr6(&addr6, saddr, sport, mr, 0)) { + if (m->p.role == MAP_ROLE_BR) { + *fb = 1; + goto fallback; + } else { + err = -1; + goto err; + } + } + + if (!ipv6_addr_equal(&addr6, &ipv6h->saddr)) { + if (m->p.role == MAP_ROLE_BR) { + *fb = 1; + goto fallback; + } else { + printk(KERN_NOTICE "map_encap_validate_src: " + "validation failed.\n"); + err = -1; + goto err_icmpv6_send; + } + } + +fallback: + *saddr4 = iph->saddr; + + return 0; + +err_icmpv6_send: + printk(KERN_NOTICE "map_encap_validate_src: " + "icmpv6_send(skb, ICMPV6_DEST_UNREACH, " + "5 /* Source address failed ingress/egress policy */, 0);\n"); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, + 5 /* Source address failed ingress/egress policy */, 0); +err: + map_debug_print_skb("map_encap_validate_src", skb); + return err; +} + +int +map_encap_validate_dst(struct sk_buff *skb, struct map *m, __be32 *daddr4) +{ + struct 
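/*
 * Destination check, per the body below: the outer IPv6 daddr must be
 * this device's own MAP address, and on a CE with a basic mapping rule
 * the inner IPv4 destination/port must regenerate exactly that address
 * via map_gen_addr6(); a BR accepts any inner destination.
 */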
ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 *ptr; + struct iphdr *iph, *icmpiph; + struct tcphdr *tcph, *icmptcph; + struct udphdr *udph, *icmpudph; + struct icmphdr *icmph, *icmpicmph; + __u8 proto; + __be32 daddr; + __be16 dport; + struct in6_addr addr6; + int err = 0; + + proto = ipv6h->nexthdr; + ptr = (u8 *)ipv6h; + ptr += sizeof(struct ipv6hdr); + if (proto == IPPROTO_FRAGMENT) { + proto = ((struct frag_hdr *)ptr)->nexthdr; + ptr += sizeof(struct frag_hdr); + } + + if (proto != IPPROTO_IPIP) { + printk(KERN_NOTICE "map_encap_validate_dst: " + "is this encaped?\n"); + err = -1; + goto err; + } + + iph = (struct iphdr*)ptr; + + if (!ipv6_addr_equal(&ipv6h->daddr, &m->map_ipv6_address)) { + printk(KERN_NOTICE "map_encap_validate_dst: " + "not match my address.\n"); + err = -1; + goto err; + } + + if (m->p.role == MAP_ROLE_BR || (m->p.role == MAP_ROLE_CE && !m->bmr)) { + *daddr4 = iph->daddr; + return 0; + } + + if (!m->bmr) { + printk(KERN_NOTICE "map_encap_validate_dst: m->bmr is null.\n"); + err = -1; + goto err; + } + + daddr = iph->daddr; + ptr += iph->ihl * 4; + switch (iph->protocol) { + case IPPROTO_ICMP: + icmph = (struct icmphdr *)ptr; + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ptr = (u8 *)icmph; + ptr += sizeof(struct icmphdr); + icmpiph = (struct iphdr*)ptr; + daddr = icmpiph->saddr; + ptr += icmpiph->ihl * 4; + switch (icmpiph->protocol) { + case IPPROTO_TCP: + icmptcph = (struct tcphdr *)ptr; + dport = icmptcph->source; + break; + case IPPROTO_UDP: + icmpudph = (struct udphdr *)ptr; + dport = icmpudph->source; + break; + case IPPROTO_ICMP: + icmpicmph = (struct icmphdr *)ptr; + dport = icmpicmph->un.echo.id; + break; + default: + printk(KERN_NOTICE "map_encap_validate_dst: " + "unknown proto encaped in icmp error.\n"); + err = -1; + goto err; + } + break; + default: + dport = icmph->un.echo.id; + break; + } + break; + case IPPROTO_TCP: + tcph = (struct tcphdr *)ptr; + dport = tcph->dest; + break; + case IPPROTO_UDP: + udph = (struct udphdr *)ptr; + dport = udph->dest; + break; + default: + printk(KERN_NOTICE "map_encap_validate_dst: " + "unknown encaped.\n"); + err = -1; + goto err; + } + + read_lock(&m->rule_lock); + if (!m->bmr) { + read_unlock(&m->rule_lock); + printk(KERN_NOTICE "map_encap_validate_dst: bmr is null..\n"); + err = -1; + goto err; + } + if (map_gen_addr6(&addr6, daddr, dport, m->bmr, 0)) { + read_unlock(&m->rule_lock); + printk(KERN_NOTICE "map_encap_validate_dst: " + "map_gen_addr6 failed.\n"); + err = -1; + goto err; + } + read_unlock(&m->rule_lock); + + if (!ipv6_addr_equal(&addr6, &ipv6h->daddr)) { + printk(KERN_NOTICE "map_encap_validate_dst: " + "validation failed.\n"); + printk(KERN_NOTICE "map_encap_validate_dst: " + "addr6 = %08x%08x%08x%08x\n", + ntohl(addr6.s6_addr32[0]), + ntohl(addr6.s6_addr32[1]), + ntohl(addr6.s6_addr32[2]), + ntohl(addr6.s6_addr32[3])); + printk(KERN_NOTICE "map_encap_validate_dst: " + "ipv6h->daddr = %08x%08x%08x%08x\n", + ntohl(ipv6h->daddr.s6_addr32[0]), + ntohl(ipv6h->daddr.s6_addr32[1]), + ntohl(ipv6h->daddr.s6_addr32[2]), + ntohl(ipv6h->daddr.s6_addr32[3])); + printk(KERN_NOTICE "map_encap_validate_dst: " + "daddr = %d.%d.%d.%d dport = %d(%04x)\n", + ((ntohl(daddr) >> 24) & 0xff), + ((ntohl(daddr) >> 16) & 0xff), + ((ntohl(daddr) >> 8) & 0xff), + (ntohl(daddr) & 0xff), + ntohs(dport), ntohs(dport)); + err = -1; + goto err_icmpv6_send; + } + + *daddr4 = iph->daddr; + + return 0; + +err_icmpv6_send: + printk(KERN_NOTICE 
"map_encap_validate_dst: " + "icmpv6_send(skb, ICMPV6_DEST_UNREACH, " + "ICMPV6_ADDR_UNREACH, 0);\n"); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); +err: + map_debug_print_skb("map_encap_validate_dst", skb); + return err; +} + +int +map_encap_forward_v6v4(struct sk_buff *skb, struct map *m, __be32 *saddr4, + __be32 *daddr4, int fb) +{ + struct ipv6hdr orig_ipv6h = {}, *ipv6h; + struct frag_hdr orig_fragh = {}, *fragh; + int hsize; + __u8 nexthdr; + struct iphdr *iph; + __be32 *saddrp = NULL; + __be16 *sportp = NULL; + __sum16 *checkp = NULL; + struct in6_addr *saddr6; + u8 *ptr; + int err = 0; + + ipv6h = ipv6_hdr(skb); + + memcpy(&orig_ipv6h, ipv6h, sizeof(orig_ipv6h)); + saddr6 = &orig_ipv6h.saddr; + hsize = sizeof(orig_ipv6h); + nexthdr = orig_ipv6h.nexthdr; + if (orig_ipv6h.nexthdr == IPPROTO_FRAGMENT) { + ptr = (u8 *)ipv6h; + ptr += sizeof(*ipv6h); + fragh = (struct frag_hdr *)ptr; + memcpy(&orig_fragh, fragh, sizeof(orig_fragh)); + hsize += sizeof(orig_fragh); + nexthdr = orig_fragh.nexthdr; + } + + if (nexthdr != IPPROTO_IPIP) { + printk(KERN_NOTICE "map_encap_forward_v6v4: " + "this packet is not ipip.\n"); + err = -1; + goto err; + } + + skb_dst_drop(skb); + skb_pull(skb, hsize); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + iph = ip_hdr(skb); + + if (m->p.role == MAP_ROLE_BR && fb) { + err = map_napt(iph, 0, m, &saddrp, &sportp, &checkp, saddr6, fb); + if (err) + goto err; + /* NAPT Hairpinning */ + if (map_napt_hairpin(skb, m, saddrp, sportp, saddr6, fb)) + goto out; + } else + err = map_napt(iph, 1, m, &saddrp, &sportp, &checkp, NULL, fb); + if (err) { + printk(KERN_NOTICE "map_encap_forward_v6v4: " + "saddr:%d.%d.%d.%d daddr:%d.%d.%d.%d\n", + ((ntohl(iph->saddr) >> 24) & 0xff), + ((ntohl(iph->saddr) >> 16) & 0xff), + ((ntohl(iph->saddr) >> 8) & 0xff), + ((ntohl(iph->saddr)) & 0xff), + ((ntohl(iph->daddr) >> 24) & 0xff), + ((ntohl(iph->daddr) >> 16) & 0xff), + ((ntohl(iph->daddr) >> 8) & 0xff), + ((ntohl(iph->daddr)) & 0xff)); + goto err; + } + + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + skb->dev = m->dev; + skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); + skb_dst_drop(skb); + nf_reset(skb); + + netif_rx(skb); + + return 0; + +err: + map_debug_print_skb("map_encap_forward_v6v4", skb); +out: + return err; +} + +struct map_encap_forward_v4v6_arg { + struct flowi6 *fl6; + struct dst_entry *dst; +}; + +static int +map_encap_forward_v4v6_finish(struct sk_buff *skb, void *arg) +{ + struct map_encap_forward_v4v6_arg *a = arg; + struct flowi6 *fl6 = a->fl6; + struct dst_entry *dst = a->dst; + struct iphdr *iph; + struct ipv6hdr *ipv6h; + int pkt_len; + unsigned int max_headroom; + struct iphdr orig_iph; + int err = 0; + + max_headroom = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr) + 20; + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + struct sk_buff *new_skb; + + if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + iph = ip_hdr(skb); + + memcpy(&orig_iph, iph, sizeof(orig_iph)); + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IPV6); + ipv6h = ipv6_hdr(skb); + + ipv6h->version = 6; + ipv6h->priority = 0; /* XXX: */ + ipv6h->flow_lbl[0] = 0; + ipv6h->flow_lbl[1] = 0; + ipv6h->flow_lbl[2] = 0; + 
ipv6h->payload_len = orig_iph.tot_len; + ipv6h->hop_limit = orig_iph.ttl - 1; /* XXX: */ + memcpy(&ipv6h->saddr, &fl6->saddr, sizeof(struct in6_addr)); + memcpy(&ipv6h->daddr, &fl6->daddr, sizeof(struct in6_addr)); + ipv6h->nexthdr = IPPROTO_IPIP; + + pkt_len = skb->len; + + skb->local_df = 1; + + err = ip6_local_out(skb); + + return 0; + +tx_err_dst_release: + printk(KERN_NOTICE "map_encap_forward_v4v6: tx_err_dst_release:\n"); + // dst_release(dst); /* XXX: */ + return err; +} + +int +map_encap_forward_v4v6(struct sk_buff *skb, struct map *m, struct map_rule *mr, int fb) +{ + struct flowi6 fl6; + struct in6_addr saddr6, daddr6; + struct net *net = dev_net(m->dev); + struct dst_entry *dst; + struct iphdr *iph; + __be32 *daddrp = NULL; + __be16 *dportp = NULL; + __sum16 *checkp = NULL; + int ipv4_fragment_size; + int ret; + int err = 0; + struct map_encap_forward_v4v6_arg arg; + + iph = ip_hdr(skb); + + err = map_napt(iph, 0, m, &daddrp, &dportp, &checkp, NULL, 0); + if (err) + goto err; + /* NAPT Hairpinning */ + if (map_napt_hairpin(skb, m, daddrp, dportp, NULL, 0)) + goto out; + + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + saddr6.s6_addr32[0] = m->map_ipv6_address.s6_addr32[0]; + saddr6.s6_addr32[1] = m->map_ipv6_address.s6_addr32[1]; + saddr6.s6_addr32[2] = m->map_ipv6_address.s6_addr32[2]; + saddr6.s6_addr32[3] = m->map_ipv6_address.s6_addr32[3]; + + if (mr) { + map_gen_addr6(&daddr6, *daddrp, *dportp, mr, 0); + } else { + daddr6.s6_addr32[0] = m->p.br_address.s6_addr32[0]; + daddr6.s6_addr32[1] = m->p.br_address.s6_addr32[1]; + daddr6.s6_addr32[2] = m->p.br_address.s6_addr32[2]; + daddr6.s6_addr32[3] = m->p.br_address.s6_addr32[3]; + } + + if (m->p.role == MAP_ROLE_BR && fb) { + err = map_napt(iph, 1, m, &daddrp, &dportp, &checkp, &daddr6, fb); + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + if (err) + goto err; + } + + memset(&fl6, 0, sizeof(fl6)); + fl6.saddr = saddr6; + fl6.daddr = daddr6; + fl6.flowi6_oif = m->dev->ifindex; + fl6.flowlabel = 0; + + dst = ip6_route_output(net, NULL, &fl6); + // dst_metric_set(dst, RTAX_MTU, 1280); + if (dst_mtu(dst) > m->p.ipv6_fragment_size) + dst_metric_set(dst, RTAX_MTU, m->p.ipv6_fragment_size); + + arg.fl6 = &fl6; + arg.dst = dst; + + ipv4_fragment_size = dst_mtu(dst) - 40; + if (m->p.ipv4_fragment_inner == MAP_IPV4_FRAG_INNER_T && skb->len > ipv4_fragment_size) { + dst_metric_set(skb_dst(skb), RTAX_MTU, ipv4_fragment_size); + ret = ip_fragment(skb, map_encap_forward_v4v6_finish, &arg); + } else { + ret = map_encap_forward_v4v6_finish(skb, &arg); + } + + return ret; + +err: + map_debug_print_skb("map_encap_forward_v4v6", skb); +out: + return err; +} diff --git a/net/ipv6/map_main.c b/net/ipv6/map_main.c index e69de29..e6b6624 100644 --- a/net/ipv6/map_main.c +++ b/net/ipv6/map_main.c @@ -0,0 +1,1976 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff. 
If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ +/* + * MAP device + * + * Authors: + * Masakazu Asama + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +// #include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Masakazu Asama"); +MODULE_DESCRIPTION("MAP device"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETDEV("mapfb"); + +static int map_net_id __read_mostly; + +struct pcpu_tstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + +static struct net_device_stats * +map_get_stats(struct net_device *dev) +{ + struct pcpu_tstats sum = { 0 }; + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + + sum.rx_packets += tstats->rx_packets; + sum.rx_bytes += tstats->rx_bytes; + sum.tx_packets += tstats->tx_packets; + sum.tx_bytes += tstats->tx_bytes; + } + + dev->stats.rx_packets = sum.rx_packets; + dev->stats.rx_bytes = sum.rx_bytes; + dev->stats.tx_packets = sum.tx_packets; + dev->stats.tx_bytes = sum.tx_bytes; + + return &dev->stats; +} + +int +map_pool_free(struct map *m, struct map_pool *mp) +{ + list_del(&mp->list); + kfree(mp); + return 0; +} + +int +map_pool_add(struct map *m, struct map_pool_parm *mpp) +{ + struct map_pool *mp; + + write_lock_bh(&m->pool_lock); + list_for_each_entry (mp, &m->pool_list, list) { + if (mp->p.pool_prefix == mpp->pool_prefix && + mp->p.pool_prefix_length == mpp->pool_prefix_length) { + write_unlock_bh(&m->pool_lock); + return -1; + } + } + mp = kmalloc(sizeof(*mp), GFP_KERNEL); + if (!mp) { + write_unlock_bh(&m->pool_lock); + return -1; + } + mp->p = *mpp; + list_add_tail(&mp->list, &m->pool_list); + m->p.pool_num += 1; + write_unlock_bh(&m->pool_lock); + + return 0; +} + +int +map_pool_change(struct map *m, struct map_pool_parm *mpp) +{ + return 0; +} + +int +map_pool_delete(struct map *m, struct map_pool_parm *mpp) +{ + struct map_pool *mp; + + write_lock_bh(&m->pool_lock); + list_for_each_entry (mp, &m->pool_list, list) { + if (mp->p.pool_prefix == mpp->pool_prefix && + mp->p.pool_prefix_length == mpp->pool_prefix_length) { + map_pool_free(m, mp); + break; + } + } + m->p.pool_num -= 1; + write_unlock_bh(&m->pool_lock); + + return 0; +} + +int +map_get_addrport(struct iphdr *iph, __be32 *saddr4, __be32 *daddr4, + __be16 *sport4, __be16 *dport4, __u8 *proto, int *icmperr) +{ + u8 *ptr; + struct iphdr *icmpiph = NULL; + struct tcphdr *tcph, *icmptcph; + struct udphdr *udph, *icmpudph; + struct icmphdr *icmph, *icmpicmph; + + *icmperr = 0; + *saddr4 = iph->saddr; + *daddr4 = iph->daddr; + ptr = (u8 *)iph; + ptr += iph->ihl * 4; + switch (iph->protocol) { + case IPPROTO_TCP: + *proto = IPPROTO_TCP; + tcph = (struct tcphdr *)ptr; + *sport4 = tcph->source; + *dport4 = tcph->dest; + break; + case IPPROTO_UDP: + *proto = IPPROTO_UDP; + udph = (struct udphdr *)ptr; + *sport4 = udph->source; + *dport4 = udph->dest; + break; + case IPPROTO_ICMP: + *proto = IPPROTO_ICMP; + icmph = (struct icmphdr *)ptr; + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + *icmperr = 1; + ptr = (u8 *)icmph; + ptr 
+= sizeof(struct icmphdr); + icmpiph = (struct iphdr*)ptr; + *saddr4 = icmpiph->saddr; + *daddr4 = icmpiph->daddr; + if (ntohs(iph->tot_len) < icmpiph->ihl * 4 + 12) { + printk(KERN_NOTICE "map_get_addrport: ???\n"); + return -1; + } + ptr += icmpiph->ihl * 4; + switch (icmpiph->protocol) { + case IPPROTO_TCP: + *proto = IPPROTO_TCP; + icmptcph = (struct tcphdr *)ptr; + *sport4 = icmptcph->source; + *dport4 = icmptcph->dest; + break; + case IPPROTO_UDP: + *proto = IPPROTO_UDP; + icmpudph = (struct udphdr *)ptr; + *sport4 = icmpudph->source; + *dport4 = icmpudph->dest; + break; + case IPPROTO_ICMP: + *proto = IPPROTO_ICMP; + icmpicmph = (struct icmphdr *)ptr; + *sport4 = icmpicmph->un.echo.id; + *dport4 = icmpicmph->un.echo.id; + break; + default: + printk(KERN_NOTICE "map_get_addrport: " + "inner icmp unknown proto(%d).\n", + icmpiph->protocol); + return -1; + } + break; + default: + *sport4 = icmph->un.echo.id; + *dport4 = icmph->un.echo.id; + } + break; + default: + printk(KERN_NOTICE "map_get_addrport: unknown proto(%d).\n", + iph->protocol); + return -1; + } + + return 0; +} + +int +map_gen_addr6(struct in6_addr *addr6, __be32 addr4, __be16 port4, + struct map_rule *mr, int trans) +{ + int i, pbw0, pbi0, pbi1; + __u32 addr[4]; + __u32 psid = 0; + __u32 mask = 0; + __u32 psid_mask; + __u32 a = ntohl(addr4); + __u16 p = ntohs(port4); + int psid_length; + + /* validate mr before dereferencing it */ + if (!mr) + return -1; + + psid_length = mr->p.ipv4_prefix_length + mr->p.psid_prefix_length + + mr->p.ea_length - 32; + + if (psid_length < 0) + a &= 0xffffffff << (psid_length * -1); + + if (psid_length > 0) { + mask = 0xffffffff >> (32 - psid_length); + psid = (p >> (16 - psid_length - mr->p.psid_offset)) & mask; + } + + for (i = 0; i < 4; ++i) + addr[i] = ntohl(mr->p.ipv6_prefix.s6_addr32[i]); + + if (mr->p.ipv4_prefix_length < 32) { + pbw0 = mr->p.ipv6_prefix_length >> 5; + pbi0 = mr->p.ipv6_prefix_length & 0x1f; + addr[pbw0] |= (a << mr->p.ipv4_prefix_length) >> pbi0; + pbi1 = pbi0 - mr->p.ipv4_prefix_length; + if (pbi1 > 0) + addr[pbw0+1] |= a << (32 - pbi1); + } + + if ((psid_length - mr->p.psid_prefix_length) > 0) { + psid_mask = (1 << (psid_length - mr->p.psid_prefix_length)) - 1; + pbw0 = (mr->p.ipv6_prefix_length + 32 + - mr->p.ipv4_prefix_length - mr->p.psid_prefix_length) + >> 5; + pbi0 = (mr->p.ipv6_prefix_length + 32 + - mr->p.ipv4_prefix_length - mr->p.psid_prefix_length) + & 0x1f; + addr[pbw0] |= ((psid & psid_mask) << (32 - psid_length)) + >> pbi0; + pbi1 = pbi0 - (32 - psid_length); + if (pbi1 > 0) + addr[pbw0+1] |= (psid & psid_mask) << (32 - pbi1); + } + + /* XXX: */ + if (trans) { + addr[2] |= (ntohl(addr4) >> 16); + addr[3] |= (ntohl(addr4) << 16); + } else { + addr[2] |= (a >> 16); + addr[3] |= (a << 16); + } + addr[3] |= psid; + + for (i = 0; i < 4; ++i) + addr6->s6_addr32[i] = htonl(addr[i]); + + return 0; +} + +static int +map_v4v6(struct sk_buff *skb, struct map *m) +{ + struct iphdr *iph = ip_hdr(skb); + struct map_rule *mr; + __u8 forwarding_mode; + struct pcpu_tstats *tstats = this_cpu_ptr(m->dev->tstats); + int tx_bytes = skb->len; + int err = 0; + u32 mtu; + __be32 saddr4, daddr4; + __be16 sport4, dport4; + __u8 proto; + int icmperr; + int fb = 0; + int df = 0; + + if (iph->frag_off & htons(IP_DF)) + df = 1; + + /* + printk(KERN_NOTICE "map_v4v6: %s: %d.%d.%d.%d -> %d.%d.%d.%d\n", + m->dev->name, + ntohl(iph->saddr) >> 24, + (ntohl(iph->saddr) >> 16) & 0xff, + (ntohl(iph->saddr) >> 8) & 0xff, + ntohl(iph->saddr) & 0xff, + ntohl(iph->daddr) >> 24, + (ntohl(iph->daddr) >> 16) & 0xff, + (ntohl(iph->daddr) >> 8) & 0xff, +
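 *
 * map_gen_addr6() above composes the MAP IPv6 address from the rule's
 * EA bits. A worked example with illustrative numbers: ipv6_prefix
 * 2001:db8::/40, ipv4_prefix_length 24, psid_prefix_length 0,
 * ea_length 16 and psid_offset 4 give psid_length = 24 + 0 + 16 - 32 = 8;
 * for addr4 192.0.2.18 and port 1232 (0x04d0) the PSID is
 * (0x04d0 >> 4) & 0xff = 0x4d, the EA bits (host byte 0x12 followed by
 * PSID 0x4d) are written starting at prefix bit 40, and the interface
 * identifier carries the full IPv4 address at bits 80..111 with the
 * PSID in the low 16 bits.
 *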
ntohl(iph->daddr) & 0xff); + */ + + if (ntohs(iph->frag_off) & IP_OFFSET) { + if (ip_defrag(skb, IP_DEFRAG_MAP46)) + return 0; + iph = ip_hdr(skb); + } + map_get_addrport(iph, &saddr4, &daddr4, &sport4, &dport4, &proto, + &icmperr); + /* + printk(KERN_NOTICE "map_v4v6: %d.%d.%d.%d:%04x -> %d.%d.%d.%d:%04x %d\n", + ntohl(saddr4) >> 24, + (ntohl(saddr4) >> 16) & 0xff, + (ntohl(saddr4) >> 8) & 0xff, + ntohl(saddr4) & 0xff, + ntohs(sport4), + ntohl(daddr4) >> 24, + (ntohl(daddr4) >> 16) & 0xff, + (ntohl(daddr4) >> 8) & 0xff, + ntohl(daddr4) & 0xff, + ntohs(dport4), + proto); + */ + if (icmperr) + mr = map_rule_find_by_ipv4addrport(m, &saddr4, &sport4, 1); + else + mr = map_rule_find_by_ipv4addrport(m, &daddr4, &dport4, 1); + if (mr) + forwarding_mode = mr->p.forwarding_mode; + else { + forwarding_mode = m->p.default_forwarding_mode; + if (m->p.role == MAP_ROLE_BR) + fb = 1; + } + + if ((forwarding_mode != MAP_FORWARDING_MODE_T) && + (forwarding_mode != MAP_FORWARDING_MODE_E)) { + printk(KERN_NOTICE "map_v4v6: unknown forwarding mode.\n"); + err = -1; + goto err; + } + + switch (forwarding_mode) { + case MAP_FORWARDING_MODE_T: + // mtu = 1280 - sizeof(struct ipv6hdr) + sizeof(struct iphdr); + mtu = m->p.ipv6_fragment_size - sizeof(struct ipv6hdr) + + sizeof(struct iphdr); + break; + case MAP_FORWARDING_MODE_E: + // mtu = 1280 - sizeof(struct ipv6hdr); + mtu = m->p.ipv6_fragment_size - sizeof(struct ipv6hdr); + break; + } + + if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { + printk(KERN_NOTICE "map_v4v6: skb->len = %d mtu = %d\n", + skb->len, mtu); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + err = -1; + goto drp; + } + + if (ip_defrag(skb, IP_DEFRAG_MAP46)) + return 0; + + switch (forwarding_mode) { + case MAP_FORWARDING_MODE_T: + err = map_trans_forward_v4v6(skb, m, mr, fb, df); + break; + case MAP_FORWARDING_MODE_E: + err = map_encap_forward_v4v6(skb, m, mr, fb); + break; + } + + if (err) { + printk(KERN_NOTICE "map_v4v6: forwarding error.\n"); + goto err; + } + + tstats->tx_packets++; + tstats->tx_bytes += tx_bytes; + + return 0; + +err: + m->dev->stats.tx_errors++; +drp: + m->dev->stats.tx_dropped++; + kfree_skb(skb); + return err; +} + +struct sk_buff * +map_defrag4(struct sk_buff *skb, struct map *m) +{ + struct ipv6hdr *ipv6h, ipv6h_orig; + struct iphdr *iph; + void *ptr; + unsigned int max_headroom; + + ipv6h = ipv6_hdr(skb); + if (ipv6h->nexthdr != IPPROTO_IPIP) { + return skb; + } + + ptr = ipv6h; + ptr += sizeof(*ipv6h); + iph = (struct iphdr *)ptr; + if (!(iph->frag_off & htons(IP_OFFSET | IP_MF))) { + return skb; + } + + memcpy(&ipv6h_orig, ipv6h, sizeof(struct ipv6hdr)); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + + if (ip_defrag(skb, IP_DEFRAG_MAP46)) { + return NULL; + } + + max_headroom = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr) + 20; + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + struct sk_buff *new_skb; + if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) { + kfree_skb(skb); + return NULL; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IPV6); + + ipv6h = ipv6_hdr(skb); + memcpy(ipv6h, &ipv6h_orig, sizeof(struct ipv6hdr)); + + return skb; +} + +static int +map_v6v4(struct sk_buff *skb, struct map *m) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); 
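/*
 * Inbound order of operations, per the body below: reassemble outer
 * IPv6 fragments (map_defrag6), then inner IPv4 fragments of an
 * encapsulated packet (map_defrag4), pick the forwarding mode from the
 * BMR when one exists, validate destination then source, and only then
 * translate or decapsulate back to IPv4.
 */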
+ __u8 forwarding_mode; + __be32 saddr4, daddr4; + struct pcpu_tstats *tstats = this_cpu_ptr(m->dev->tstats); + int rx_bytes = skb->len; + int fb = 0; + int frag = 0; + int err = 0; + + if (ip6h->nexthdr == IPPROTO_FRAGMENT) + frag = 1; + + /* + printk(KERN_NOTICE "map_v6v4: %s: %08x %08x %08x %08x -> " + "%08x %08x %08x %08x\n", + m->dev->name, + ntohl(ip6h->saddr.s6_addr32[0]), + ntohl(ip6h->saddr.s6_addr32[1]), + ntohl(ip6h->saddr.s6_addr32[2]), + ntohl(ip6h->saddr.s6_addr32[3]), + ntohl(ip6h->daddr.s6_addr32[0]), + ntohl(ip6h->daddr.s6_addr32[1]), + ntohl(ip6h->daddr.s6_addr32[2]), + ntohl(ip6h->daddr.s6_addr32[3])); + */ + + if ((skb = map_defrag6(skb, m)) == NULL) + return 0; + + if ((skb = map_defrag4(skb, m)) == NULL) + return 0; + + read_lock(&m->rule_lock); + if (m->bmr) { + forwarding_mode = m->bmr->p.forwarding_mode; + } else { + forwarding_mode = m->p.default_forwarding_mode; + } + read_unlock(&m->rule_lock); + + switch (forwarding_mode) { + case MAP_FORWARDING_MODE_T: + err = map_trans_validate_dst(skb, m, &daddr4); + if (err) { + goto drp; + } + err = map_trans_validate_src(skb, m, &saddr4, &fb); + if (err && !fb) { + goto drp; + } + err = map_trans_forward_v6v4(skb, m, &saddr4, &daddr4, fb, frag); + if (err) { + printk(KERN_NOTICE "map_v6v4: " + "map_trans_forward_v6v4 error.\n"); + goto err; + } + break; + case MAP_FORWARDING_MODE_E: + err = map_encap_validate_dst(skb, m, &daddr4); + if (err) { + goto drp; + } + err = map_encap_validate_src(skb, m, &saddr4, &fb); + if (err && !fb) { + goto drp; + } + err = map_encap_forward_v6v4(skb, m, &saddr4, &daddr4, fb); + if (err) { + printk(KERN_NOTICE "map_v6v4: " + "map_encap_forward_v6v4 error.\n"); + goto err; + } + break; + default: + printk(KERN_NOTICE "map_v6v4: unknown forwarding mode.\n"); + err = -1; + goto err; + } + + tstats->rx_packets++; + tstats->rx_bytes += rx_bytes; + + return 0; + +err: + m->dev->stats.rx_errors++; +drp: + m->dev->stats.rx_dropped++; + kfree_skb(skb); + return err; +} + +static netdev_tx_t +map_transmit(struct sk_buff *skb, struct net_device *dev) +{ + struct map *m = netdev_priv(dev); + struct in6_addr zero_addr = {}; + struct ipv6hdr *ipv6h; + struct icmp6hdr *icmp6h; + + if (ipv6_addr_equal(&m->map_ipv6_address, &zero_addr)) + return NETDEV_TX_OK; + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: + map_v4v6(skb, m); + break; + case ETH_P_IPV6: + ipv6h = ipv6_hdr(skb); + if (ipv6h->nexthdr == IPPROTO_ICMPV6) { + icmp6h = (struct icmp6hdr *)(ipv6h + 1); + if (icmp6h->icmp6_type == ICMPV6_PKT_TOOBIG) { + ipv6h = (struct ipv6hdr *)(icmp6h + 1); + rt6_pmtu_discovery(&ipv6h->daddr, + &ipv6h->saddr, skb->dev, + ntohl(icmp6h->icmp6_mtu)); + } + } + map_v6v4(skb, m); + break; + default: + printk(KERN_NOTICE "map_transmit: unknown protocol.\n"); + } + + return NETDEV_TX_OK; +} + +/* +static int +map_receive(struct sk_buff *skb) +{ + struct map_net *mapn = net_generic(dev_net(skb->dev), map_net_id); + struct map *m = NULL, *tmp; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + + printk(KERN_NOTICE "map_receive:\n"); + + if (ntohs(skb->protocol) != ETH_P_IPV6) + return 0; + + read_lock(&mapn->map_list_lock); + list_for_each_entry (tmp, &mapn->map_list, list) { + if (ipv6_addr_equal(&ip6h->daddr, &tmp->map_ipv6_address)) { + m = tmp; + break; + } + } + read_unlock(&mapn->map_list_lock); + + if (m) + map_v6v4(skb, m); + + return 0; +} + +static int +map_error(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, + int offset, __be32 info) +{ + return 0; +} +*/ + +static int +map_change_mtu(struct 
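/*
 * MTU changes are accepted but not stored; the effective limit is
 * derived from p.ipv6_fragment_size on the transmit path instead.
 */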
net_device *dev, int new_mtu) +{ + return 0; +} + +static void +map_uninit(struct net_device *dev) +{ + dev_put(dev); +} + +static int map_open(struct net_device *dev); +static int map_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); + +static const struct net_device_ops map_netdev_ops = { + .ndo_uninit = map_uninit, + .ndo_open = map_open, + .ndo_start_xmit = map_transmit, + .ndo_do_ioctl = map_ioctl, + .ndo_change_mtu = map_change_mtu, + .ndo_get_stats = map_get_stats, +}; + +/* +static struct xfrm6_tunnel map_handler __read_mostly = { + .handler = map_receive, + .err_handler = map_error, + .priority = 1, +}; +*/ + +static void +map_debug_dump(struct map_net *mapn) +{ + struct map *m; + struct map_rule *mr; + int i, j; + + i = 0; + read_lock(&mapn->map_list_lock); + list_for_each_entry (m, &mapn->map_list, list) { + printk(KERN_NOTICE "map[%d]:\n", i); + printk(KERN_NOTICE " p.name = %s\n", m->p.name); + printk(KERN_NOTICE " p.tunnel_source = %d\n", + m->p.tunnel_source); + printk(KERN_NOTICE " p.br_address = %08x %08x %08x %08x\n", + ntohl(m->p.br_address.s6_addr32[0]), + ntohl(m->p.br_address.s6_addr32[1]), + ntohl(m->p.br_address.s6_addr32[2]), + ntohl(m->p.br_address.s6_addr32[3])); + printk(KERN_NOTICE " p.br_address_length = %d\n", + m->p.br_address_length); + printk(KERN_NOTICE " p.role = %02x\n", m->p.role); + printk(KERN_NOTICE " p.default_forwarding_mode = %02x\n", + m->p.default_forwarding_mode); + printk(KERN_NOTICE " p.default_forwarding_rule = %02x\n", + m->p.default_forwarding_rule); + printk(KERN_NOTICE " p.ipv6_fragment_size = %d\n", + m->p.ipv6_fragment_size); + printk(KERN_NOTICE " p.ipv4_fragment_inner = %02x\n", + m->p.ipv4_fragment_inner); + printk(KERN_NOTICE " p.napt_always = %02x\n", + m->p.napt_always); + printk(KERN_NOTICE " p.napt_force_recycle = %02x\n", + m->p.napt_force_recycle); + printk(KERN_NOTICE " p.rule_num = %lu\n", m->p.rule_num); + printk(KERN_NOTICE " bmr = %p\n", m->bmr); + printk(KERN_NOTICE " map_ipv6_address = %08x %08x %08x %08x\n", + ntohl(m->map_ipv6_address.s6_addr32[0]), + ntohl(m->map_ipv6_address.s6_addr32[1]), + ntohl(m->map_ipv6_address.s6_addr32[2]), + ntohl(m->map_ipv6_address.s6_addr32[3])); + printk(KERN_NOTICE " map_ipv6_address_length = %d\n", + m->map_ipv6_address_length); + printk(KERN_NOTICE " laddr4 = %d.%d.%d.%d\n", + ntohl(m->laddr4) >> 24, + (ntohl(m->laddr4) >> 16) & 0xff, + (ntohl(m->laddr4) >> 8) & 0xff, + ntohl(m->laddr4) & 0xff); + printk(KERN_NOTICE " psid = 0x%04x\n", m->psid); + printk(KERN_NOTICE " psid_length = %d\n", m->psid_length); + for (j = 0; j < m->port_range_length; ++j) { + printk(KERN_NOTICE " port_range[%4d] = " + "%6d(0x%04x) - %6d(0x%04x)\n", j, + m->port_range[j].min, m->port_range[j].min, + m->port_range[j].max, m->port_range[j].max); + } + for (j = 0; j < 17; ++j) { + printk(KERN_NOTICE " psid_offset_nums[%2d] = %d\n", + j, m->psid_offset_nums[j]); + } + j = 0; + read_lock(&m->rule_lock); + list_for_each_entry (mr, &m->rule_list, list) { + /* + printk(KERN_NOTICE " map_rule[%d](%p):\n", j, mr); + printk(KERN_NOTICE " p.ipv6_prefix = " + "%08x %08x %08x %08x\n", + ntohl(mr->p.ipv6_prefix.s6_addr32[0]), + ntohl(mr->p.ipv6_prefix.s6_addr32[1]), + ntohl(mr->p.ipv6_prefix.s6_addr32[2]), + ntohl(mr->p.ipv6_prefix.s6_addr32[3])); + printk(KERN_NOTICE " p.ipv6_prefix_length = " + "%d\n", mr->p.ipv6_prefix_length); + printk(KERN_NOTICE " p.ipv4_prefix = " + "%d.%d.%d.%d\n", + ntohl(mr->p.ipv4_prefix) >> 24, + (ntohl(mr->p.ipv4_prefix) >> 16) & 0xff, + (ntohl(mr->p.ipv4_prefix) >> 8) & 0xff, + 
ntohl(mr->p.ipv4_prefix) & 0xff); + printk(KERN_NOTICE " p.ipv4_prefix_length = " + "%d\n", mr->p.ipv4_prefix_length); + printk(KERN_NOTICE " p.psid_prefix = " + "0x%04x\n", mr->p.psid_prefix); + printk(KERN_NOTICE " p.psid_preix_length = " + "%d\n", mr->p.psid_prefix_length); + printk(KERN_NOTICE " p.ea_length = %d\n", + mr->p.ea_length); + printk(KERN_NOTICE " p.psid_offset = %d\n", + mr->p.psid_offset); + printk(KERN_NOTICE " p.forwarding_mode = " + "%02x\n", mr->p.forwarding_mode); + printk(KERN_NOTICE " p.forwarding_rule = " + "%02x\n", mr->p.forwarding_rule); + */ + ++j; + } + // printk(KERN_NOTICE " mrtn_root_ipv6addr\n"); + // mrtree_node_dump(m->mrtn_root_ipv6addr); + // printk(KERN_NOTICE " mrtn_root_ipv4addrport\n"); + // mrtree_node_dump(m->mrtn_root_ipv4addrport); + read_unlock(&m->rule_lock); + ++i; + } + read_unlock(&mapn->map_list_lock); +} + +static void +map_destroy(struct map_net *mapn) +{ + struct map *m, *q; + LIST_HEAD(list); + + write_lock_bh(&mapn->map_list_lock); + list_for_each_entry_safe (m, q, &mapn->map_list, list) { + unregister_netdevice_queue(m->dev, &list); + } + write_unlock_bh(&mapn->map_list_lock); + + unregister_netdevice_many(&list); +} + +static inline int +map_get_psid_length(struct map_rule *mr) +{ + return (mr->p.ipv4_prefix_length + mr->p.psid_prefix_length + + mr->p.ea_length - 32); +} + +static inline __u16 +map_get_psid(struct map_rule *mr, struct in6_addr *ipv6addr) +{ + int psid_length = map_get_psid_length(mr); + __u16 psid = 0; + int pbw0, pbi0, pbi1; + __u32 d = 0; + + if (psid_length <= 0) + return 0; + + if (mr->p.psid_prefix_length > 0) + psid = mr->p.psid_prefix + << (psid_length - mr->p.psid_prefix_length); + + if (mr->p.ea_length > 0) { + pbw0 = (mr->p.ipv6_prefix_length + mr->p.ea_length + + mr->p.psid_prefix_length - psid_length) >> 5; + pbi0 = (mr->p.ipv6_prefix_length + mr->p.ea_length + + mr->p.psid_prefix_length - psid_length) & 0x1f; + d = (ntohl(ipv6addr->s6_addr32[pbw0]) << pbi0) + >> (32 - (psid_length - mr->p.psid_prefix_length)); + pbi1 = pbi0 - (32 - (psid_length - mr->p.psid_prefix_length)); + if (pbi1 > 0) + d |= ntohl(ipv6addr->s6_addr32[pbw0+1]) >> (32 - pbi1); + psid |= d; + } + + return psid; +} + +static inline __be32 +map_get_laddr4(struct map_rule *mr, struct in6_addr *ipv6addr) +{ + int psid_length = map_get_psid_length(mr); + __be32 laddr4 = mr->p.ipv4_prefix; + int pbw0, pbi0, pbi1; + __u32 d; + + if (mr->p.ipv4_prefix_length < 32) { + pbw0 = mr->p.ipv6_prefix_length >> 5; + pbi0 = mr->p.ipv6_prefix_length & 0x1f; + d = (ntohl(ipv6addr->s6_addr32[pbw0]) << pbi0) >> + mr->p.ipv4_prefix_length; + pbi1 = pbi0 - mr->p.ipv4_prefix_length; + if (pbi1 > 0) + d |= ntohl(ipv6addr->s6_addr32[pbw0+1]) >> (32 - pbi1); + laddr4 |= htonl(d); + } + + if (psid_length < 0) { + d = ntohl(laddr4); + d &= 0xffffffff << (psid_length * -1); + laddr4 = htonl(d); + } + + return laddr4; +} + +int +map_get_map_ipv6_address(struct map_rule *mr, struct in6_addr *ipv6addr, + struct in6_addr *map_ipv6_address) +{ + int psid_length = map_get_psid_length(mr); + __u32 psid = map_get_psid(mr, ipv6addr); + __u32 psid_mask; + __be32 laddr4 = map_get_laddr4(mr, ipv6addr); + int pbw0, pbi0, pbi1; + + memcpy(map_ipv6_address, &mr->p.ipv6_prefix, sizeof(*map_ipv6_address)); + + if (mr->p.ipv4_prefix_length < 32) { + pbw0 = mr->p.ipv6_prefix_length >> 5; + pbi0 = mr->p.ipv6_prefix_length & 0x1f; + map_ipv6_address->s6_addr32[pbw0] |= htonl((ntohl(laddr4) + << mr->p.ipv4_prefix_length) >> pbi0); + pbi1 = pbi0 - mr->p.ipv4_prefix_length; + if (pbi1 > 
0) + map_ipv6_address->s6_addr32[pbw0+1] + |= htonl(ntohl(laddr4) << (32 - pbi1)); + } + + if ((psid_length - mr->p.psid_prefix_length) > 0) { + psid_mask = (1 << (psid_length - mr->p.psid_prefix_length)) - 1; + pbw0 = (mr->p.ipv6_prefix_length + 32 + - mr->p.ipv4_prefix_length - mr->p.psid_prefix_length) + >> 5; + pbi0 = (mr->p.ipv6_prefix_length + 32 + - mr->p.ipv4_prefix_length - mr->p.psid_prefix_length) + & 0x1f; + map_ipv6_address->s6_addr32[pbw0] |= htonl(((psid & psid_mask) + << (32 - psid_length)) >> pbi0); + pbi1 = pbi0 - (32 - psid_length); + if (pbi1 > 0) + map_ipv6_address->s6_addr32[pbw0+1] + |= htonl((psid & psid_mask) << (32 - pbi1)); + } + + map_ipv6_address->s6_addr32[2] |= htonl(ntohl(laddr4) >> 16); + map_ipv6_address->s6_addr32[3] |= htonl(ntohl(laddr4) << 16); + map_ipv6_address->s6_addr32[3] |= htonl(psid); + + /* + printk(KERN_NOTICE "* psid = 0x%02x psid_length = %d\n", psid, + psid_length); + printk(KERN_NOTICE "* laddr4 = %d.%d.%d.%d\n", + ntohl(laddr4) >> 24, + (ntohl(laddr4) >> 16) & 0xff, + (ntohl(laddr4) >> 8) & 0xff, + ntohl(laddr4) & 0xff); + printk(KERN_NOTICE "* map_ipv6_address = %08x %08x %08x %08x\n", + ntohl(map_ipv6_address->s6_addr32[0]), + ntohl(map_ipv6_address->s6_addr32[1]), + ntohl(map_ipv6_address->s6_addr32[2]), + ntohl(map_ipv6_address->s6_addr32[3])); + */ + + return 0; +} + +static void +map_route6_del(struct in6_addr *addr, int len, struct map *m) +{ + struct fib6_config cfg = {}; + struct in6_addr prefix, zero_addr = {}; + if (ipv6_addr_equal(addr, &zero_addr)) + return; + ipv6_addr_prefix(&prefix, addr, len); + cfg.fc_table = RT6_TABLE_MAIN; + cfg.fc_ifindex = m->dev->ifindex; + cfg.fc_metric = IP6_RT_PRIO_USER; + cfg.fc_dst_len = len; + cfg.fc_flags = RTF_UP; + cfg.fc_nlinfo.nl_net = dev_net(m->dev); + cfg.fc_dst = prefix; + printk(KERN_NOTICE "map_route6_del: %08x%08x%08x%08x/%d\n", + ntohl(prefix.s6_addr32[0]), + ntohl(prefix.s6_addr32[1]), + ntohl(prefix.s6_addr32[2]), + ntohl(prefix.s6_addr32[3]), + len); + ip6_route_del(&cfg); +} + +static void +map_route6_add(struct in6_addr *addr, int len, struct map *m) +{ + struct fib6_config cfg = {}; + struct in6_addr prefix, zero_addr = {}; + if (ipv6_addr_equal(addr, &zero_addr)) + return; + ipv6_addr_prefix(&prefix, addr, len); + cfg.fc_table = RT6_TABLE_MAIN; + cfg.fc_ifindex = m->dev->ifindex; + cfg.fc_metric = IP6_RT_PRIO_USER; + cfg.fc_dst_len = len; + cfg.fc_flags = RTF_UP; + cfg.fc_nlinfo.nl_net = dev_net(m->dev); + cfg.fc_dst = prefix; + printk(KERN_NOTICE "map_route6_add: %08x%08x%08x%08x/%d\n", + ntohl(prefix.s6_addr32[0]), + ntohl(prefix.s6_addr32[1]), + ntohl(prefix.s6_addr32[2]), + ntohl(prefix.s6_addr32[3]), + len); + ip6_route_add(&cfg); +} + +static int +map_update(struct map *m) +{ + struct net *net = dev_net(m->dev); + struct map_net *mapn = net_generic(net, map_net_id); + struct net_device *dev = NULL; + struct inet6_dev *idev = NULL; + struct inet6_ifaddr *ifa; + struct map_rule *mr = NULL; + struct in6_addr map_ipv6_address, orig_map_ipv6_address; + struct in6_addr first_addr = {}; + int orig_map_ipv6_address_length; + + orig_map_ipv6_address = m->map_ipv6_address; + orig_map_ipv6_address_length = m->map_ipv6_address_length; + + if (m->p.role == MAP_ROLE_CE) + dev = dev_get_by_index(net, m->p.tunnel_source); + if (dev) + idev = in6_dev_get(dev); + if (idev) { + int first = 1; + read_lock_bh(&idev->lock); + /* XXX: */ + list_for_each_entry (ifa, &idev->addr_list, if_list) { + if (first && !ifa->scope) { + first_addr = ifa->addr; + first = 0; + } + mr = 
map_rule_find_by_ipv6addr(m, &ifa->addr); + if (mr) { + map_get_map_ipv6_address(mr, &ifa->addr, + &map_ipv6_address); + break; + } + } + read_unlock_bh(&idev->lock); + } + if (idev) + in6_dev_put(idev); + if (dev) + dev_put(dev); + + write_lock_bh(&m->rule_lock); + if (m->p.role == MAP_ROLE_CE && mr) { + int i; + __u16 min, max, p1, p2, p3; + + int port_range_length; + struct map_napt_block *new_port_range, *old_port_range; + m->bmr = mr; + m->laddr4 = map_get_laddr4(mr, &map_ipv6_address); + m->psid = map_get_psid(mr, &map_ipv6_address); + m->psid_length = map_get_psid_length(mr); + memcpy(&m->map_ipv6_address, &map_ipv6_address, + sizeof(m->map_ipv6_address)); + if (m->bmr->p.forwarding_mode == MAP_FORWARDING_MODE_T && + m->psid_length < 0) + m->map_ipv6_address_length = 80 + + m->bmr->p.ipv4_prefix_length + m->bmr->p.ea_length; + else + m->map_ipv6_address_length = 128; + + write_lock_bh(&m->port_range_lock); + if (mr->p.psid_offset == 0) + port_range_length = 1; + else + port_range_length = (1 << mr->p.psid_offset) - 1; + old_port_range = m->port_range; + new_port_range = kmalloc(sizeof(struct map_napt_block) * + port_range_length, GFP_KERNEL); + if (new_port_range) { + if (mr->p.psid_offset == 0) { + if (m->psid_length > 0) { + p1 = m->psid << (16 - m->psid_length); + p2 = 0xffff >> m->psid_length; + min = p1; + max = p1 | p2; + new_port_range[0].min = min; + new_port_range[0].max = max; + } else { + new_port_range[0].min = 0x1000; + new_port_range[0].max = 0xffff; + } + } else { + for (i = 0; i < port_range_length; i++) { + if (m->psid_length > 0) { + p1 = (i + 1) << (16 - + mr->p.psid_offset); + p2 = m->psid << (16 - + mr->p.psid_offset - + m->psid_length); + p3 = 0xffff >> + (mr->p.psid_offset + + m->psid_length); + min = p1 | p2; + max = p1 | p2 | p3; + new_port_range[i].min = min; + new_port_range[i].max = max; + } else { + p1 = (i + 1) << (16 - + mr->p.psid_offset); + p3 = 0xffff >> + (mr->p.psid_offset); + min = p1; + max = p1 | p3; + new_port_range[i].min = min; + new_port_range[i].max = max; + } + } + } + m->port_range = new_port_range; + m->port_range_length = port_range_length; + } else { + m->port_range = NULL; + m->port_range_length = 0; + } + if (old_port_range) + kfree(old_port_range); + write_unlock_bh(&m->port_range_lock); + } else { + m->bmr = NULL; + if (m->p.role == MAP_ROLE_BR) { + memcpy(&m->map_ipv6_address, &m->p.br_address, + sizeof(m->map_ipv6_address)); + m->map_ipv6_address_length = m->p.br_address_length; + } else { + m->map_ipv6_address.s6_addr32[0] + = first_addr.s6_addr32[0]; + m->map_ipv6_address.s6_addr32[1] + = first_addr.s6_addr32[1]; + if (m->p.default_forwarding_mode == + MAP_FORWARDING_MODE_E) { + m->map_ipv6_address.s6_addr32[2] + = htonl(0x00c00000); + m->map_ipv6_address.s6_addr32[3] + = htonl(0x02000000); + m->map_ipv6_address_length = 128; + } else { + if (m->p.br_address_length > 64) { + m->map_ipv6_address.s6_addr32[2] + = htonl(0x00006464); + m->map_ipv6_address.s6_addr32[3] + = htonl(0x00000000); + m->map_ipv6_address_length = 96; + } else { + m->map_ipv6_address.s6_addr32[2] + = htonl(0x00000000); + m->map_ipv6_address.s6_addr32[3] + = htonl(0x00000000); + m->map_ipv6_address_length = 72; + } + } + } + m->laddr4 = 0; + m->psid = 0; + m->psid_length = 0; + + write_lock_bh(&m->port_range_lock); + if (m->port_range) + kfree(m->port_range); + m->port_range = kmalloc(sizeof(struct map_napt_block), GFP_KERNEL); + m->port_range[0].min = 0x1000; + m->port_range[0].max = 0xffff; + m->port_range_length = 1; + 
write_unlock_bh(&m->port_range_lock); + } + write_unlock_bh(&m->rule_lock); + + printk(KERN_NOTICE "map_update: begin\n"); + map_debug_dump(mapn); + printk(KERN_NOTICE "map_update: end\n"); + + if ((!ipv6_addr_equal(&orig_map_ipv6_address, &m->map_ipv6_address) || + orig_map_ipv6_address_length != m->map_ipv6_address_length)) { + map_route6_del(&orig_map_ipv6_address, + orig_map_ipv6_address_length, m); + map_route6_add(&m->map_ipv6_address, m->map_ipv6_address_length, + m); + } + + return 0; +} + +static void +map_free(struct net_device *dev) +{ + struct map *m = netdev_priv(dev); + struct map_net *mapn = net_generic(dev_net(dev), map_net_id); + struct map_rule *mr, *mrq; + struct map_pool *mp, *mpq; + struct map_defrag6_node *dn, *dn_node; + + printk(KERN_NOTICE "map_free: %s\n", m->dev->name); + + write_lock_bh(&m->defrag6_lock); + list_for_each_entry_safe (dn, dn_node, &m->defrag6_list, dn_list) { + kfree_skb(dn->skb); + } + write_unlock_bh(&m->defrag6_lock); + + write_lock_bh(&m->port_range_lock); + if (m->port_range) + kfree(m->port_range); + write_unlock_bh(&m->port_range_lock); + + write_lock_bh(&m->rule_lock); + list_for_each_entry_safe (mr, mrq, &m->rule_list, list) { + /* + list_del(&mr->list); + kfree(mr); + */ + map_rule_free(m, mr); + m->p.rule_num -= 1; + } + write_unlock_bh(&m->rule_lock); + m->p.rule_num = 0; + + write_lock_bh(&m->pool_lock); + list_for_each_entry_safe (mp, mpq, &m->pool_list, list) { + map_pool_free(m, mp); + m->p.pool_num -= 1; + } + write_unlock_bh(&m->pool_lock); + m->p.pool_num = 0; + + write_lock_bh(&mapn->map_list_lock); + list_del(&m->list); + write_unlock_bh(&mapn->map_list_lock); + + free_percpu(dev->tstats); + free_netdev(dev); +} + +static int +map_change(struct map *m, struct map_parm *p) +{ + int old_rule_num; + + old_rule_num = m->p.rule_num; + m->p = *p; + m->p.rule_num = old_rule_num; + + map_update(m); + + return 0; +} + +static int +map_init(struct net_device *dev, struct map_parm *p) +{ + int h, i; + struct map *m = netdev_priv(dev); + + if (p) + m->p = *p; + m->p.rule_num = 0; + m->p.pool_num = 0; + m->dev = dev; + INIT_LIST_HEAD(&m->rule_list); + m->mrtn_root_ipv6addr = NULL; + m->mrtn_root_ipv4addrport = NULL; + INIT_LIST_HEAD(&m->pool_list); + /* */ + rwlock_init(&m->rule_lock); + rwlock_init(&m->pool_lock); + rwlock_init(&m->port_range_lock); + + for (h = 0; h < MAP_NAPT_HASH_LOOKUP_SIZE; ++h) + INIT_HLIST_HEAD(&m->napt_hash_lup0[h]); + for (h = 0; h < MAP_NAPT_HASH_LOOKUP_SIZE; ++h) + INIT_HLIST_HEAD(&m->napt_hash_lup1[h]); + for (h = 0; h < MAP_NAPT_HASH_CREATE_SIZE; ++h) + INIT_HLIST_HEAD(&m->napt_hash_crat[h]); + INIT_LIST_HEAD(&m->napt_list); + INIT_LIST_HEAD(&m->napt_gc_list); + rwlock_init(&m->napt_lock); + m->napt_last_gc = jiffies; + + for (h = 0; h < MAP_DEFRAG6_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&m->defrag6_hash[h]); + INIT_LIST_HEAD(&m->defrag6_list); + rwlock_init(&m->defrag6_lock); + m->defrag6_last_gc = jiffies; + + for (i = 0; i < 17; ++i) { + m->psid_offset_nums[i] = 0; + } + rwlock_init(&m->psid_offset_nums_lock); + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void +map_setup(struct net_device *dev) +{ + dev->netdev_ops = &map_netdev_ops; + dev->destructor = map_free; + dev->type = ARPHRD_TUNNEL6; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); + //dev->mtu = 1280; + dev->mtu = ETH_DATA_LEN; + dev->flags |= IFF_NOARP; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= 
~IFF_XMIT_DST_RELEASE; + dev->priv_flags |= IFF_MAP; +} + +static struct map * +map_create(struct net *net, struct map_parm *p) +{ + struct net_device *dev; + struct map *m; + char name[IFNAMSIZ]; + int err; + struct map_net *mapn = net_generic(net, map_net_id); + + if (p->name[0]) + strlcpy(name, p->name, IFNAMSIZ); + else + sprintf(name, "map%%d"); + + dev = alloc_netdev(sizeof(*m), name, map_setup); + if (dev == NULL) + goto failed; + + dev_net_set(dev, net); + + m = netdev_priv(dev); + + err = map_init(dev, p); + if (err < 0) + goto failed_free; + + err = register_netdevice(dev); + if (err < 0) + goto failed_free; + + strcpy(m->p.name, dev->name); + + dev_hold(dev); + + write_lock_bh(&mapn->map_list_lock); + list_add_tail(&m->list, &mapn->map_list); + write_unlock_bh(&mapn->map_list_lock); + + return m; + +failed_free: + map_free(dev); +failed: + return NULL; +} + +static struct map * +map_find_or_create(struct net *net, struct map_parm *p, int create) +{ + struct map *m; + struct map_net *mapn = net_generic(net, map_net_id); + + read_lock(&mapn->map_list_lock); + list_for_each_entry (m, &mapn->map_list, list) { + if (!strncmp(m->p.name, p->name, IFNAMSIZ)) { + read_unlock(&mapn->map_list_lock); + return m; + } + } + read_unlock(&mapn->map_list_lock); + + if (!create) + return NULL; + + return map_create(net, p); +} + +static int +map_open(struct net_device *dev) +{ + int err = 0; + struct map *m = netdev_priv(dev); + if (m->map_ipv6_address_length) + map_route6_add(&m->map_ipv6_address, m->map_ipv6_address_length, + m); + return err; +} + +static int +map_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + int err = 0; + struct map_parm p; + struct map_parm *pp; + struct map_rule_parm *rpp; + struct map_pool_parm *ppp; + struct map_current_parm *cpp; + struct map_napt_block *nbp; + struct map_napt_parm *npp; + struct map_napt_node_parm *nnpp; + struct map_napt_node *nn; + unsigned int size = 0; + int i, j; + struct map *m = NULL; + struct map_rule *mr = NULL; + struct map_pool *mp = NULL; + struct net *net = dev_net(dev); + struct map_net *mapn = net_generic(net, map_net_id); + unsigned long current_time; + struct timespec timespec_max; + + switch (cmd) { + case SIOCGETMAP: + if (dev == mapn->map_fb_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, + sizeof(p))) { + err = -EFAULT; + break; + } + m = map_find_or_create(net, &p, 0); + } + if (m == NULL) + m = netdev_priv(dev); + memcpy(&p, &m->p, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + case SIOCADDMAP: + case SIOCCHGMAP: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + if (p.role != MAP_ROLE_BR && p.role != MAP_ROLE_CE) { + err = -EFAULT; + break; + } + if (p.default_forwarding_mode != MAP_FORWARDING_MODE_T && + p.default_forwarding_mode != MAP_FORWARDING_MODE_E) { + err = -EFAULT; + break; + } + if (p.default_forwarding_rule != MAP_FORWARDING_RULE_T && + p.default_forwarding_rule != MAP_FORWARDING_RULE_F) { + err = -EFAULT; + break; + } + if (p.ipv6_fragment_size < 1280) { + err = -EFAULT; + break; + } + if (p.ipv4_fragment_inner != MAP_IPV4_FRAG_INNER_T && + p.ipv4_fragment_inner != MAP_IPV4_FRAG_INNER_F) { + err = -EFAULT; + break; + } + if (p.napt_always != MAP_NAPT_ALWAYS_T && + p.napt_always != MAP_NAPT_ALWAYS_F) { + err = -EFAULT; + break; + } + if (p.napt_force_recycle != MAP_NAPT_FORCE_RECYCLE_T && + p.napt_force_recycle != 
MAP_NAPT_FORCE_RECYCLE_F) {
+			err = -EFAULT;
+			break;
+		}
+		/*
+		if (p.role == MAP_ROLE_BR && p.br_address_length > 64) {
+			err = -EFAULT;
+			break;
+		}
+		*/
+		if (p.br_address_length > 96) {
+			err = -EFAULT;
+			break;
+		}
+		m = map_find_or_create(net, &p, cmd == SIOCADDMAP);
+		if (dev != mapn->map_fb_dev && cmd == SIOCCHGMAP) {
+			if (m != NULL) {
+				if (m->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else
+				m = netdev_priv(dev);
+			synchronize_net(); /* XXX: */
+			err = map_change(m, &p);
+			netdev_state_change(dev); /* XXX: */
+		}
+		if (m) {
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &m->p,
+					 sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDMAP ? -ENOBUFS : -ENOENT);
+		if (m)	/* m is NULL when the add/change failed */
+			map_update(m);
+		break;
+	case SIOCDELMAP:
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+		if (dev == mapn->map_fb_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
+					   sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			if ((m = map_find_or_create(net, &p, 0)) == NULL) {
+				err = -ENOENT;
+				break;
+			}
+			if (m->dev == mapn->map_fb_dev) {
+				err = -EPERM;
+				break;
+			}
+			dev = m->dev;
+		}
+		unregister_netdevice(dev);
+		if (m)	/* m is NULL when the ioctl came in on the map device itself */
+			map_update(m);
+		break;
+	case SIOCGETMAPRULES:
+		if (dev == mapn->map_fb_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
+					   sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			m = map_find_or_create(net, &p, 0);
+		}
+		if (m == NULL)
+			m = netdev_priv(dev);
+		size = sizeof(*pp) + sizeof(*rpp) * m->p.rule_num;
+		pp = kmalloc(size, GFP_KERNEL);
+		if (!pp) {
+			err = -ENOMEM;
+			break;
+		}
+		*pp = m->p;
+		rpp = pp->rule;
+		read_lock(&m->rule_lock);
+		list_for_each_entry (mr, &m->rule_list, list) {
+			*rpp = mr->p;
+			++rpp;
+		}
+		read_unlock(&m->rule_lock);
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, pp, size))
+			err = -EFAULT;
+		kfree(pp);
+		break;
+	case SIOCADDMAPRULES:
+	case SIOCCHGMAPRULES:
+	case SIOCDELMAPRULES:
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+			err = -EFAULT;
+			break;
+		}
+		if (dev == mapn->map_fb_dev)
+			m = map_find_or_create(net, &p, 0);
+		if (m == NULL)
+			m = netdev_priv(dev);
+		size = sizeof(*pp) + sizeof(*rpp) * p.rule_num;
+		pp = kmalloc(size, GFP_KERNEL);
+		if (!pp) {
+			err = -ENOMEM;
+			break;
+		}
+		if (copy_from_user(pp, ifr->ifr_ifru.ifru_data, size)) {
+			kfree(pp);
+			err = -EFAULT;
+			break;
+		}
+		for (i = 0; i < p.rule_num; i++) {
+			rpp = &pp->rule[i];
+			if (cmd == SIOCADDMAPRULES)
+				if (map_rule_add(m, rpp) < 0)
+					err = -EFAULT;
+			if (cmd == SIOCCHGMAPRULES)
+				if (map_rule_change(m, rpp) < 0)
+					err = -EFAULT;
+			if (cmd == SIOCDELMAPRULES)
+				if (map_rule_delete(m, rpp) < 0)
+					err = -EFAULT;
+		}
+		kfree(pp);
+		map_update(m);
+		break;
+	case SIOCGETMAPCURRNUM:
+	case SIOCGETMAPCURR:
+		if (dev == mapn->map_fb_dev) {
+			err = -EFAULT;
+			break;
+		}
+		m = netdev_priv(dev);
+		size = sizeof(*cpp);
+		if (cmd == SIOCGETMAPCURR)
+			size += sizeof(*nbp) * m->port_range_length;
+		cpp = kmalloc(size, GFP_KERNEL);
+		if (!cpp) {
+			err = -ENOMEM;
+			break;
+		}
+		if (copy_from_user(cpp, ifr->ifr_ifru.ifru_data, sizeof(*cpp))) {
+			kfree(cpp);
+			err = -EFAULT;
+			break;
+		}
+		if (cmd == SIOCGETMAPCURR) {
+			/* clamp to what the caller asked for and shrink the
+			 * copy size in bytes, not in element counts */
+			if (cpp->port_range_length < 0)
+				cpp->port_range_length = 0;
+			if (cpp->port_range_length > m->port_range_length)
+				cpp->port_range_length = m->port_range_length;
+			size -= sizeof(*nbp) * (m->port_range_length
+						- cpp->port_range_length);
+		}
+		if (m->bmr) {
+			cpp->has_bmr = 1;
+			cpp->bmrp = m->bmr->p;
+		} else {
+			cpp->has_bmr = 0;
+			memset(&cpp->bmrp, 0, sizeof(cpp->bmrp));
+		}
+		cpp->map_ipv6_address = m->map_ipv6_address;
+		cpp->map_ipv6_address_length = m->map_ipv6_address_length;
+		cpp->laddr4 = m->laddr4;
+		cpp->psid = m->psid;
+		cpp->psid_length = m->psid_length;
+		if (cmd == SIOCGETMAPCURR) {
+			for (i = 0; i < cpp->port_range_length; ++i) {
+				cpp->port_range[i] = m->port_range[i];
+			}
+		}
+		cpp->port_range_length = m->port_range_length;
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, cpp, size))
+			err = -EFAULT;
+		kfree(cpp);
+		break;
+	case SIOCGETMAPNAPTNUM:
+	case SIOCGETMAPNAPT:
+		if (dev == mapn->map_fb_dev) {
+			err = -EFAULT;
+			break;
+		}
+		m = netdev_priv(dev);
+		write_lock_bh(&m->napt_lock);
+		map_napt_nn_gc(m);
+		// write_unlock_bh(&m->napt_lock);
+		// read_lock(&m->napt_lock);
+		i = 0;
+		list_for_each_entry (nn, &m->napt_list, nn_list) { ++i; }
+		size = sizeof(*npp);
+		if (cmd == SIOCGETMAPNAPT)
+			size += sizeof(*nnpp) * i;
+		npp = kmalloc(size, GFP_KERNEL);
+		if (!npp) {
+			// read_unlock(&m->napt_lock);
+			write_unlock_bh(&m->napt_lock);
+			err = -ENOMEM;
+			break;
+		}
+		if (copy_from_user(npp, ifr->ifr_ifru.ifru_data,
+				   sizeof(*npp))) {
+			// read_unlock(&m->napt_lock);
+			write_unlock_bh(&m->napt_lock);
+			kfree(npp);	/* don't leak npp on the error path */
+			err = -EFAULT;
+			break;
+		}
+		if (cmd == SIOCGETMAPNAPT && npp->napt_node_num < i) {
+			/* shrink the copy size in bytes, not in counts */
+			size -= sizeof(*nnpp) * (i - npp->napt_node_num);
+		}
+		if (cmd == SIOCGETMAPNAPT) {
+			jiffies_to_timespec(ULONG_MAX, &timespec_max);
+			current_time = jiffies;
+			jiffies_to_timespec(current_time, &npp->current_time);
+			npp->current_time = timespec_add(npp->current_time,
+							 timespec_max);
+			j = 0;
+			list_for_each_entry_reverse (nn, &m->napt_list,
+						     nn_list) {
+				npp->napt_node[j].raddr = nn->raddr;
+				npp->napt_node[j].laddr = nn->laddr;
+				npp->napt_node[j].maddr = nn->maddr;
+				npp->napt_node[j].rport = nn->rport;
+				npp->napt_node[j].lport = nn->lport;
+				npp->napt_node[j].mport = nn->mport;
+				npp->napt_node[j].laddr6 = nn->laddr6;
+				npp->napt_node[j].proto = nn->proto;
+				npp->napt_node[j].flags = nn->flags;
+				// npp->napt_node[j].last_used = nn->last_used;
+				jiffies_to_timespec(nn->last_used,
+					&npp->napt_node[j].last_used);
+				if (nn->last_used <= current_time)
+					npp->napt_node[j].last_used
+						= timespec_add(npp->napt_node[j]
+							.last_used, timespec_max);
+				++j;
+				if (j >= npp->napt_node_num)
+					break;
+			}
+		}
+		npp->napt_node_num = i;
+		// read_unlock(&m->napt_lock);
+		write_unlock_bh(&m->napt_lock);
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, npp, size))
+			err = -EFAULT;
+		kfree(npp);
+		break;
+	case SIOCGETMAPPOOLS:
+		if (dev == mapn->map_fb_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
+					   sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			m = map_find_or_create(net, &p, 0);
+		}
+		if (m == NULL)
+			m = netdev_priv(dev);
+		size = sizeof(*pp) + sizeof(*ppp) * m->p.pool_num;
+		pp = kmalloc(size, GFP_KERNEL);
+		if (!pp) {
+			err = -ENOMEM;
+			break;
+		}
+		*pp = m->p;
+		ppp = pp->pool;
+		read_lock_bh(&m->pool_lock);
+		list_for_each_entry (mp, &m->pool_list, list) {
+			*ppp = mp->p;
+			++ppp;
+		}
+		read_unlock_bh(&m->pool_lock);
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, pp, size))
+			err = -EFAULT;
+		kfree(pp);
+		break;
+	case SIOCADDMAPPOOLS:
+	case SIOCDELMAPPOOLS:
+	case SIOCCHGMAPPOOLS:
+		if (!capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+			err = -EFAULT;
+			break;
+		}
+		if (dev == mapn->map_fb_dev)
+			m = map_find_or_create(net, &p, 0);
+		if (m == NULL)
+			m = netdev_priv(dev);
+		size = sizeof(*pp) + sizeof(*ppp) * p.pool_num;
+		pp = kmalloc(size, GFP_KERNEL);
+		if (!pp) {
+			err = -ENOMEM;
+			break;
+		}
+		if (copy_from_user(pp, ifr->ifr_ifru.ifru_data, size)) {
+			kfree(pp);
+			err = -EFAULT;
+			break;
+		}
+		for (i = 0; i < p.pool_num; i++) {
+			ppp = &pp->pool[i];
+
if (cmd == SIOCADDMAPPOOLS) + if (map_pool_add(m, ppp) < 0) + err = -EFAULT; + if (cmd == SIOCCHGMAPPOOLS) + if (map_pool_change(m, ppp) < 0) + err = -EFAULT; + if (cmd == SIOCDELMAPPOOLS) + if (map_pool_delete(m, ppp) < 0) + err = -EFAULT; + } + kfree(pp); + map_napt_debug_pool(m); /* XXX: */ + break; + default: + printk(KERN_NOTICE "map_ioctl: ???\n"); + } + + /* + printk(KERN_NOTICE "map_ioctl: begin\n"); + map_debug_dump(mapn); + printk(KERN_NOTICE "map_ioctl: end\n"); + */ + + return err; +} + +static int __net_init +map_net_init(struct net *net) +{ + struct map_net *mapn = net_generic(net, map_net_id); + struct map *m = NULL; + struct map_parm p; + int err; + + rwlock_init(&mapn->map_list_lock); + INIT_LIST_HEAD(&mapn->map_list); + + memset(&p, 0, sizeof(p)); + sprintf(p.name, "mapfb"); + + err = -ENOMEM; + mapn->map_fb_dev = alloc_netdev(sizeof(struct map), p.name, map_setup); + if (!mapn->map_fb_dev) + goto err_alloc_dev; + + dev_net_set(mapn->map_fb_dev, net); + + m = netdev_priv(mapn->map_fb_dev); + + err = map_init(mapn->map_fb_dev, &p); + if (err < 0) + goto err_register; + + err = register_netdev(mapn->map_fb_dev); + if (err < 0) + goto err_register; + + dev_hold(mapn->map_fb_dev); + + write_lock_bh(&mapn->map_list_lock); + list_add_tail(&m->list, &mapn->map_list); + write_unlock_bh(&mapn->map_list_lock); + + return 0; + +err_register: + map_free(mapn->map_fb_dev); +err_alloc_dev: + return err; +} + +static void __net_exit +map_net_exit(struct net *net) +{ + struct map_net *mapn = net_generic(net, map_net_id); + rtnl_lock(); + map_destroy(mapn); + rtnl_unlock(); +} + +static struct pernet_operations map_net_ops = { + .init = map_net_init, + .exit = map_net_exit, + .id = &map_net_id, + .size = sizeof(struct map_net), +}; + +static int +map_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = (struct net_device *)ptr; + struct net *net = dev_net(dev); + struct map_net *mapn = net_generic(net, map_net_id); + struct map *m; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + + read_lock(&mapn->map_list_lock); + list_for_each_entry (m, &mapn->map_list, list) { + if (m->p.tunnel_source == dev->ifindex) { + printk(KERN_NOTICE "map_netdev_event: %s\n", m->p.name); + m->p.tunnel_source = 0; + map_update(m); + } + } + read_unlock(&mapn->map_list_lock); + + return NOTIFY_DONE; +} + +static struct notifier_block map_netdev_notifier = { + .notifier_call = map_netdev_event, +}; + +static int +map_inet6addr_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + struct net_device *dev = ifa->idev->dev; + struct net *net = dev_net(dev); + struct map_net *mapn = net_generic(net, map_net_id); + struct map *m; + + read_lock(&mapn->map_list_lock); + list_for_each_entry (m, &mapn->map_list, list) { + if (m->p.tunnel_source == dev->ifindex) { + printk(KERN_NOTICE "map_inet6addr_event: %s\n", + m->p.name); + map_update(m); + } + } + read_unlock(&mapn->map_list_lock); + + return NOTIFY_DONE; +} + +static struct notifier_block map_inet6addr_notifier = { + .notifier_call = map_inet6addr_event, +}; + +static int __init +map_module_init(void) +{ + int err; + + err = map_rule_init(); + if (err < 0) { + printk(KERN_ERR "map init: can't init rule.\n"); + goto out_rule; + } + + err = map_defrag6_init(); + if (err < 0) { + printk(KERN_ERR "map init: can't init defrag6.\n"); + goto out_defrag6; + } + + err = map_napt_init(); + if (err < 0) { + printk(KERN_ERR "map init: can't init 
napt.\n"); + goto out_napt; + } + + err = register_pernet_device(&map_net_ops); + if (err < 0) { + printk(KERN_ERR "map init: can't register pernet.\n"); + goto out_pernet; + } + + err = register_netdevice_notifier(&map_netdev_notifier); + if (err < 0) { + printk(KERN_ERR "map init: " + "can't register netdevice_notifier.\n"); + goto out_netdevice; + } + + err = register_inet6addr_notifier(&map_inet6addr_notifier); + if (err < 0) { + printk(KERN_ERR "map init: " + "can't register inet6addr_notifier.\n"); + goto out_inet6addr; + } + + /* + err = xfrm6_tunnel_register(&map_handler, AF_INET); + if (err < 0) { + printk(KERN_ERR "map init: can't register tunnel.\n"); + goto out_tunnel; + } + */ + + return 0; + +/* +out_tunnel: + unregister_inet6addr_notifier(&map_inet6addr_notifier); +*/ +out_inet6addr: + unregister_netdevice_notifier(&map_netdev_notifier); +out_netdevice: + unregister_pernet_device(&map_net_ops); +out_pernet: + map_napt_exit(); +out_napt: + map_defrag6_exit(); +out_defrag6: + map_rule_exit(); +out_rule: + return err; +} + +static void __exit +map_module_exit(void) +{ + /* + if (xfrm6_tunnel_deregister(&map_handler, AF_INET)) + printk(KERN_NOTICE "map close: can't deregister tunnel.\n"); + */ + if (unregister_inet6addr_notifier(&map_inet6addr_notifier)) + printk(KERN_NOTICE "map close: " + "can't deregister inet6addr_notifier.\n"); + if (unregister_netdevice_notifier(&map_netdev_notifier)) + printk(KERN_NOTICE "map close: " + "can't deregister netdevice_notifier.\n"); + unregister_pernet_device(&map_net_ops); + map_napt_exit(); + map_defrag6_exit(); + map_rule_exit(); +} + +module_init(map_module_init); +module_exit(map_module_exit); diff --git a/net/ipv6/map_napt.c b/net/ipv6/map_napt.c index e69de29..81204ae 100644 --- a/net/ipv6/map_napt.c +++ b/net/ipv6/map_napt.c @@ -0,0 +1,950 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff. 
If we meet some day, and you
+ * think this stuff is worth it, you can buy me a beer in return Masakazu Asama
+ * ----------------------------------------------------------------------------
+ */
+/*
+ * MAP A+P NAPT function
+ *
+ * Authors:
+ *	Masakazu Asama
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static struct kmem_cache *nn_kmem __read_mostly;
+static int nn_kmem_alloced = 0;
+
+/*
+ * Hairpinning: on a CE, a locally originated packet whose destination is
+ * the CE's own shared IPv4 address and whose destination port falls inside
+ * our own PSID port set never leaves the device; it is NAPT'ed inbound and
+ * reinjected via netif_rx().
+ */
+int
+map_napt_hairpin(struct sk_buff *skb, struct map *m, __be32 *daddrp,
+	__be16 *dportp, struct in6_addr *saddr6, int fb)
+{
+	struct iphdr *iph;
+	__be32 *saddrp = NULL;
+	__be16 *sportp = NULL;
+	__sum16 *checkp = NULL;
+	__u16 port;
+	__u16 mask;
+	__u16 psid;
+	__u8 psid_offset;
+
+	if (m->p.role != MAP_ROLE_CE || !m->bmr ||
+	    (m->psid_length <= 0 && m->p.napt_always == MAP_NAPT_ALWAYS_F))
+		goto out;
+
+	read_lock(&m->rule_lock);
+	if (!m->bmr) {
+		read_unlock(&m->rule_lock);
+		return 0;
+	}
+	psid_offset = m->bmr->p.psid_offset;
+	read_unlock(&m->rule_lock);
+
+	if (*daddrp != m->laddr4)
+		goto out;
+
+	iph = ip_hdr(skb);
+	port = ntohs(*dportp);
+
+	if (m->psid_length == 32)
+		mask = 0xffff;
+	else {
+		mask = (1 << m->psid_length) - 1;
+		mask <<= 16 - psid_offset - m->psid_length;
+	}
+	psid = m->psid;
+	psid <<= 16 - psid_offset - m->psid_length;
+	if ((port & mask) == psid) {
+		printk(KERN_NOTICE "map_napt_hairpin: hairpinning!\n");
+		if (!map_napt(iph, 1, m, &saddrp, &sportp, &checkp, saddr6, fb)) {
+			/* XXX: */
+			skb->rxhash = 0;
+			skb_set_queue_mapping(skb, 0);
+			skb_dst_drop(skb);
+			nf_reset(skb);
+			netif_rx(skb);
+		}
+		return 1;
+	}
+
+out:
+	return 0;
+}
+
+static inline __be16
+map_napt_generate_port_random(struct map *m)
+{
+	u32 t;
+	int i;
+
+	if (!m->port_range)
+		return 0;
+
+	read_lock(&m->port_range_lock);
+	i = random32() % m->port_range_length;
+	t = m->port_range[i].max - m->port_range[i].min;
+	if (t)
+		t = random32() % t;
+	t = m->port_range[i].min + t;
+	read_unlock(&m->port_range_lock);
+
+	return htons(t);
+}
+
+static inline __be16
+map_napt_generate_port_next(__be16 p, struct map *m)
+{
+	int i;
+	u32 t;
+
+	if (!m->port_range)
+		return 0;
+
+	t = ntohs(p) + 1;
+
+	read_lock(&m->port_range_lock);
+	for (i = 0; i < m->port_range_length; i++) {
+		if (ntohs(p) == m->port_range[i].max) {
+			t = m->port_range[(i + 1) % m->port_range_length].min;
+			break;
+		}
+	}
+	read_unlock(&m->port_range_lock);
+
+	return htons(t);
+}
+
+static inline u32
+map_napt_nn_hash_lookup(__be32 addr, __be16 port, __u8 proto)
+{
+	/* XXX: revisit this hash function later */
+	u32 h = ntohl(addr) | ntohs(port) | proto;
+	h ^= (h >> 20);
+	h ^= (h >> 10);
+	h ^= (h >> 5);
+	h &= (MAP_NAPT_HASH_LOOKUP_SIZE - 1);
+	return h;
+}
+
+static inline u32
+map_napt_nn_hash_create(__be32 addr, __u8 proto)
+{
+	/* XXX: revisit this hash function later */
+	u32 h = ntohl(addr) | proto;
+	h ^= (h >> 20);
+	h ^= (h >> 10);
+	h ^= (h >> 5);
+	h &= (MAP_NAPT_HASH_CREATE_SIZE - 1);
+	return h;
+}
+
+static inline void
+map_napt_nn_debug(char *h, char *f, struct map_napt_node *nn)
+{
+	printk(KERN_INFO "%s: proto = %d laddr6 = %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+		"laddr = %d.%d.%d.%d lport = %d maddr = %d.%d.%d.%d mport = %d "
+		"raddr = %d.%d.%d.%d rport = %d last_used = %lu "
+		"nn_kmem_alloced = %d %s\n",
+		h,
+		nn->proto,
+		ntohs(nn->laddr6.s6_addr16[0]),
+		ntohs(nn->laddr6.s6_addr16[1]),
+		ntohs(nn->laddr6.s6_addr16[2]),
+		ntohs(nn->laddr6.s6_addr16[3]),
+		ntohs(nn->laddr6.s6_addr16[4]),
+		ntohs(nn->laddr6.s6_addr16[5]),
+		ntohs(nn->laddr6.s6_addr16[6]),
+		ntohs(nn->laddr6.s6_addr16[7]),
+		ntohl(nn->laddr) >> 24,
+		(ntohl(nn->laddr) >> 16) & 0xff,
+		(ntohl(nn->laddr) >> 8) & 0xff,
+		ntohl(nn->laddr) & 0xff,
+		ntohs(nn->lport),
+		ntohl(nn->maddr) >> 24,
+		(ntohl(nn->maddr) >> 16) & 0xff,
+		(ntohl(nn->maddr) >> 8) & 0xff,
+		ntohl(nn->maddr) & 0xff,
+		ntohs(nn->mport),
+		ntohl(nn->raddr) >> 24,
+		(ntohl(nn->raddr) >> 16) & 0xff,
+		(ntohl(nn->raddr) >> 8) & 0xff,
+		ntohl(nn->raddr) & 0xff,
+		ntohs(nn->rport),
+		nn->last_used,
+		nn_kmem_alloced,
+		f);
+}
+
+static inline int
+map_napt_nn_expired(struct map_napt_node *nn)
+{
+	if (nn->proto == IPPROTO_TCP)
+		return time_is_before_jiffies(nn->last_used
+			+ MAP_NAPT_EXPIRES_TCP);
+	else
+		return time_is_before_jiffies(nn->last_used
+			+ MAP_NAPT_EXPIRES_OTHER);
+}
+
+static inline void
+map_napt_nn_destroy(char *f, struct map_napt_node *nn)
+{
+	map_napt_nn_debug("before destroy", f, nn);
+	hlist_del(&nn->nn_hash_lup0);
+	hlist_del(&nn->nn_hash_lup1);
+	hlist_del(&nn->nn_hash_crat);
+	list_del_init(&nn->nn_list);
+	list_del_init(&nn->nn_gc_list);
+	kmem_cache_free(nn_kmem, nn);
+	--nn_kmem_alloced;
+}
+
+static inline int
+map_napt_nn_est(__u8 flags)
+{
+	if ((flags & MAP_NAPT_F_EST) == MAP_NAPT_F_EST)
+		return 1;
+	return 0;
+}
+
+static inline int
+map_napt_nn_finrst(__u8 flags)
+{
+	if ((flags & MAP_NAPT_F_FIN) == MAP_NAPT_F_FIN)
+		return 1;
+	if ((flags & MAP_NAPT_F_RST) == MAP_NAPT_F_RST)
+		return 2;
+	return 0;
+}
+
+void
+map_napt_nn_gc(struct map *m)
+{
+	int ret = 0;
+	struct map_napt_node *nn, *nn_node;
+	char *reason = "???";
+	unsigned long min_expires, gc_threshold;
+
+	min_expires = (MAP_NAPT_EXPIRES_TCP < MAP_NAPT_EXPIRES_OTHER)
+		      ? MAP_NAPT_EXPIRES_TCP
+		      : MAP_NAPT_EXPIRES_OTHER;
+	gc_threshold = MAP_NAPT_GC_THRESHOLD;
+
+	list_for_each_entry_safe (nn, nn_node, &m->napt_list, nn_list) {
+		if (time_is_after_jiffies(nn->last_used + min_expires))
+			break;
+		if (map_napt_nn_expired(nn))
+			map_napt_nn_destroy("exp", nn);
+	}
+
+	list_for_each_entry_safe (nn, nn_node, &m->napt_gc_list, nn_gc_list) {
+		if (time_is_after_jiffies(nn->last_used + gc_threshold))
+			break;
+		ret = map_napt_nn_finrst(nn->flags);
+		if (ret)
+			/* finrst() returns 1 for FIN, 2 for RST */
+			reason = (ret == 1) ? "fin" : "rst";
+		else {
+			if (!map_napt_nn_est(nn->flags))
+				reason = "syn";
+		}
+		map_napt_nn_destroy(reason, nn);
+	}
+
+	m->napt_last_gc = jiffies;
+}
+
+static struct map_napt_node*
+map_napt_nn_create(__be32 saddr, __be16 sport, __be32 daddr, __be16 dport,
+	__u8 proto, struct in6_addr *saddr6, __be32 paddr, struct map *m)
+{
+	struct map_napt_node *nn, *nn_node;
+	struct hlist_node *node;
+	u32 hl0, hl1;
+	u32 h = map_napt_nn_hash_create(daddr, proto);
+	__be16 p = map_napt_generate_port_random(m);
+	__be16 origp = p;
+	int first = 1;
+
+	map_napt_nn_gc(m);
+
+	hlist_for_each_entry (nn, node, &m->napt_hash_crat[h], nn_hash_crat)
+		if (nn->proto == proto
+		    && nn->raddr == daddr && nn->rport == dport
+		    && nn->laddr == saddr && nn->lport == sport
+		    && (!saddr6 || ipv6_addr_equal(&nn->laddr6, saddr6))) {
+			printk(KERN_INFO "map_napt_nn_create: "
+				"napt node found.\n");
+			goto out;
+		}
+
+	hl0 = map_napt_nn_hash_lookup(saddr, sport, proto);
+	hlist_for_each_entry (nn, node, &m->napt_hash_lup0[hl0], nn_hash_lup0) {
+		if (nn->proto == proto && nn->laddr == saddr &&
+		    nn->lport == sport && (!saddr6 || ipv6_addr_equal(&nn->laddr6, saddr6))) {
+			printk(KERN_NOTICE "map_napt_nn_create: "
+				"Endpoint-Independent Mapping:\n");
+			map_napt_nn_debug("map_napt_nn_create: "
+				"recycle e-i mapping:", "", nn);
+			p = nn->mport;
+			origp = p;
+		}
+	}
+
+try_next:
+	if (!first && p == origp) {
+		printk(KERN_CRIT "map_napt_nn_create: source port for "
+			"%d.%d.%d.%d(%d) exhausted.\n",
+			ntohl(daddr) >> 24,
+			(ntohl(daddr) >> 16) & 0xff,
+			(ntohl(daddr) >> 8) & 0xff,
+			ntohl(daddr) & 0xff,
+			proto);
+		list_for_each_entry_safe (nn, nn_node, &m->napt_list, nn_list) {
+			if (nn->proto == proto
+			    && nn->raddr == daddr && nn->rport == dport) {
+				if (proto == IPPROTO_TCP
+				    && m->p.napt_force_recycle
+				       == MAP_NAPT_FORCE_RECYCLE_F
+				    && !map_napt_nn_finrst(nn->flags)
+				    && map_napt_nn_est(nn->flags))
+					continue;
+				printk(KERN_CRIT "map_napt_nn_create: "
+					"recycle oldest.\n");
+				map_napt_nn_debug("map_napt_nn_create: "
+					"recycle oldest:", "", nn);
+				p = nn->mport;
+				map_napt_nn_destroy("map_napt_nn_create: "
+					"recycle oldest:", nn);
+				goto recycle;
+			}
+		}
+		printk(KERN_CRIT "map_napt_nn_create: recycle failed.\n");
+		nn = NULL;
+		goto out;
+	}
+
+	hlist_for_each_entry (nn, node, &m->napt_hash_crat[h], nn_hash_crat) {
+		if (nn->proto == proto
+		    && nn->raddr == daddr
+		    && nn->rport == dport
+		    && nn->mport == p) {
+			p = map_napt_generate_port_next(p, m);
+			first = 0;
+			goto try_next;
+		}
+	}
+
+recycle:
+	/* called with napt_lock write-held (BHs off); must not sleep */
+	nn = kmem_cache_alloc(nn_kmem, GFP_ATOMIC);
+	if (!nn) {
+		printk(KERN_INFO "map_napt_nn_create: "
+			"kmem_cache_alloc fail.\n");
+		goto out;
+	}
+
+	++nn_kmem_alloced;
+	nn->proto = proto;
+	nn->raddr = daddr;
+	nn->rport = dport;
+	nn->laddr = saddr;
+	nn->lport = sport;
+	nn->maddr = paddr;
+	nn->mport = p;
+	if (saddr6)
+		memcpy(&nn->laddr6, saddr6, sizeof(*saddr6));
+	else
+		memset(&nn->laddr6, 0, sizeof(*saddr6));
+	nn->flags = 0;
+	nn->last_used = jiffies;
+	hl0 = map_napt_nn_hash_lookup(nn->laddr, nn->lport, proto);
+	hl1 = map_napt_nn_hash_lookup(nn->maddr, nn->mport, proto);
+	hlist_add_head(&nn->nn_hash_lup0, &m->napt_hash_lup0[hl0]);
+	hlist_add_head(&nn->nn_hash_lup1, &m->napt_hash_lup1[hl1]);
+	hlist_add_head(&nn->nn_hash_crat, &m->napt_hash_crat[h]);
+	list_add_tail(&nn->nn_list, &m->napt_list);
+	if (proto == IPPROTO_TCP)
+		list_add_tail(&nn->nn_gc_list, &m->napt_gc_list);
+	else
+		INIT_LIST_HEAD(&nn->nn_gc_list);
+
+out:
+	if (nn) {
+		map_napt_nn_debug("after create", "", nn);
+	}
+
+ return nn; +} + +/** + * @dir: 1 = in; 0 = out; + **/ + +static struct map_napt_node* +map_napt_nn_lookup(__be32 saddr, __be16 sport, __be32 waddr, __be16 wport, + __u8 proto, struct in6_addr *saddr6, int dir, struct map *m) +{ + struct map_napt_node *nn; + struct hlist_node *node; + __be32 sa, wa; + __be16 sp, wp; + u32 h; + + /* + printk(KERN_NOTICE "map_napt_nn_lookup (%s): \n", dir ? "in" : "out"); + */ + + h = map_napt_nn_hash_lookup(saddr, sport, proto); + + if (dir) + hlist_for_each_entry (nn, node, &m->napt_hash_lup1[h], + nn_hash_lup1) { + wa = nn->raddr; wp = nn->rport; + sa = nn->maddr; sp = nn->mport; + if (nn->proto == proto + && wa == waddr && wp == wport + && sa == saddr && sp == sport + && !map_napt_nn_expired(nn)) { + /* + printk(KERN_NOTICE "map_napt_nn_lookup (%s): " + "match!\n", dir ? "in" : "out"); + */ + return nn; + } + } + else + hlist_for_each_entry (nn, node, &m->napt_hash_lup0[h], + nn_hash_lup0) { + wa = nn->raddr; wp = nn->rport; + sa = nn->laddr; sp = nn->lport; + if (nn->proto == proto + && wa == waddr && wp == wport + && sa == saddr && sp == sport + && (!saddr6 || ipv6_addr_equal(&nn->laddr6, saddr6)) + && !map_napt_nn_expired(nn)) { + /* + printk(KERN_NOTICE "map_napt_nn_lookup (%s): " + "match!\n", dir ? "in" : "out"); + */ + return nn; + } + } + + /* + printk(KERN_NOTICE "map_napt_nn_lookup (%s): miss!\n", + dir ? "in" : "out"); + */ + + return 0; +} + +static __sum16 +map_napt_update_csum(__sum16 check, __be32 oaddr, __be16 oport, + __be32 naddr, __be16 nport, __u8 proto, int nested_icmp) +{ + long csum = ntohs(check); + + if (proto == IPPROTO_UDP && csum == 0) { + return htons(csum); + } + + csum = ~csum & 0xffff; + + if (proto != IPPROTO_ICMP || nested_icmp) { + csum -= ntohl(oaddr) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(oaddr) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + } + csum -= ntohs(oport) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + if (proto != IPPROTO_ICMP || nested_icmp) { + csum += ntohl(naddr) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(naddr) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + } + csum += ntohs(nport) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + csum = ~csum & 0xffff; + + return htons(csum); +} + +static void +map_napt_set_flags(struct map_napt_node *nn, __u8 flags, int dir) +{ + if (dir) { + if ((flags & MAP_NAPT_TCP_F_SYN) == MAP_NAPT_TCP_F_SYN) + nn->flags |= MAP_NAPT_F_I_SYN_ACK; + if ((flags & MAP_NAPT_TCP_F_FIN) == MAP_NAPT_TCP_F_FIN) + nn->flags |= MAP_NAPT_F_I_FIN; + if ((nn->flags & MAP_NAPT_F_O_FIN) == MAP_NAPT_F_O_FIN + && (flags & MAP_NAPT_TCP_F_ACK) == MAP_NAPT_TCP_F_ACK) + nn->flags |= MAP_NAPT_F_I_FIN_ACK; + } else { + if ((flags & MAP_NAPT_TCP_F_SYN) == MAP_NAPT_TCP_F_SYN) + nn->flags |= MAP_NAPT_F_O_SYN; + if ((flags & MAP_NAPT_TCP_F_ACK) == MAP_NAPT_TCP_F_ACK) + nn->flags |= MAP_NAPT_F_O_ACK; + if ((flags & MAP_NAPT_TCP_F_FIN) == MAP_NAPT_TCP_F_FIN) + nn->flags |= MAP_NAPT_F_O_FIN; + if ((nn->flags & MAP_NAPT_F_I_FIN) == MAP_NAPT_F_I_FIN + && (flags & MAP_NAPT_TCP_F_ACK) == MAP_NAPT_TCP_F_ACK) + nn->flags |= MAP_NAPT_F_O_FIN_ACK; + } + if ((flags & MAP_NAPT_TCP_F_RST) == MAP_NAPT_TCP_F_RST) + nn->flags |= MAP_NAPT_F_RST; +} + +/** + * @dir: 1 = in; 0 = out; + **/ + +static int +map_napt_update(__be32 *saddrp, __be16 *sportp, __be32 waddr, __be16 wport, + __u8 proto, struct in6_addr *saddr6, __be32 paddr, __sum16 *checkp, __be32 *icmpaddr, int dir, + __u8 flags, int 
nested_icmp, struct map *m) +{ + __be32 naddr = 0; + __be16 nport = 0; + struct map_napt_node *nn; + __u8 orig_flags; + + /* + printk(KERN_NOTICE "map_napt_update (%s):\n", dir ? "in" : "out"); + */ + + if (proto == IPPROTO_ICMP) + wport = 0; + + /* + printk(KERN_NOTICE "map_napt_update: saddr %d.%d.%d.%d " + "waddr %d.%d.%d.%d\n", + ntohl(*saddrp) >> 24, + (ntohl(*saddrp) >> 16) & 0xff, + (ntohl(*saddrp) >> 8) & 0xff, + ntohl(*saddrp) &0xff, + ntohl(waddr) >> 24, + (ntohl(waddr) >> 16) & 0xff, + (ntohl(waddr) >> 8) & 0xff, + ntohl(waddr) & 0xff); + printk(KERN_NOTICE "map_napt_update: sport %d wport %d\n", + ntohs(*sportp), ntohs(wport)); + */ + + write_lock_bh(&m->napt_lock); + nn = map_napt_nn_lookup(*saddrp, *sportp, waddr, wport, proto, saddr6, dir, m); + if (nn) { + orig_flags = nn->flags; + map_napt_set_flags(nn, flags, dir); + if (dir) { + naddr = nn->laddr; nport = nn->lport; + if (saddr6) + memcpy(saddr6, &nn->laddr6, sizeof(nn->laddr6)); + } else { + naddr = nn->maddr; nport = nn->mport; + } + list_move_tail(&nn->nn_list, &m->napt_list); + if (nn->proto == IPPROTO_TCP + && !map_napt_nn_est(orig_flags) && map_napt_nn_est(nn->flags) + && !map_napt_nn_finrst(nn->flags) + && nn->nn_gc_list.next != &nn->nn_gc_list) + list_del_init(&nn->nn_gc_list); + if (nn->proto == IPPROTO_TCP + && !map_napt_nn_finrst(orig_flags) + && map_napt_nn_finrst(nn->flags) + && nn->nn_gc_list.next == &nn->nn_gc_list) + list_add_tail(&nn->nn_gc_list, + &m->napt_gc_list); + if (nn->nn_gc_list.next != &nn->nn_gc_list) + list_move_tail(&nn->nn_gc_list, &m->napt_gc_list); + } else { + /* + printk(KERN_NOTICE "map_napt_update: map_napt_nn_lookup " + "return null"); + */ + if (dir) { + write_unlock_bh(&m->napt_lock); + return -1; + } + /* + if (proto == IPPROTO_TCP + && (flags & MAP_NAPT_TCP_F_SYN) != MAP_NAPT_TCP_F_SYN) { + write_unlock_bh(&m->napt_lock); + return -2; + } + */ + nn = map_napt_nn_create(*saddrp, *sportp, waddr, wport, + proto, saddr6, paddr, m); + if (!nn) { + /* + printk(KERN_NOTICE "map_napt: " + "map_napt_nn_create return null"); + */ + write_unlock_bh(&m->napt_lock); + return -3; + } + map_napt_set_flags(nn, flags, dir); + naddr = nn->maddr; nport = nn->mport; + } + nn->last_used = jiffies; + write_unlock_bh(&m->napt_lock); + + /* + printk(KERN_NOTICE "map_napt_update: saddr " + "%d.%d.%d.%d -> %d.%d.%d.%d\n", + ntohl(*saddrp) >> 24, + (ntohl(*saddrp) >> 16) & 0xff, + (ntohl(*saddrp) >> 8) & 0xff, + ntohl(*saddrp) &0xff, + ntohl(naddr) >> 24, + (ntohl(naddr) >> 16) & 0xff, + (ntohl(naddr) >> 8) & 0xff, + ntohl(naddr) & 0xff); + printk(KERN_NOTICE "map_napt_update: sport %d -> %d\n", + ntohs(*sportp), ntohs(nport)); + */ + + *checkp = map_napt_update_csum(*checkp, *saddrp, *sportp, naddr, + nport, proto, nested_icmp); + *saddrp = naddr; + *sportp = nport; + + if (icmpaddr) + *icmpaddr = naddr; + + return 0; +} + +static inline int +map_napt_first_pool(__be32 *first, struct map *m) +{ + struct map_pool *mp; + if (m->p.role == MAP_ROLE_CE) { + *first = m->laddr4; + return 0; + } + if (m->p.pool_num > 0) { + mp = list_first_entry(&m->pool_list, struct map_pool, list); + *first = mp->p.pool_prefix; + return 0; + } + return -1; +} + +static inline int +map_napt_next_pool(__be32 cur, __be32 *next, struct map *m) +{ + struct map_pool *mp; + __u32 mask; + if (m->p.role == MAP_ROLE_CE || m->p.pool_num == 0) + return -1; + read_lock_bh(&m->pool_lock); + list_for_each_entry (mp, &m->pool_list, list) { + mask = 0xffffffff << (32 - mp->p.pool_prefix_length); + if ((ntohl(cur) & mask) == 
ntohl(mp->p.pool_prefix)) { + if (((ntohl(cur) + 1) & mask) == ntohl(mp->p.pool_prefix)) { + *next = htonl(ntohl(cur) + 1); + read_unlock_bh(&m->pool_lock); + return 0; + } + if (!list_is_last(&mp->list, &m->pool_list)) { + mp = list_entry(mp->list.next, struct map_pool, list); + *next = mp->p.pool_prefix; + read_unlock_bh(&m->pool_lock); + return 0; + } + } + } + read_unlock_bh(&m->pool_lock); + return -1; +} + +void +map_napt_debug_pool(struct map *m) +{ + __be32 paddr; + if (!map_napt_first_pool(&paddr, m)) { + printk(KERN_NOTICE "map_napt_debug_pool: %d.%d.%d.%d\n", + ntohl(paddr) >> 24, + (ntohl(paddr) >> 16 & 0xff), + (ntohl(paddr) >> 8 & 0xff), + (ntohl(paddr) & 0xff)); + } else { + printk(KERN_NOTICE "map_napt_debug_pool: error\n"); + return; + } + while (!map_napt_next_pool(paddr, &paddr, m)) { + printk(KERN_NOTICE "map_napt_debug_pool: %d.%d.%d.%d\n", + ntohl(paddr) >> 24, + (ntohl(paddr) >> 16 & 0xff), + (ntohl(paddr) >> 8 & 0xff), + (ntohl(paddr) & 0xff)); + } +} + +static inline int +map_napt_needed(struct map *m, int fb) +{ + if (m->p.role == MAP_ROLE_BR && m->p.pool_num > 0 && fb) + return MAP_ROLE_BR; + if (m->p.role == MAP_ROLE_CE && m->bmr && + (m->psid_length > 0 || m->p.napt_always == MAP_NAPT_ALWAYS_T)) + return MAP_ROLE_CE; + return 0; +} + +/** + * @dir: 1 = in; 0 = out; + **/ + +int +map_napt(struct iphdr *iph, int dir, struct map *m, __be32 **waddrpp, + __be16 **wportpp, __sum16 **checkpp, struct in6_addr *saddr6, int fb) +{ + __be32 *saddrp = NULL; + __be16 *sportp = NULL; + __u8 proto; + __be32 *icmpaddr = NULL; + __be32 paddr; + u8 flags = 0; + u8 *ptr; + int err = 0; + struct iphdr *icmpiph = NULL; + struct tcphdr *tcph, *icmptcph; + struct udphdr *udph, *icmpudph; + struct icmphdr *icmph, *icmpicmph; + int nested_icmp = 0; + + if (dir) { + saddrp = &iph->daddr; + *waddrpp = &iph->saddr; + } else { + saddrp = &iph->saddr; + *waddrpp = &iph->daddr; + } + + ptr = (u8 *)iph; + ptr += iph->ihl * 4; + switch (iph->protocol) { + case IPPROTO_TCP: + proto = IPPROTO_TCP; + tcph = (struct tcphdr *)ptr; + if (dir) { + sportp = &tcph->dest; + *wportpp = &tcph->source; + } else { + sportp = &tcph->source; + *wportpp = &tcph->dest; + } + *checkpp = &tcph->check; + if (tcph->syn) flags |= MAP_NAPT_TCP_F_SYN; + if (tcph->ack) flags |= MAP_NAPT_TCP_F_ACK; + if (tcph->fin) flags |= MAP_NAPT_TCP_F_FIN; + if (tcph->rst) flags |= MAP_NAPT_TCP_F_RST; + break; + case IPPROTO_UDP: + proto = IPPROTO_UDP; + udph = (struct udphdr *)ptr; + if (dir) { + sportp = &udph->dest; + *wportpp = &udph->source; + } else { + sportp = &udph->source; + *wportpp = &udph->dest; + } + *checkpp = &udph->check; + break; + case IPPROTO_ICMP: + proto = IPPROTO_ICMP; + icmph = (struct icmphdr *)ptr; + *checkpp = &icmph->checksum; + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ptr = (u8 *)icmph; + ptr += sizeof(struct icmphdr); + icmpiph = (struct iphdr*)ptr; + if (ntohs(iph->tot_len) < icmpiph->ihl * 4 + 12) { + err = -1; + printk(KERN_NOTICE "map_napt: ???\n"); + goto out; + } + if (dir) { + saddrp = &icmpiph->saddr; + *waddrpp = &icmpiph->daddr; + } else { + saddrp = &icmpiph->daddr; + *waddrpp = &icmpiph->saddr; + } + ptr += icmpiph->ihl * 4; + switch (icmpiph->protocol) { + case IPPROTO_TCP: + proto = IPPROTO_TCP; + icmptcph = (struct tcphdr *)ptr; + if (dir) { + sportp = &icmptcph->source; + *wportpp = &icmptcph->dest; + icmpaddr = &iph->daddr; + } else { + sportp = &icmptcph->dest; + *wportpp = &icmptcph->source; + 
icmpaddr = &iph->saddr; + } + break; + case IPPROTO_UDP: + proto = IPPROTO_UDP; + icmpudph = (struct udphdr *)ptr; + if (dir) { + sportp = &icmpudph->source; + *wportpp = &icmpudph->dest; + icmpaddr = &iph->daddr; + } else { + sportp = &icmpudph->dest; + *wportpp = &icmpudph->source; + icmpaddr = &iph->saddr; + } + break; + case IPPROTO_ICMP: + nested_icmp = 1; + proto = IPPROTO_ICMP; + icmpicmph = (struct icmphdr *)ptr; + if (dir) { + sportp = &icmpicmph->un.echo.id; + *wportpp = &icmpicmph->un.echo.id; + icmpaddr = &iph->daddr; + } else { + sportp = &icmpicmph->un.echo.id; + *wportpp = &icmpicmph->un.echo.id; + icmpaddr = &iph->saddr; + } + break; + default: + err = -1; + printk(KERN_NOTICE "map_napt: " + "unknown proto in icmp.\n"); + goto out; + } + break; + default: + sportp = &icmph->un.echo.id; + *wportpp = &icmph->un.echo.id; + break; + } + break; + default: + err = -1; + printk(KERN_NOTICE "map_napt: unknown proto.\n"); + goto out; + } + + if (saddrp && sportp && map_napt_needed(m, fb)) { + err = map_napt_first_pool(&paddr, m); + if (err) { + printk(KERN_NOTICE "map_napt: map_napt_first_pool err.\n"); + goto out; + } +retry: + err = map_napt_update(saddrp, sportp, **waddrpp, + **wportpp, proto, saddr6, paddr, *checkpp, icmpaddr, dir, + flags, nested_icmp, m); + if (err) { + if (!map_napt_next_pool(paddr, &paddr, m)) + goto retry; + printk(KERN_NOTICE "map_napt: " + "map_napt_update failed(2). " + "dir = %d err = %d\n", dir, err); + printk(KERN_NOTICE "map_napt: " + "s=%d.%d.%d.%d:%d(%04x) " + "w=%d.%d.%d.%d:%d(%04x) proto=%d\n", + ((ntohl(*saddrp) >> 24) & 0xff), + ((ntohl(*saddrp) >> 16) & 0xff), + ((ntohl(*saddrp) >> 8) & 0xff), + ((ntohl(*saddrp)) & 0xff), + ntohs(*sportp), + ntohs(*sportp), + ((ntohl(**waddrpp) >> 24) & 0xff), + ((ntohl(**waddrpp) >> 16) & 0xff), + ((ntohl(**waddrpp) >> 8) & 0xff), + ((ntohl(**waddrpp)) & 0xff), + ntohs(**wportpp), + ntohs(**wportpp), + proto); + goto out; + } + /* XXX: */ + if (icmpiph) { + __sum16 ocheck, ncheck; + long csum; + ocheck = icmpiph->check; + icmpiph->check = 0; + icmpiph->check = ip_fast_csum( + (unsigned char *)icmpiph, icmpiph->ihl); + ncheck = icmpiph->check; + csum = ntohs(**checkpp); + csum = ~csum & 0xffff; + csum -= ntohs(ocheck) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum += ntohs(ncheck) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum = ~csum & 0xffff; + **checkpp = htons(csum); + } + } + +out: + return err; +} + +int +map_napt_init(void) +{ + nn_kmem = kmem_cache_create("map_napt_node", + sizeof(struct map_napt_node), 0, SLAB_HWCACHE_ALIGN, NULL); + if (!nn_kmem) + return -1; + + return 0; +} + +void +map_napt_exit(void) +{ + kmem_cache_destroy(nn_kmem); +} diff --git a/net/ipv6/map_rule.c b/net/ipv6/map_rule.c index e69de29..744987f 100644 --- a/net/ipv6/map_rule.c +++ b/net/ipv6/map_rule.c @@ -0,0 +1,868 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. As long as you retain this notice + * you can do whatever you want with this stuff. 
If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ +/* + * MAP Mapping Rule function + * + * Authors: + * Masakazu Asama + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +// #include +#include +#include +#include +#include +#include + +static struct kmem_cache *mrtn_kmem __read_mostly; +static struct kmem_cache *mr_kmem __read_mostly; + +static int +mrtree_node_init(struct mrtree_node *node, struct map_rule *mr) +{ + if (!node || !mr) + return -1; + + node->mr = mr; + node->children[0] = NULL; + node->children[1] = NULL; + node->parent = NULL; + + return 0; +} + +static int +mrtree_node_init_ipv6addr(struct mrtree_node *node, struct map_rule *mr) +{ + if (!node || !mr) + return -1; + + mrtree_node_init(node, mr); + mr->mrtn_ipv6addr = node; + node->val[0] = ntohl(mr->p.ipv6_prefix.s6_addr32[0]); + node->val[1] = ntohl(mr->p.ipv6_prefix.s6_addr32[1]); + node->val[2] = ntohl(mr->p.ipv6_prefix.s6_addr32[2]); + node->val[3] = ntohl(mr->p.ipv6_prefix.s6_addr32[3]); + node->len = mr->p.ipv6_prefix_length; + + return 0; +} + +static int +mrtree_node_init_ipv4addrport(struct mrtree_node *node, struct map_rule *mr) +{ + if (!node || !mr) + return -1; + + mrtree_node_init(node, mr); + mr->mrtn_ipv4addrport = node; + node->val[0] = ntohl(mr->p.ipv4_prefix); + node->len = mr->p.ipv4_prefix_length; + if (mr->p.ipv4_prefix_length == 32) { + node->val[1] = mr->p.psid_prefix << (32 - mr->p.psid_prefix_length); + node->len += mr->p.psid_prefix_length; + } else { + node->val[1] = 0; + } + node->val[2] = node->val[3] = 0; + + return 0; +} + +static int +mrtree_node_1st_is_contained_in_2nd(struct mrtree_node *node1, struct mrtree_node *node2) +{ + int i, pbw, pbi; + __u32 mask; + + if (!node1 || !node2) { + printk(KERN_NOTICE "mrtree_node_1st_is_contained_in_2nd: !node1 || !node2\n"); + return 0; + } + + if (node2->len < 0 || node2->len > 128) { + printk(KERN_NOTICE "mrtree_node_1st_is_contained_in_2nd: node2->len < 0 || node2->len > 128\n"); + return 0; + } + + if (node1->len < node2->len) { + /* + printk(KERN_NOTICE "mrtree_node_1st_is_contained_in_2nd: node1->len < node2->len\n"); + */ + return 0; + } + + pbw = node2->len >> 5; + pbi = node2->len & 0x1f; + for (i = 0; i < pbw; i++) + if (node1->val[i] != node2->val[i]) + return 0; + if (node2->len == 128) + return 1; + if (pbi > 0) { + mask = 0xffffffff << (32 - pbi); + if ((node1->val[pbw] & mask) != node2->val[pbw]) + return 0; + } + + return 1; +} + +static int +mrtree_node_1st_is_equal_to_2nd(struct mrtree_node *node1, struct mrtree_node *node2) +{ + int i; + + if (!node1 || !node2) { + printk(KERN_NOTICE "mrtree_node_1st_is_equal_to_2nd: !node1 || !node2\n"); + return 0; + } + + for (i = 0; i < 4; ++i) + if (node1->val[i] != node2->val[i]) + return 0; + if (node1->len != node2->len) + return 0; + + return 1; +} + +static int +mrtree_node_next_index_of_1st_for_2nd(struct mrtree_node *node1, struct mrtree_node *node2) +{ + int pbw, pbi; + __u32 mask; + + if (!node1 || !node2) { + printk(KERN_NOTICE "mrtree_node_next_index_of_1st_for_2nd: node1 or node2 is null.\n"); + return -1; + } + + if (node1->len < 0 || node1->len >= 128) { + printk(KERN_NOTICE "mrtree_node_next_index_of_1st_for_2nd: 
node1->len < 0 || node1->len >= 128.\n"); + return -1; + } + if (node2->len <= node1->len) + return -1; + pbw = node1->len >> 5; + pbi = node1->len & 0x1f; + mask = 0x1 << (31 - pbi); + if (node2->val[pbw] & mask) + return 1; + else + return 0; +} + +static struct mrtree_node * +mrtree_node_next_of_1st_for_2nd(struct mrtree_node *node1, struct mrtree_node *node2) +{ + int index; + + if (!node1 || !node2) { + printk(KERN_NOTICE "mrtree_node_next_of_1st_for_2nd: !node1 || !node2\n"); + return NULL; + } + + index = mrtree_node_next_index_of_1st_for_2nd(node1, node2); + if (index < 0) + return NULL; + return node1->children[index]; +} + +static int +mrtree_node_same_bits_length(struct mrtree_node *node1, struct mrtree_node *node2) +{ + int i, pbw, pbi; + __u32 mask; + + if (!node1 || !node2) { + printk(KERN_NOTICE "mrtree_node_same_bits_length: !node1 || !node2\n"); + return -1; + } + + for (i = 1; i < 128; i++) { + pbw = i >> 5; + pbi = i & 0x1f; + mask = 0x0; + if (pbi) + mask = 0xffffffff << (32 - pbi); + if (pbw && !pbi) { + if (node1->val[pbw - 1] != node2->val[pbw - 1]) + return i - 1; + } else { + if ((node1->val[pbw] & mask) != (node2->val[pbw] & mask)) + return i - 1; + } + } + return 128; +} + +static int +mrtree_node_same_bits(struct mrtree_node *node1, struct mrtree_node *node2, struct mrtree_node *node) +{ + int length, i, pbw, pbi; + __u32 mask; + + if (!node1 || !node2 || !node) { + printk(KERN_NOTICE "mrtree_node_same_bits: !node1 || !node2 || !node\n"); + return -1; + } + + length = mrtree_node_same_bits_length(node1, node2); + if (length < 0 || length > 128) { + printk(KERN_NOTICE "mrtree_node_same_bits: length < 0 || length > 128\n"); + return -1; + } + pbw = length >> 5; + pbi = length & 0x1f; + for (i = 0; i < pbw; i++) + node->val[i] = node1->val[i]; + if (pbi) { + mask = 0xffffffff << (32 - pbi); + node->val[pbw] = node1->val[pbw] & mask; + } + node->len = length; + return 0; +} + +static int +mrtree_node_add(struct mrtree_node *node, struct mrtree_node **root) +{ + struct mrtree_node *cur, *tmp, *child; + int index, index2; + + if (!node || !root) { + printk(KERN_NOTICE "mrtree_node_add: !node || !root\n"); + return -1; + } + + /* *root == NULL */ + if (!*root) { + *root = node; + return 0; + } + + cur = *root; + for (;;) { + tmp = NULL; + if (mrtree_node_1st_is_contained_in_2nd(node, cur)) { + tmp = mrtree_node_next_of_1st_for_2nd(cur, node); + } else { + if (cur->parent) + cur = cur->parent; + } + if (!tmp) + break; + else + cur = tmp; + } + + /* cur == *root */ + if (cur == *root && !mrtree_node_1st_is_contained_in_2nd(node, cur)) { + /* called under rule_lock with BH disabled, so the allocation must not sleep */ + tmp = kmem_cache_alloc(mrtn_kmem, GFP_ATOMIC); + if (!tmp) { + printk(KERN_NOTICE "mrtree_node_add: alloc mrtree_node failed.\n"); + return -1; + } + memset(tmp, 0, sizeof(*tmp)); + mrtree_node_same_bits(*root, node, tmp); + index = mrtree_node_next_index_of_1st_for_2nd(tmp, node); + if (index < 0) { + printk(KERN_NOTICE "mrtree_node_add: index = %d\n", index); + return -1; + } + if (index) { + tmp->children[0] = *root; + tmp->children[1] = node; + } else { + tmp->children[0] = node; + tmp->children[1] = *root; + } + node->parent = tmp; + (*root)->parent = tmp; + *root = tmp; + return 0; + } + + /* cur == node */ + if (mrtree_node_1st_is_equal_to_2nd(node, cur)) { + if (cur->mr) { + printk(KERN_NOTICE "mrtree_node_add: mrtree_node_1st_is_equal_to_2nd dup?\n"); + return -1; + } + cur->mr = node->mr; + if (cur->mr->mrtn_ipv6addr == node) + cur->mr->mrtn_ipv6addr = cur; + if (cur->mr->mrtn_ipv4addrport == node) + cur->mr->mrtn_ipv4addrport = cur; +
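/* an equal node already exists in the tree: the rule has been handed over to it above, so release the duplicate */ +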
kmem_cache_free(mrtn_kmem, node); + return 0; + } + + index = mrtree_node_next_index_of_1st_for_2nd(cur, node); + if (index < 0) { + printk(KERN_NOTICE "mrtree_node_add: index = %d\n", index); + return -1; + } + + if (!cur->children[index]) { + /* child == NULL */ + cur->children[index] = node; + node->parent = cur; + } else { + /* child != NULL */ + child = cur->children[index]; + tmp = kmem_cache_alloc(mrtn_kmem, GFP_ATOMIC); + if (!tmp) { + printk(KERN_NOTICE "mrtree_node_add: alloc mrtree_node failed.\n"); + return -1; + } + memset(tmp, 0, sizeof(*tmp)); + mrtree_node_same_bits(child, node, tmp); + if (tmp->len >= node->len) { + index2 = mrtree_node_next_index_of_1st_for_2nd(node, child); + if (index2 < 0) { + printk(KERN_NOTICE "mrtree_node_add: index2 = %d\n", index2); + return -1; + } + if (node->children[index2]) { + printk(KERN_NOTICE "mrtree_node_add: node->children[index2]\n"); + return -1; + } + node->children[index2] = child; + child->parent = node; + cur->children[index] = node; + node->parent = cur; + kmem_cache_free(mrtn_kmem, tmp); + return 0; + } + if (tmp->len >= child->len) { + printk(KERN_NOTICE "*** tmp->len >= child->len\n"); + } + index2 = mrtree_node_next_index_of_1st_for_2nd(tmp, node); + if (index2 < 0) { + printk(KERN_NOTICE "mrtree_node_add: index2 = %d\n", index2); + printk(KERN_NOTICE "* cur = %08x:%08x:%08x:%08x:%03d\n", + cur->val[0], cur->val[1], cur->val[2], cur->val[3], cur->len); + printk(KERN_NOTICE "* tmp = %08x:%08x:%08x:%08x:%03d\n", + tmp->val[0], tmp->val[1], tmp->val[2], tmp->val[3], tmp->len); + printk(KERN_NOTICE "* node = %08x:%08x:%08x:%08x:%03d\n", + node->val[0], node->val[1], node->val[2], node->val[3], node->len); + printk(KERN_NOTICE "* child = %08x:%08x:%08x:%08x:%03d\n", + child->val[0], child->val[1], child->val[2], child->val[3], child->len); + if (!(cur->len < node->len && node->len < child->len)) + printk(KERN_NOTICE "*** !(cur->len < node->len && node->len < child->len)\n"); + return -1; + } + if (index2) { + tmp->children[0] = child; + tmp->children[1] = node; + } else { + tmp->children[0] = node; + tmp->children[1] = child; + } + node->parent = tmp; + child->parent = tmp; + cur->children[index] = tmp; + tmp->parent = cur; + } + + return 0; +} + +static int +mrtree_node_delete(struct mrtree_node *node, struct mrtree_node **root) +{ + struct mrtree_node *parent; + + if (node->mr && node->mr->mrtn_ipv6addr == node) + node->mr->mrtn_ipv6addr = NULL; + if (node->mr && node->mr->mrtn_ipv4addrport == node) + node->mr->mrtn_ipv4addrport = NULL; + node->mr = NULL; + + if (node->children[0] && node->children[1]) { + /* + printk(KERN_NOTICE "mrtree_node_delete: node->children[0] && node->children[1]\n"); + */ + return 0; + } + + if (!node->children[0] && !node->children[1]) { + /* + printk(KERN_NOTICE "mrtree_node_delete: !node->children[0] && !node->children[1]\n"); + */ + if (!node->parent) { + *root = NULL; + } + if (node->parent && node->parent->children[0] == node) { + node->parent->children[0] = NULL; + } + if (node->parent && node->parent->children[1] == node) { + node->parent->children[1] = NULL; + } + } + + if (node->children[0]) { + /* + printk(KERN_NOTICE "mrtree_node_delete: node->children[0]\n"); + */ + if (node->parent && node->parent->children[0] == node) { + /* + printk(KERN_NOTICE "mrtree_node_delete: node->parent && node->parent->children[0] == node\n"); + */ + node->parent->children[0] = node->children[0]; + node->children[0]->parent = node->parent; + } + if (node->parent && node->parent->children[1] == node) { + /* +
printk(KERN_NOTICE "mrtree_node_delete: node->parent && node->parent->children[1] == node\n"); + */ + node->parent->children[1] = node->children[0]; + node->children[0]->parent = node->parent; + } + if (!node->parent) { + /* + printk(KERN_NOTICE "mrtree_node_delete: !node->parent\n"); + */ + *root = node->children[0]; + } + } + + if (node->children[1]) { + /* + printk(KERN_NOTICE "mrtree_node_delete: node->children[1]\n"); + */ + if (node->parent && node->parent->children[0] == node) { + /* + printk(KERN_NOTICE "mrtree_node_delete: node->parent && node->parent->children[0] == node\n"); + */ + node->parent->children[0] = node->children[1]; + node->children[1]->parent = node->parent; + } + if (node->parent && node->parent->children[1] == node) { + /* + printk(KERN_NOTICE "mrtree_node_delete: node->parent && node->parent->children[1] == node\n"); + */ + node->parent->children[1] = node->children[1]; + node->children[1]->parent = node->parent; + } + if (!node->parent) { + /* + printk(KERN_NOTICE "mrtree_node_delete: !node->parent\n"); + */ + *root = node->children[1]; + } + } + + parent = node->parent; + + kmem_cache_free(mrtn_kmem, node); + + if (parent && !parent->mr) { + mrtree_node_delete(parent, root); + } + + return 0; +} + +struct map_rule * +map_rule_find_by_ipv6addr(struct map *m, struct in6_addr *ipv6addr) +{ + struct map_rule *mr = NULL; + struct mrtree_node *cur = NULL, *tmp, key; + + key.val[0] = ntohl(ipv6addr->s6_addr32[0]); + key.val[1] = ntohl(ipv6addr->s6_addr32[1]); + key.val[2] = ntohl(ipv6addr->s6_addr32[2]); + key.val[3] = ntohl(ipv6addr->s6_addr32[3]); + key.len = 128; + + if (!m->mrtn_root_ipv6addr) + return NULL; + + read_lock(&m->rule_lock); + /* + list_for_each_entry (tmp, &m->rule_list, list) { + if (ipv6_prefix_equal(&tmp->p.ipv6_prefix, ipv6addr, + tmp->p.ipv6_prefix_length)) { + if (!mr || (tmp->p.ipv6_prefix_length > + mr->p.ipv6_prefix_length)) + mr = tmp; + } + } + */ + tmp = m->mrtn_root_ipv6addr; + for (;;) { + if (tmp && mrtree_node_1st_is_contained_in_2nd(&key, tmp)) { + if (tmp->mr && (!cur || tmp->len > cur->len)) + cur = tmp; + tmp = mrtree_node_next_of_1st_for_2nd(tmp, &key); + if (!tmp) + break; + } else + break; + } + if (cur) + mr = cur->mr; + read_unlock(&m->rule_lock); + + return mr; +} + +struct map_rule * +map_rule_find_by_ipv4addrport(struct map *m, __be32* ipv4addr, __be16* port, + int fro) +{ + struct map_rule *mr = NULL; + struct mrtree_node *cur = NULL, *tmp, *tmp2, key; + // __u32 amask; + // __u16 pmask; + // int psidrp; + int i; + + if (!m->mrtn_root_ipv4addrport) + return NULL; + + key.val[0] = ntohl(*ipv4addr); + key.len = 48; + + read_lock(&m->rule_lock); + /* + list_for_each_entry (tmp, &m->rule_list, list) { + if (fro && tmp->p.forwarding_rule != MAP_FORWARDING_RULE_T) + continue; + amask = 0xffffffff << (32 - tmp->p.ipv4_prefix_length); + if ((ntohl(tmp->p.ipv4_prefix) & amask) != (ntohl(*ipv4addr) & amask)) + continue; + if (tmp->p.ipv4_prefix_length == 32 && tmp->p.psid_prefix_length > 0) { + pmask = 0xffff; + psidrp = 16 - tmp->p.psid_offset - tmp->p.psid_prefix_length; + if (tmp->p.psid_prefix_length < 16) + pmask = ((1 << tmp->p.psid_prefix_length) - 1) << psidrp; + if ((ntohs(*port) & pmask) == (tmp->p.psid_prefix << psidrp)) + if (!mr || (tmp->p.psid_prefix_length > mr->p.psid_prefix_length)) + mr = tmp; + } else { + if (!mr || (tmp->p.ipv4_prefix_length > mr->p.ipv4_prefix_length)) + mr = tmp; + } + } + */ + tmp = m->mrtn_root_ipv4addrport; + for (;;) { + if (tmp && mrtree_node_1st_is_contained_in_2nd(&key, tmp)) { + if 
(tmp->mr && (!cur || tmp->len > cur->len)) + if (!fro || tmp->mr->p.forwarding_rule == MAP_FORWARDING_RULE_T) + cur = tmp; + tmp = mrtree_node_next_of_1st_for_2nd(tmp, &key); + if (!tmp) + break; + } else + break; + if (tmp->len > 32) { + if (tmp->parent) + tmp = tmp->parent; + break; + } + } + for (i = 0; i < 17; ++i) { + if (m->psid_offset_nums[i] == 0) + continue; + /* i can reach 16 (psid_offset == 16); shifting a 32-bit value by 32 is undefined, so treat that case as 0 */ + key.val[1] = (i < 16) ? ((__u32)ntohs(*port)) << (16 + i) : 0; + tmp2 = tmp; + for (;;) { + if (tmp2 && mrtree_node_1st_is_contained_in_2nd(&key, tmp2)) { + if (tmp2->mr && (!cur || tmp2->len > cur->len)) + if (!fro || tmp2->mr->p.forwarding_rule == MAP_FORWARDING_RULE_T) + cur = tmp2; + tmp2 = mrtree_node_next_of_1st_for_2nd(tmp2, &key); + if (!tmp2) + break; + } else + break; + } + } + if (cur) + mr = cur->mr; + read_unlock(&m->rule_lock); + + return mr; +} + +void +mrtree_node_print(struct mrtree_node *node, int indent) +{ + int i; + char head[24], foot[8]; + + if (!node) + return; + + /* + for(i = 0; i < indent; ++i) printk(KERN_NOTICE " "); + printk(KERN_NOTICE "X"); + for(i = 0; i < (20-indent); ++i) printk(KERN_NOTICE " "); + */ + + for (i = 0; i < 24; ++i) head[i] = ' '; + i = indent; + if (i > 22) { + head[22] = '-'; + } else { + head[i] = '*'; + } + head[23] = '\0'; + + for (i = 0; i < 8; ++i) foot[i] = ' '; + if (node->parent) { + if (node->parent->len >= node->len) + foot[0] = '1'; + if (node->parent->children[0] != node && node->parent->children[1] != node) + foot[1] = '2'; + } + if (node->children[0]) { + if (node->children[0]->len <= node->len) + foot[2] = '3'; + if (node->children[0]->parent != node) + foot[3] = '4'; + } + if (node->children[1]) { + if (node->children[1]->len <= node->len) + foot[4] = '5'; + if (node->children[1]->parent != node) + foot[5] = '6'; + } + foot[7] = '\0'; + + /* pointers are printed via unsigned long so this also builds cleanly on 64-bit */ + printk(KERN_NOTICE "%s 0x%08lx 0x%08lx 0x%08lx %08x:%08x:%08x:%08x:%03d 0x%08lx 0x%08lx %s\n", + head, + (unsigned long)node, + (unsigned long)node->children[0], + (unsigned long)node->children[1], + node->val[0], + node->val[1], + node->val[2], + node->val[3], + node->len, + (unsigned long)node->mr, + (unsigned long)node->parent, + foot + ); + + mrtree_node_print(node->children[0], indent + 1); + mrtree_node_print(node->children[1], indent + 1); +} + +void +mrtree_node_dump(struct mrtree_node *root) +{ + printk(KERN_NOTICE " NODE CHILD[0] CHILD[1] VAL[0] VAL[1] VAL[2] VAL[3] LEN MR PARENT\n"); + if (root) + mrtree_node_print(root, 0); +} + +static struct map_rule * +map_rule_find(struct map *m, struct map_rule_parm *mrp) +{ + struct map_rule *mr = NULL, *tmp; + + read_lock(&m->rule_lock); + list_for_each_entry (tmp, &m->rule_list, list) { + if (!strncmp((void *)&tmp->p.ipv6_prefix, + (void *)&mrp->ipv6_prefix, sizeof(struct in6_addr)) && + tmp->p.ipv4_prefix == mrp->ipv4_prefix && + tmp->p.psid_prefix == mrp->psid_prefix && + tmp->p.ipv6_prefix_length == mrp->ipv6_prefix_length && + tmp->p.ipv4_prefix_length == mrp->ipv4_prefix_length && + tmp->p.psid_prefix_length == mrp->psid_prefix_length) { + mr = tmp; + break; + } + } + read_unlock(&m->rule_lock); + + return mr; +} + +static struct map_rule * +map_rule_find_loose(struct map *m, struct map_rule_parm *mrp) +{ + struct map_rule *mr = NULL, *tmp; + + read_lock(&m->rule_lock); + list_for_each_entry (tmp, &m->rule_list, list) { + if ((!strncmp((void *)&tmp->p.ipv6_prefix, + (void *)&mrp->ipv6_prefix, sizeof(struct in6_addr)) && + tmp->p.ipv6_prefix_length == mrp->ipv6_prefix_length) || + (tmp->p.ipv4_prefix == mrp->ipv4_prefix && + tmp->p.ipv4_prefix_length == mrp->ipv4_prefix_length && + tmp->p.psid_prefix == mrp->psid_prefix && +
tmp->p.psid_prefix_length == mrp->psid_prefix_length)) { + mr = tmp; + break; + } + } + read_unlock(&m->rule_lock); + + return mr; +} + +int +map_rule_free(struct map *m, struct map_rule *mr) +{ + /* XXX: */ + mrtree_node_delete(mr->mrtn_ipv4addrport, &m->mrtn_root_ipv4addrport); + mrtree_node_delete(mr->mrtn_ipv6addr, &m->mrtn_root_ipv6addr); + list_del(&mr->list); + kmem_cache_free(mr_kmem, mr); + return 0; +} + +int +map_rule_add(struct map *m, struct map_rule_parm *mrp) +{ + struct map_rule *mr; + struct mrtree_node *mrtn_ipv6addr, *mrtn_ipv4addrport; + + mr = map_rule_find_loose(m, mrp); + if (mr) + return -1; + + if (mrp->forwarding_mode != MAP_FORWARDING_MODE_T && + mrp->forwarding_mode != MAP_FORWARDING_MODE_E) + return -1; + + if (mrp->forwarding_rule != MAP_FORWARDING_RULE_T && + mrp->forwarding_rule != MAP_FORWARDING_RULE_F) + return -1; + + mr = kmem_cache_alloc(mr_kmem, GFP_KERNEL); + if (!mr) + goto mr_err; + mr->p = *mrp; + + mrtn_ipv6addr = kmem_cache_alloc(mrtn_kmem, GFP_KERNEL); + if (!mrtn_ipv6addr) + goto mrtn_ipv6addr_err; + memset(mrtn_ipv6addr, 0, sizeof(*mrtn_ipv6addr)); + + mrtn_ipv4addrport = kmem_cache_alloc(mrtn_kmem, GFP_KERNEL); + if (!mrtn_ipv4addrport) + goto mrtn_ipv4addrport_err; + memset(mrtn_ipv4addrport, 0, sizeof(*mrtn_ipv4addrport)); + + mrtree_node_init_ipv6addr(mrtn_ipv6addr, mr); + mrtree_node_init_ipv4addrport(mrtn_ipv4addrport, mr); + + write_lock_bh(&m->psid_offset_nums_lock); + m->psid_offset_nums[mr->p.psid_offset]++; + write_unlock_bh(&m->psid_offset_nums_lock); + + write_lock_bh(&m->rule_lock); + list_add_tail(&mr->list, &m->rule_list); + mrtree_node_add(mrtn_ipv6addr, &m->mrtn_root_ipv6addr); + mrtree_node_add(mrtn_ipv4addrport, &m->mrtn_root_ipv4addrport); + m->p.rule_num += 1; + write_unlock_bh(&m->rule_lock); + + return 0; + +mrtn_ipv4addrport_err: + kmem_cache_free(mrtn_kmem, mrtn_ipv6addr); +mrtn_ipv6addr_err: + kmem_cache_free(mr_kmem, mr); +mr_err: + printk(KERN_NOTICE "map_rule_add: alloc failed.\n"); + return -1; +} + +int +map_rule_change(struct map *m, struct map_rule_parm *mrp) +{ + struct map_rule *mr = map_rule_find(m, mrp); + + if (!mr) + return -1; + + mr->p.ea_length = mrp->ea_length; + mr->p.psid_offset = mrp->psid_offset; + mr->p.forwarding_mode = mrp->forwarding_mode; + mr->p.forwarding_rule = mrp->forwarding_rule; + + return 0; +} + +int +map_rule_delete(struct map *m, struct map_rule_parm *mrp) +{ + struct map_rule *mr = map_rule_find(m, mrp); + + if (!mr) + return -1; + + write_lock_bh(&m->psid_offset_nums_lock); + m->psid_offset_nums[mr->p.psid_offset]--; + write_unlock_bh(&m->psid_offset_nums_lock); + + write_lock_bh(&m->rule_lock); + if (m->bmr == mr) + m->bmr = NULL; + map_rule_free(m, mr); + m->p.rule_num -= 1; + write_unlock_bh(&m->rule_lock); + + return 0; +} + +int +map_rule_init(void) +{ + mrtn_kmem = kmem_cache_create("mrtree_node", sizeof(struct mrtree_node), 0, SLAB_HWCACHE_ALIGN, NULL); + if (!mrtn_kmem) + return -1; + mr_kmem = kmem_cache_create("map_rule", sizeof(struct map_rule), 0, SLAB_HWCACHE_ALIGN, NULL); + if (!mr_kmem) + return -1; + return 0; +} + +void +map_rule_exit(void) +{ + kmem_cache_destroy(mr_kmem); + kmem_cache_destroy(mrtn_kmem); +} diff --git a/net/ipv6/map_trans.c b/net/ipv6/map_trans.c index e69de29..7aeb881 100644 --- a/net/ipv6/map_trans.c +++ b/net/ipv6/map_trans.c @@ -0,0 +1,1346 @@ +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * wrote this file. 
As long as you retain this notice + * you can do whatever you want with this stuff. If we meet some day, and you + * think this stuff is worth it, you can buy me a beer in return Masakazu Asama + * ---------------------------------------------------------------------------- + */ +/* + * MAP-T function + * + * Authors: + * Masakazu Asama + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static inline __sum16 +map_trans_update_csum_v6v4(__sum16 check, struct in6_addr *osaddr, + struct in6_addr *odaddr, __be32 nsaddr, __be32 ndaddr) +{ + long csum = ntohs(check); + int i; + + csum = ~csum & 0xffff; + + for (i = 0; i < 4; ++i) { + csum -= ntohl(osaddr->s6_addr32[i]) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(osaddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + csum -= ntohl(odaddr->s6_addr32[i]) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(odaddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + } + + csum += ntohl(nsaddr) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(nsaddr) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + csum += ntohl(ndaddr) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(ndaddr) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + csum = ~csum & 0xffff; + + return htons(csum); +} + +static inline __sum16 +map_trans_update_csum_v4v6(__sum16 check, __be32 osaddr, __be32 odaddr, + struct in6_addr *nsaddr, struct in6_addr *ndaddr) +{ + long csum = ntohs(check); + int i; + + csum = ~csum & 0xffff; + + csum -= ntohl(osaddr) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(osaddr) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + csum -= ntohl(odaddr) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(odaddr) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + for (i = 0; i < 4; ++i) { + csum += ntohl(nsaddr->s6_addr32[i]) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(nsaddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + csum += ntohl(ndaddr->s6_addr32[i]) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(ndaddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + } + + csum = ~csum & 0xffff; + + return htons(csum); +} + +static inline int +map_trans_icmp_typecode_v6v4(__u8 otype, __u8 ocode, __u8 *ntype, __u8 *ncode) +{ + switch (otype) { + case ICMPV6_ECHO_REQUEST: + *ntype = ICMP_ECHO; + break; + case ICMPV6_ECHO_REPLY: + *ntype = ICMP_ECHOREPLY; + break; + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + return -1; + case ICMPV6_DEST_UNREACH: + /* XXX: */ + *ntype = ICMP_DEST_UNREACH; + switch (ocode) { + case ICMPV6_NOROUTE: + case ICMPV6_NOT_NEIGHBOUR: + case ICMPV6_ADDR_UNREACH: + *ncode = ICMP_HOST_UNREACH; + break; + case ICMPV6_ADM_PROHIBITED: + *ncode = ICMP_HOST_ANO; + break; + case ICMPV6_PORT_UNREACH: + *ncode = ICMP_PORT_UNREACH; + break; + default: + return -1; + } + break; + case ICMPV6_PKT_TOOBIG: + /* XXX: */ + *ntype = ICMP_DEST_UNREACH; + *ncode = ICMP_FRAG_NEEDED; + break; + case ICMPV6_TIME_EXCEED: + /* XXX: */ + *ntype = ICMP_TIME_EXCEEDED; + break; + case ICMPV6_PARAMPROB: + /* XXX: */ + switch (ocode) { + case ICMPV6_HDR_FIELD: 
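+ /* erroneous header field: the closest IPv4 equivalent is Parameter Problem; the pointer field is not adjusted here */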
+ *ntype = ICMP_PARAMETERPROB; + *ncode = 0; + break; + case ICMPV6_UNK_NEXTHDR: + *ntype = ICMP_DEST_UNREACH; + *ncode = ICMP_PROT_UNREACH; + break; + case ICMPV6_UNK_OPTION: + default: + return -1; + } + break; + default: + return -1; + } + return 0; +} + +static inline int +map_trans_icmp_typecode_v4v6(__u8 otype, __u8 ocode, __u8 *ntype, __u8 *ncode) +{ + switch (otype) { + case ICMP_ECHO: + *ntype = ICMPV6_ECHO_REQUEST; + break; + case ICMP_ECHOREPLY: + *ntype = ICMPV6_ECHO_REPLY; + break; + case ICMP_INFO_REQUEST: + case ICMP_INFO_REPLY: + case ICMP_TIMESTAMP: + case ICMP_TIMESTAMPREPLY: + case ICMP_ADDRESS: + case ICMP_ADDRESSREPLY: + return -1; + case ICMP_DEST_UNREACH: + /* XXX: */ + *ntype = ICMPV6_DEST_UNREACH; + switch (ocode) { + case ICMP_NET_UNREACH: + case ICMP_HOST_UNREACH: + *ncode = ICMPV6_NOROUTE; + break; + case ICMP_PROT_UNREACH: + *ntype = ICMPV6_PARAMPROB; + *ncode = ICMPV6_UNK_NEXTHDR; + /* XXX: */ + break; + case ICMP_PORT_UNREACH: + *ncode = ICMPV6_PORT_UNREACH; + break; + case ICMP_FRAG_NEEDED: + *ntype = ICMPV6_PKT_TOOBIG; + *ncode = 0; + break; + case ICMP_SR_FAILED: + *ncode = ICMPV6_NOROUTE; + break; + case ICMP_NET_UNKNOWN: + case ICMP_HOST_UNKNOWN: + case ICMP_HOST_ISOLATED: + *ncode = ICMPV6_NOROUTE; + break; + case ICMP_NET_ANO: + case ICMP_HOST_ANO: + *ncode = ICMPV6_ADM_PROHIBITED; + break; + case ICMP_NET_UNR_TOS: + /* XXX: */ + *ncode = ICMPV6_NOROUTE; + break; + case ICMP_PKT_FILTERED: + *ncode = ICMPV6_ADM_PROHIBITED; + break; + case ICMP_PREC_VIOLATION: + return -1; + case ICMP_PREC_CUTOFF: + *ncode = ICMPV6_ADM_PROHIBITED; + break; + default: + return -1; + } + break; + case ICMP_REDIRECT: + case ICMP_SOURCE_QUENCH: + return -1; + case ICMP_TIME_EXCEEDED: + *ntype = ICMPV6_TIME_EXCEED; + break; + case ICMP_PARAMETERPROB: + /* XXX: */ + switch (ocode) { + default: + return -1; + } + break; + default: + return -1; + } + return 0; +} + +static inline __sum16 +map_trans_icmp_csum_v6v4(__sum16 check, __u8 otype, __u8 ocode, __u8 ntype, + __u8 ncode, __u8 otypein, __u8 ocodein, __sum16 ocsumin, __u8 ntypein, + __u8 ncodein, __sum16 ncsumin, struct in6_addr *saddr, + struct in6_addr *daddr, __be16 payload_len, int iphlen, + struct iphdr *iph, int ipv6hlen, struct ipv6hdr *ipv6h) +{ + long csum = ntohs(check); + __be32 *sumtmp; + u32 t; + int i; + + csum = ~csum & 0xffff; + + t = otype; t <<= 8; t |= ocode; + csum -= t & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + t = ntype; t <<= 8; t |= ncode; + csum += t & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + t = otypein; t <<= 8; t |= ocodein; + csum -= t & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + t = ntypein; t <<= 8; t |= ncodein; + csum += t & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + csum -= ntohs(ocsumin) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + csum += ntohs(ncsumin) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + for (i = 0; i < 4; ++i) { + csum -= ntohl(saddr->s6_addr32[i]) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(saddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= ntohl(daddr->s6_addr32[i]) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(daddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + } + + csum -= ntohs(payload_len) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + t = IPPROTO_ICMPV6; + csum -= t & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + if 
(ipv6hlen) { + sumtmp = (__be32 *)ipv6h; + for (i = 0; i < (ipv6hlen / 4); ++i) { + csum -= ntohl(sumtmp[i]) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(sumtmp[i]) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + } + } + + if (iphlen) { + sumtmp = (__be32 *)iph; + for (i = 0; i < (iphlen / 4); ++i) { + csum += ntohl(sumtmp[i]) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(sumtmp[i]) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + } + } + + csum = ~csum & 0xffff; + + return htons(csum); +} + +static inline __sum16 +map_trans_icmp_csum_v4v6(__sum16 check, __u8 otype, __u8 ocode, __u8 ntype, + __u8 ncode, __u8 otypein, __u8 ocodein, __sum16 ocsumin, __u8 ntypein, + __u8 ncodein, __sum16 ncsumin, struct in6_addr *saddr, + struct in6_addr *daddr, __be16 payload_len, int iphlen, + struct iphdr *iph, int ipv6hlen, struct ipv6hdr *ipv6h) +{ + long csum = ntohs(check); + __be32 *sumtmp; + u32 t; + int i; + + csum = ~csum & 0xffff; + + t = otype; t <<= 8; t |= ocode; + csum -= t & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + t = ntype; t <<= 8; t |= ncode; + csum += t & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + t = otypein; t <<= 8; t |= ocodein; + csum -= t & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + t = ntypein; t <<= 8; t |= ncodein; + csum += t & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + csum -= ntohs(ocsumin) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + + csum += ntohs(ncsumin) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + for (i = 0; i < 4; ++i) { + csum += ntohl(saddr->s6_addr32[i]) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(saddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += ntohl(daddr->s6_addr32[i]) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(daddr->s6_addr32[i]) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + } + + csum += ntohs(payload_len) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + t = IPPROTO_ICMPV6; + csum += t & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + + if (iphlen) { + sumtmp = (__be32 *)iph; + for (i = 0; i < (iphlen / 4); ++i) { + csum -= ntohl(sumtmp[i]) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + csum -= (ntohl(sumtmp[i]) >> 16) & 0xffff; + if (csum <= 0) { --csum; csum &= 0xffff; } + } + } + + if (ipv6hlen) { + sumtmp = (__be32 *)ipv6h; + for (i = 0; i < (ipv6hlen / 4); ++i) { + csum += ntohl(sumtmp[i]) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + csum += (ntohl(sumtmp[i]) >> 16) & 0xffff; + if (csum & 0x10000) { ++csum; csum &= 0xffff; } + } + } + + csum = ~csum & 0xffff; + + return htons(csum); +} + +static int +map_trans_icmp_v6v4(struct sk_buff **skb, struct icmphdr *icmph, + struct in6_addr *saddr, struct in6_addr *daddr, __be16 *payload_len, + struct map *m) +{ + __u8 otype = 0, ocode = 0; + __u8 ntype = 0, ncode = 0; + __u8 otypein = 0, ocodein = 0; + __u8 ntypein = 0, ncodein = 0; + __sum16 ocsumin = 0, ncsumin = 0; + struct icmphdr *icmpinh; + __sum16 check = icmph->checksum; + u8 *buf = NULL; + struct iphdr *iph = NULL; + struct ipv6hdr *ipv6h = NULL; + struct frag_hdr *fragh; + u8 *ptr, *datas, *datad; + int len, ipv6hlen = 0, iphlen = 0; + __u8 nexthdr; + __be16 orig_payload_len = *payload_len; + struct map_rule *mr = NULL; + struct in6_addr tmpsaddr6 = {}, tmpdaddr6 = {}; + 
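/* rewrite the ICMPv6 message in place as ICMPv4: map type/code, translate any embedded IPv6 packet, then patch the checksum incrementally instead of recomputing it */ +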
int ret; + + otype = icmph->type; ocode = icmph->code; + ret = map_trans_icmp_typecode_v6v4(otype, ocode, &ntype, &ncode); + icmph->type = ntype; icmph->code = ncode; + if (ret) + return ret; + + switch (otype) { + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + ptr = (u8 *)icmph; + ptr += sizeof(struct icmp6hdr); + len = skb_tail_pointer(*skb) - ptr; + /* may run in softirq context, so the allocation must not sleep */ + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + printk(KERN_NOTICE "map_trans_icmp_v6v4: " + "buf malloc failed.\n"); + return -1; + } + memcpy(buf, ptr, len); + + iph = (struct iphdr*)ptr; + datad = (u8 *)iph; + datad += sizeof(struct iphdr); + iphlen = sizeof(struct iphdr); + + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; + + ipv6h = (struct ipv6hdr *)buf; + datas = (u8 *)ipv6h; + datas += sizeof(struct ipv6hdr); + len -= sizeof(struct ipv6hdr); + ipv6hlen = sizeof(struct ipv6hdr); + nexthdr = ipv6h->nexthdr; + if (ipv6h->nexthdr == IPPROTO_FRAGMENT) { + fragh = (struct frag_hdr *)datas; + datas += sizeof(struct frag_hdr); + len -= sizeof(struct frag_hdr); + ipv6hlen += sizeof(struct frag_hdr); + nexthdr = fragh->nexthdr; + + iph->tot_len = htons(ntohs(ipv6h->payload_len) - 8 + + sizeof(struct iphdr)); + iph->id = htons(ntohl(fragh->identification) & 0xffff); + iph->frag_off = htons(ntohs(fragh->frag_off) & 0xfff8); + if (ntohs(fragh->frag_off) & IP6_MF) + iph->frag_off |= htons(IP_MF); + } else { + iph->tot_len = htons(ntohs(ipv6h->payload_len) + + sizeof(struct iphdr)); + iph->id = 0; + iph->frag_off = htons(IP_DF); + } + iph->ttl = ipv6h->hop_limit; + mr = map_rule_find_by_ipv6addr(m, &ipv6h->saddr); + if (mr) + map_get_map_ipv6_address(mr, &ipv6h->saddr, &tmpsaddr6); + if (ipv6_addr_equal(&ipv6h->saddr, &tmpsaddr6)) { + iph->saddr = htonl(ntohl(ipv6h->saddr.s6_addr32[2]) << 16); + iph->saddr |= htonl(ntohl(ipv6h->saddr.s6_addr32[3]) >> 16); + } else { + if (m->p.br_address_length > 64) + iph->saddr = ipv6h->saddr.s6_addr32[3]; + else { + iph->saddr = htonl(ntohl(ipv6h->saddr.s6_addr32[2]) << 8); + iph->saddr |= htonl(ntohl(ipv6h->saddr.s6_addr32[3]) >> 24); + } + } + mr = map_rule_find_by_ipv6addr(m, &ipv6h->daddr); + if (mr) + map_get_map_ipv6_address(mr, &ipv6h->daddr, &tmpdaddr6); + if (ipv6_addr_equal(&ipv6h->daddr, &tmpdaddr6)) { + iph->daddr = htonl(ntohl(ipv6h->daddr.s6_addr32[2]) << 16); + iph->daddr |= htonl(ntohl(ipv6h->daddr.s6_addr32[3]) >> 16); + } else { + if (m->p.br_address_length > 64) + iph->daddr = ipv6h->daddr.s6_addr32[3]; + else { + iph->daddr = htonl(ntohl(ipv6h->daddr.s6_addr32[2]) << 8); + iph->daddr |= htonl(ntohl(ipv6h->daddr.s6_addr32[3]) >> 24); + } + } + memcpy(datad, datas, len); + skb_trim(*skb, (*skb)->len - ipv6hlen + iphlen); + *payload_len = htons(ntohs(*payload_len) - ipv6hlen + iphlen); + if (nexthdr == IPPROTO_ICMPV6) { + iph->protocol = IPPROTO_ICMP; + ptr = (u8 *)iph; + ptr += iph->ihl * 4; + icmpinh = (struct icmphdr *)ptr; + otypein = icmpinh->type; + ocodein = icmpinh->code; + ocsumin = icmpinh->checksum; + ret = map_trans_icmp_v6v4(skb, icmpinh, &ipv6h->saddr, + &ipv6h->daddr, &ipv6h->payload_len, m); + ntypein = icmpinh->type; + ncodein = icmpinh->code; + ncsumin = icmpinh->checksum; + /* + printk(KERN_NOTICE "map_trans_icmp_v6v4: " + "otin:%d ocin:%d ntin:%d ncin:%d\n", + otypein, ocodein, ntypein, ncodein); + */ + if (ret) { + printk(KERN_NOTICE "map_trans_icmp_v6v4: " + "inner func err.\n"); + kfree(buf); + return ret; + } + } else + iph->protocol = nexthdr; + } + + icmph->checksum = map_trans_icmp_csum_v6v4(check, otype,
ocode, ntype, + ncode, otypein, ocodein, ocsumin, ntypein, ncodein, ncsumin, + saddr, daddr, orig_payload_len, iphlen, iph, ipv6hlen, ipv6h); + + if (buf) + kfree(buf); + + return 0; +} + +static int +map_trans_icmp_v4v6(struct sk_buff **skb, struct icmp6hdr *icmp6h, + struct in6_addr *saddr, struct in6_addr *daddr, __be16 *payload_len, + struct map *m) +{ + __u8 otype = 0, ocode = 0; + __u8 ntype = 0, ncode = 0; + __u8 otypein = 0, ocodein = 0; + __u8 ntypein = 0, ncodein = 0; + __sum16 ocsumin = 0, ncsumin = 0; + struct icmp6hdr *icmp6inh; + __sum16 check = icmp6h->icmp6_cksum; + struct in6_addr icmpsaddr, icmpdaddr; + u8 *buf = NULL; + struct iphdr *iph = NULL; + struct ipv6hdr *ipv6h = NULL; + u8 *ptr, *datas, *datad; + int len, ipv6hlen = 0, iphlen = 0; + struct map_rule *mr = NULL; + __be32 saddr4, daddr4; + __be16 sport4, dport4; + __u8 proto; + int icmperr; + int ret; + + otype = icmp6h->icmp6_type; ocode = icmp6h->icmp6_code; + ret = map_trans_icmp_typecode_v4v6(otype, ocode, &ntype, &ncode); + icmp6h->icmp6_type = ntype; icmp6h->icmp6_code = ncode; + /* + printk(KERN_NOTICE "map_trans_icmp_v4v6: ot:%d oc:%d nt:%d nc:%d\n", + otype, ocode, ntype, ncode); + */ + if (ret) { + printk(KERN_NOTICE "map_trans_icmp_v4v6: " + "map_trans_icmp_typecode_v4v6 err.\n"); + return ret; + } + + switch (otype) { + case ICMP_DEST_UNREACH: + case ICMP_TIME_EXCEEDED: + ptr = (u8 *)icmp6h; + ptr += sizeof(struct icmp6hdr); + len = skb_tail_pointer(*skb) - ptr; + /* may run in atomic context, so the allocation must not sleep */ + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + printk(KERN_NOTICE "map_trans_icmp_v4v6: " + "buf malloc failed.\n"); + return -1; + } + memcpy(buf, ptr, len); + + ipv6h = (struct ipv6hdr*)ptr; + datad = (u8 *)ipv6h; + datad += sizeof(struct ipv6hdr); + ipv6hlen = sizeof(struct ipv6hdr); + + iph = (struct iphdr *)buf; + datas = (u8 *)iph; + datas += iph->ihl * 4; + len -= iph->ihl * 4; + iphlen = iph->ihl * 4; + + if (map_get_addrport(iph, &saddr4, &daddr4, &sport4, &dport4, + &proto, &icmperr)) { + printk(KERN_NOTICE "map_trans_icmp_v4v6: " + "map_get_addrport error\n"); + kfree(buf); + return -1; + } + + mr = map_rule_find_by_ipv4addrport(m, &iph->saddr, &sport4, 1); + if (mr) { + map_gen_addr6(&icmpsaddr, iph->saddr, sport4, mr, 1); + } else { + icmpsaddr.s6_addr32[0] = m->p.br_address.s6_addr32[0]; + icmpsaddr.s6_addr32[1] = m->p.br_address.s6_addr32[1]; + if (m->p.br_address_length > 64) { + icmpsaddr.s6_addr32[2] = m->p.br_address.s6_addr32[2]; + icmpsaddr.s6_addr32[3] = iph->saddr; + } else { + icmpsaddr.s6_addr32[2] = htonl(ntohl(iph->saddr) >> 8); + icmpsaddr.s6_addr32[3] = htonl(ntohl(iph->saddr) << 24); + } + } + + mr = map_rule_find_by_ipv4addrport(m, &iph->daddr, &dport4, 1); + if (mr) { + map_gen_addr6(&icmpdaddr, iph->daddr, dport4, mr, 1); + } else { + icmpdaddr.s6_addr32[0] = m->p.br_address.s6_addr32[0]; + icmpdaddr.s6_addr32[1] = m->p.br_address.s6_addr32[1]; + if (m->p.br_address_length > 64) { + icmpdaddr.s6_addr32[2] = m->p.br_address.s6_addr32[2]; + icmpdaddr.s6_addr32[3] = iph->daddr; + } else { + icmpdaddr.s6_addr32[2] = htonl(ntohl(iph->daddr) >> 8); + icmpdaddr.s6_addr32[3] = htonl(ntohl(iph->daddr) << 24); + } + } + + skb_put(*skb, ipv6hlen - iphlen); /* XXX: */ + *payload_len = htons(ntohs(*payload_len) + ipv6hlen - iphlen); + + ipv6h->version = 6; + ipv6h->priority = 0; /* XXX: */ + ipv6h->flow_lbl[0] = 0; + ipv6h->flow_lbl[1] = 0; + ipv6h->flow_lbl[2] = 0; + ipv6h->payload_len = htons(ntohs(iph->tot_len) - iph->ihl * 4); + ipv6h->hop_limit = iph->ttl; + memcpy(&ipv6h->saddr, &icmpsaddr, sizeof(struct in6_addr)); +
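/* icmpsaddr/icmpdaddr were derived above from the embedded IPv4 addresses, via a matching rule or the BR prefix */ +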
memcpy(&ipv6h->daddr, &icmpdaddr, sizeof(struct in6_addr)); + memcpy(datad, datas, len); + if (iph->protocol == IPPROTO_ICMP) { + ipv6h->nexthdr = IPPROTO_ICMPV6; + ptr = (u8 *)ipv6h; + ptr += sizeof(struct ipv6hdr); + icmp6inh = (struct icmp6hdr *)ptr; + otypein = icmp6inh->icmp6_type; + ocodein = icmp6inh->icmp6_code; + ocsumin = icmp6inh->icmp6_cksum; + ret = map_trans_icmp_v4v6(skb, icmp6inh, &ipv6h->saddr, + &ipv6h->daddr, &ipv6h->payload_len, m); + ntypein = icmp6inh->icmp6_type; + ncodein = icmp6inh->icmp6_code; + ncsumin = icmp6inh->icmp6_cksum; + /* + printk(KERN_NOTICE "map_trans_icmp_v4v6: " + "otin:%d ocin:%d ntin:%d ncin:%d\n", + otypein, ocodein, ntypein, ncodein); + */ + if (ret) { + printk(KERN_NOTICE "map_trans_icmp_v4v6: " + "inner func err.\n"); + kfree(buf); + return ret; + } + } else + ipv6h->nexthdr = iph->protocol; + } + + icmp6h->icmp6_cksum = map_trans_icmp_csum_v4v6(check, otype, ocode, + ntype, ncode, otypein, ocodein, ocsumin, ntypein, ncodein, + ncsumin, saddr, daddr, *payload_len, iphlen, iph, ipv6hlen, + ipv6h); + + if (buf) + kfree(buf); + + return 0; +} + +/* XXX: */ + +int +map_trans_validate_src(struct sk_buff *skb, struct map *m, __be32 *saddr4, int *fb) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct map_rule *mr; + u8 *ptr; + struct iphdr *icmp6iph; + struct tcphdr *tcph, *icmp6tcph; + struct udphdr *udph, *icmp6udph; + struct icmp6hdr *icmp6h; + struct icmphdr *icmpicmph; + __u8 proto; + __be32 saddr; + __be16 sport; + struct in6_addr addr6; + int src_prefix_length; + int err = 0; + + proto = ipv6h->nexthdr; + ptr = (u8 *)ipv6h; + ptr += sizeof(struct ipv6hdr); + if (proto == IPPROTO_FRAGMENT) { + proto = ((struct frag_hdr *)ptr)->nexthdr; + ptr += sizeof(struct frag_hdr); + } + + if (proto != IPPROTO_ICMPV6 && + proto != IPPROTO_TCP && + proto != IPPROTO_UDP) { + printk(KERN_NOTICE "map_trans_validate_src: " + "not a translated packet?\n"); + err = -1; + goto err; + } + + if (m->p.role == MAP_ROLE_CE && + ipv6_prefix_equal(&ipv6h->saddr, &m->p.br_address, + m->p.br_address_length)) { + if (m->p.br_address_length > 64) { + *saddr4 = ipv6h->saddr.s6_addr32[3]; + } else { + *saddr4 = htonl(ntohl(ipv6h->saddr.s6_addr32[2]) << 8); + *saddr4 |= htonl(ntohl(ipv6h->saddr.s6_addr32[3]) >> 24); + } + return 0; + } + + saddr = htonl(ntohl(ipv6h->saddr.s6_addr32[2]) << 16); + saddr |= htonl(ntohl(ipv6h->saddr.s6_addr32[3]) >> 16); + + switch (proto) { + case IPPROTO_ICMPV6: + icmp6h = (struct icmp6hdr *)ptr; + switch (icmp6h->icmp6_type) { + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + ptr = (u8 *)icmp6h; + ptr += sizeof(struct icmp6hdr); + icmp6iph = (struct iphdr*)ptr; + saddr = icmp6iph->daddr; + ptr += icmp6iph->ihl * 4; + switch (icmp6iph->protocol) { + case IPPROTO_TCP: + icmp6tcph = (struct tcphdr *)ptr; + sport = icmp6tcph->dest; + break; + case IPPROTO_UDP: + icmp6udph = (struct udphdr *)ptr; + sport = icmp6udph->dest; + break; + case IPPROTO_ICMP: + icmpicmph = (struct icmphdr *)ptr; + sport = icmpicmph->un.echo.id; + break; + default: + printk(KERN_NOTICE "map_trans_validate_src: " + "unknown protocol embedded in ICMP error.\n"); + err = -1; + goto err; + } + break; + default: + sport = icmp6h->icmp6_dataun.u_echo.identifier; + break; + } + break; + case IPPROTO_TCP: + tcph = (struct tcphdr *)ptr; + sport = tcph->source; + break; + case IPPROTO_UDP: + udph = (struct udphdr *)ptr; + sport = udph->source; + break; + default: + printk(KERN_NOTICE "map_trans_validate_src: " + "unknown encapsulated protocol.\n"); + err = -1; + goto err; + } + + mr =
map_rule_find_by_ipv6addr(m, &ipv6h->saddr); + if (!mr) { + if (m->p.role == MAP_ROLE_BR) { + *fb = 1; + goto fallback; + } else { + err = -1; + goto err; + } + } + + if (map_gen_addr6(&addr6, saddr, sport, mr, 1)) { + if (m->p.role == MAP_ROLE_BR) { + *fb = 1; + goto fallback; + } else { + err = -1; + goto err; + } + } + + if (mr->p.ipv4_prefix_length + mr->p.ea_length < 32) + src_prefix_length = 80 + mr->p.ipv4_prefix_length + + mr->p.ea_length; + else + src_prefix_length = 128; + if (!ipv6_prefix_equal(&addr6, &ipv6h->saddr, src_prefix_length)) { + if (m->p.role == MAP_ROLE_BR) { + *fb = 1; + goto fallback; + } else { + printk(KERN_NOTICE "map_trans_validate_src: " + "validation failed.\n"); + err = -1; + goto err_icmpv6_send; + } + } + +fallback: + if (*fb) { + if (m->p.br_address_length > 64) { + *saddr4 = ipv6h->saddr.s6_addr32[3]; + } else { + *saddr4 = htonl(ntohl(ipv6h->saddr.s6_addr32[2]) << 8); + *saddr4 |= htonl(ntohl(ipv6h->saddr.s6_addr32[3]) >> 24); + } + } else { + *saddr4 = htonl(ntohl(ipv6h->saddr.s6_addr32[2]) << 16); + *saddr4 |= htonl(ntohl(ipv6h->saddr.s6_addr32[3]) >> 16); + } + + return 0; + +err_icmpv6_send: + printk(KERN_NOTICE "map_trans_validate_src: " + "icmpv6_send(skb, ICMPV6_DEST_UNREACH, " + "5 /* Source address failed ingress/egress policy */, 0);\n"); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, + 5 /* Source address failed ingress/egress policy */, 0); +err: + map_debug_print_skb("map_trans_validate_src", skb); + return err; +} + +int +map_trans_validate_dst(struct sk_buff *skb, struct map *m, __be32 *daddr4) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 *ptr; + struct ipv6hdr *icmp6ipv6h; + struct tcphdr *tcph, *icmp6tcph; + struct udphdr *udph, *icmp6udph; + struct icmp6hdr *icmp6h, *icmp6icmp6h; + __u8 proto, nexthdr; + __be32 daddr; + __be16 dport; + struct in6_addr addr6; + int err = 0; + + proto = ipv6h->nexthdr; + ptr = (u8 *)ipv6h; + ptr += sizeof(struct ipv6hdr); + if (proto == IPPROTO_FRAGMENT) { + proto = ((struct frag_hdr *)ptr)->nexthdr; + ptr += sizeof(struct frag_hdr); + } + + if (proto != IPPROTO_ICMPV6 && + proto != IPPROTO_TCP && + proto != IPPROTO_UDP) { + printk(KERN_NOTICE "map_trans_validate_dst: " + "not a translated packet?\n"); + err = -1; + goto err; + } + + if (!ipv6_prefix_equal(&ipv6h->daddr, &m->map_ipv6_address, + m->map_ipv6_address_length)) { + printk(KERN_NOTICE "map_trans_validate_dst: " + "does not match my address.\n"); + err = -1; + goto err; + } + + if (m->p.role == MAP_ROLE_BR || + (m->p.role == MAP_ROLE_CE && !m->bmr)) { + if (m->p.br_address_length > 64) { + *daddr4 = ipv6h->daddr.s6_addr32[3]; + } else { + *daddr4 = htonl(ntohl(ipv6h->daddr.s6_addr32[2]) << 8); + *daddr4 |= htonl(ntohl(ipv6h->daddr.s6_addr32[3]) >> 24); + } + return 0; + } + + if (!m->bmr) { + printk(KERN_NOTICE "map_trans_validate_dst: m->bmr is null.\n"); + err = -1; + goto err; + } + + daddr = htonl(ntohl(ipv6h->daddr.s6_addr32[2]) << 16); + daddr |= htonl(ntohl(ipv6h->daddr.s6_addr32[3]) >> 16); + + switch (proto) { + case IPPROTO_ICMPV6: + icmp6h = (struct icmp6hdr *)ptr; + switch (icmp6h->icmp6_type) { + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + ptr = (u8 *)icmp6h; + ptr += sizeof(struct icmp6hdr); + icmp6ipv6h = (struct ipv6hdr *)ptr; + daddr = htonl(ntohl(icmp6ipv6h->saddr.s6_addr32[2]) << 16); + daddr |= htonl(ntohl(icmp6ipv6h->saddr.s6_addr32[3]) >> 16); + nexthdr = icmp6ipv6h->nexthdr; + ptr += sizeof(struct ipv6hdr); + if (nexthdr == IPPROTO_FRAGMENT) { + nexthdr = ((struct frag_hdr
*)ptr)->nexthdr; + ptr += sizeof(struct frag_hdr); + } + switch (nexthdr) { + case IPPROTO_TCP: + icmp6tcph = (struct tcphdr *)ptr; + dport = icmp6tcph->source; + break; + case IPPROTO_UDP: + icmp6udph = (struct udphdr *)ptr; + dport = icmp6udph->source; + break; + case IPPROTO_ICMPV6: + icmp6icmp6h = (struct icmp6hdr *)ptr; + dport = + icmp6icmp6h->icmp6_dataun.u_echo.identifier; + break; + default: + printk(KERN_NOTICE "map_trans_validate_dst: " + "unknown protocol embedded in ICMP error.\n"); + err = -1; + goto err; + } + break; + default: + dport = icmp6h->icmp6_dataun.u_echo.identifier; + break; + } + break; + case IPPROTO_TCP: + tcph = (struct tcphdr *)ptr; + dport = tcph->dest; + break; + case IPPROTO_UDP: + udph = (struct udphdr *)ptr; + dport = udph->dest; + break; + default: + printk(KERN_NOTICE "map_trans_validate_dst: " + "unknown encapsulated protocol.\n"); + err = -1; + goto err; + } + + read_lock(&m->rule_lock); + if (!m->bmr) { + read_unlock(&m->rule_lock); + printk(KERN_NOTICE "map_trans_validate_dst: bmr is null.\n"); + err = -1; + goto err; + } + if (map_gen_addr6(&addr6, daddr, dport, m->bmr, 1)) { + read_unlock(&m->rule_lock); + printk(KERN_NOTICE "map_trans_validate_dst: " + "map_gen_addr6 failed.\n"); + err = -1; + goto err; + } + read_unlock(&m->rule_lock); + + if (!ipv6_prefix_equal(&addr6, &ipv6h->daddr, m->map_ipv6_address_length)) { + printk(KERN_NOTICE "map_trans_validate_dst: " + "validation failed.\n"); + printk(KERN_NOTICE "map_trans_validate_dst: " + "addr6 = %08x%08x%08x%08x\n", + ntohl(addr6.s6_addr32[0]), + ntohl(addr6.s6_addr32[1]), + ntohl(addr6.s6_addr32[2]), + ntohl(addr6.s6_addr32[3])); + printk(KERN_NOTICE "map_trans_validate_dst: " + "ipv6h->daddr = %08x%08x%08x%08x\n", + ntohl(ipv6h->daddr.s6_addr32[0]), + ntohl(ipv6h->daddr.s6_addr32[1]), + ntohl(ipv6h->daddr.s6_addr32[2]), + ntohl(ipv6h->daddr.s6_addr32[3])); + printk(KERN_NOTICE "map_trans_validate_dst: " + "daddr = %d.%d.%d.%d dport = %d(%04x)\n", + ((ntohl(daddr) >> 24) & 0xff), + ((ntohl(daddr) >> 16) & 0xff), + ((ntohl(daddr) >> 8) & 0xff), + (ntohl(daddr) & 0xff), + ntohs(dport), ntohs(dport)); + err = -1; + goto err_icmpv6_send; + } + + *daddr4 = htonl(ntohl(ipv6h->daddr.s6_addr32[2]) << 16); + *daddr4 |= htonl(ntohl(ipv6h->daddr.s6_addr32[3]) >> 16); + + return 0; + +err_icmpv6_send: + printk(KERN_NOTICE "map_trans_validate_dst: " + "icmpv6_send(skb, ICMPV6_DEST_UNREACH, " + "ICMPV6_ADDR_UNREACH, 0);\n"); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); +err: + map_debug_print_skb("map_trans_validate_dst", skb); + return err; +} + +int +map_trans_forward_v6v4(struct sk_buff *skb, struct map *m, __be32 *saddr4, + __be32 *daddr4, int fb, int frag) +{ + struct ipv6hdr orig_ipv6h = {}, *ipv6h; + struct frag_hdr orig_fragh = {}, *fragh; + int hsize; + __u8 nexthdr; + struct iphdr *iph; + __be32 *saddrp = NULL; + __be16 *sportp = NULL; + __sum16 *checkp = NULL; + struct in6_addr *saddr6; + u8 *ptr; + int err = 0; + + ipv6h = ipv6_hdr(skb); + + memcpy(&orig_ipv6h, ipv6h, sizeof(orig_ipv6h)); + saddr6 = &orig_ipv6h.saddr; + hsize = sizeof(orig_ipv6h); + nexthdr = orig_ipv6h.nexthdr; + if (orig_ipv6h.nexthdr == IPPROTO_FRAGMENT) { + ptr = (u8 *)ipv6h; + ptr += sizeof(*ipv6h); + fragh = (struct frag_hdr *)ptr; + memcpy(&orig_fragh, fragh, sizeof(orig_fragh)); + hsize += sizeof(orig_fragh); + nexthdr = orig_fragh.nexthdr; + } + + if (nexthdr != IPPROTO_ICMPV6 && + nexthdr != IPPROTO_TCP && + nexthdr != IPPROTO_UDP) { + printk(KERN_NOTICE "map_trans_forward_v6v4: "
transed?\n"); + err = -1; + goto err; + } + + skb_dst_drop(skb); + skb_pull(skb, hsize); + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; + if (orig_ipv6h.nexthdr == IPPROTO_FRAGMENT) { + iph->tot_len = htons(ntohs(orig_ipv6h.payload_len) - 8 + + sizeof(struct iphdr)); + iph->id = htons(ntohl(orig_fragh.identification) & 0xffff); + iph->frag_off = htons(ntohs(orig_fragh.frag_off) & 0xfff8); + if (ntohs(orig_fragh.frag_off) & IP6_MF) + iph->frag_off |= htons(IP_MF); + } else { + iph->tot_len = htons(ntohs(orig_ipv6h.payload_len) + + sizeof(struct iphdr)); + iph->id = 0; + iph->frag_off = frag ? 0 : htons(IP_DF); + } + if (nexthdr == IPPROTO_ICMPV6) { + __be16 payload_len = orig_ipv6h.payload_len; + iph->protocol = IPPROTO_ICMP; + ptr = (u8 *)iph; + ptr += iph->ihl * 4; + err = map_trans_icmp_v6v4(&skb, (struct icmphdr *)ptr, + &orig_ipv6h.saddr, &orig_ipv6h.daddr, &payload_len, m); + if (err) + goto err; + iph->tot_len = htons(ntohs(iph->tot_len) - + (ntohs(orig_ipv6h.payload_len) - ntohs(payload_len))); + } else + iph->protocol = nexthdr; + iph->ttl = orig_ipv6h.hop_limit; + iph->saddr = *saddr4; + iph->daddr = *daddr4; + + if (m->p.role == MAP_ROLE_BR && fb) { + err = map_napt(iph, 0, m, &saddrp, &sportp, &checkp, saddr6, fb); + if (err) + goto err; + /* NAPT Hairpinning */ + if (map_napt_hairpin(skb, m, saddrp, sportp, saddr6, fb)) + goto out; + } else + err = map_napt(iph, 1, m, &saddrp, &sportp, &checkp, NULL, fb); + if (err) { + printk(KERN_NOTICE "map_trans_forward_v6v4: " + "saddr:%d.%d.%d.%d daddr:%d.%d.%d.%d\n", + ((ntohl(iph->saddr) >> 24) & 0xff), + ((ntohl(iph->saddr) >> 16) & 0xff), + ((ntohl(iph->saddr) >> 8) & 0xff), + ((ntohl(iph->saddr)) & 0xff), + ((ntohl(iph->daddr) >> 24) & 0xff), + ((ntohl(iph->daddr) >> 16) & 0xff), + ((ntohl(iph->daddr) >> 8) & 0xff), + ((ntohl(iph->daddr)) & 0xff)); + goto err; + } + + if (iph->protocol != IPPROTO_ICMP) + *checkp = map_trans_update_csum_v6v4(*checkp, + &orig_ipv6h.saddr, &orig_ipv6h.daddr, *saddr4, *daddr4); + + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + skb->dev = m->dev; + skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); + skb_dst_drop(skb); + nf_reset(skb); + + netif_rx(skb); + + return 0; + +err: + map_debug_print_skb("map_trans_forward_v6v4", skb); +out: + return err; +} + +int +map_trans_forward_v4v6(struct sk_buff *skb, struct map *m, struct map_rule *mr, int fb, int df) +{ + struct flowi6 fl6; + struct in6_addr saddr6, daddr6; + struct iphdr orig_iph; + unsigned int max_headroom; + struct sk_buff *oskb; + struct dst_entry *dst; + struct net *net = dev_net(m->dev); + struct ipv6hdr *ipv6h; + int pkt_len; + struct iphdr *iph; + __be32 *daddrp = NULL; + __be16 *dportp = NULL; + __sum16 *checkp = NULL; + struct icmphdr *icmph; + u8 *ptr; + int err = 0; + + iph = ip_hdr(skb); + if (iph->protocol == IPPROTO_ICMP) { + ptr = (u8 *)iph; + ptr += iph->ihl * 4; + icmph = (struct icmphdr *)ptr; + if (((icmph->type == ICMP_DEST_UNREACH) || + (icmph->type == ICMP_TIME_EXCEEDED)) && + (skb_tailroom(skb) < sizeof(struct ipv6hdr))) { + oskb = skb; + skb = skb_copy_expand(skb, LL_MAX_HEADER, + sizeof(struct ipv6hdr), GFP_ATOMIC); + kfree_skb(oskb); + } + } + + iph = ip_hdr(skb); + + err = map_napt(iph, 0, m, &daddrp, &dportp, &checkp, NULL, 0); + if (err) + goto err; + /* NAPT Hairpinning */ + if (map_napt_hairpin(skb, m, daddrp, dportp, NULL, 0)) + goto out; + + iph->check 
= 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + saddr6.s6_addr32[0] = m->map_ipv6_address.s6_addr32[0]; + saddr6.s6_addr32[1] = m->map_ipv6_address.s6_addr32[1]; + /* XXXXX: */ + if (m->p.role == MAP_ROLE_BR || !m->bmr || + (m->psid_length <= 0 && m->p.napt_always == MAP_NAPT_ALWAYS_F)) { + if (m->psid_length <= 0 && m->p.napt_always == MAP_NAPT_ALWAYS_F) { + saddr6.s6_addr32[2] = htonl(ntohl(iph->saddr) >> 16); + saddr6.s6_addr32[3] = htonl(ntohl(iph->saddr) << 16); + } else if (m->p.br_address_length > 64) { + saddr6.s6_addr32[2] = m->map_ipv6_address.s6_addr32[2]; + saddr6.s6_addr32[3] = iph->saddr; + } else { + saddr6.s6_addr32[2] = htonl(ntohl(iph->saddr) >> 8); + saddr6.s6_addr32[3] = htonl(ntohl(iph->saddr) << 24); + } + } else { + saddr6.s6_addr32[2] = m->map_ipv6_address.s6_addr32[2]; + saddr6.s6_addr32[3] = m->map_ipv6_address.s6_addr32[3]; + } + + if (mr) { + map_gen_addr6(&daddr6, iph->daddr, *dportp, mr, 1); + } else { + daddr6.s6_addr32[0] = m->p.br_address.s6_addr32[0]; + daddr6.s6_addr32[1] = m->p.br_address.s6_addr32[1]; + if (m->p.br_address_length > 64) { + daddr6.s6_addr32[2] = m->p.br_address.s6_addr32[2]; + daddr6.s6_addr32[3] = iph->daddr; + } else { + daddr6.s6_addr32[2] = htonl(ntohl(iph->daddr) >> 8); + daddr6.s6_addr32[3] = htonl(ntohl(iph->daddr) << 24); + } + } + + if (m->p.role == MAP_ROLE_BR && fb) { + err = map_napt(iph, 1, m, &daddrp, &dportp, &checkp, &daddr6, fb); + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + if (err) + goto err; + } + + if (iph->protocol != IPPROTO_ICMP) + *checkp = map_trans_update_csum_v4v6(*checkp, iph->saddr, + iph->daddr, &saddr6, &daddr6); + + memset(&fl6, 0, sizeof(fl6)); + fl6.saddr = saddr6; + fl6.daddr = daddr6; + fl6.flowi6_oif = m->dev->ifindex; + fl6.flowlabel = 0; + + dst = ip6_route_output(net, NULL, &fl6); + // dst_metric_set(dst, RTAX_MTU, 1280); + if (dst_mtu(dst) > m->p.ipv6_fragment_size) + dst_metric_set(dst, RTAX_MTU, m->p.ipv6_fragment_size); + + max_headroom = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr) - sizeof(struct iphdr) + 20; + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + struct sk_buff *new_skb; + + if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + memcpy(&orig_iph, iph, sizeof(orig_iph)); + skb_pull(skb, orig_iph.ihl * 4); + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IPV6); + ipv6h = ipv6_hdr(skb); + + ipv6h->version = 6; + ipv6h->priority = 0; /* XXX: */ + ipv6h->flow_lbl[0] = 0; + ipv6h->flow_lbl[1] = 0; + ipv6h->flow_lbl[2] = 0; + ipv6h->payload_len = htons(ntohs(orig_iph.tot_len) - orig_iph.ihl * 4); + ipv6h->hop_limit = orig_iph.ttl; + memcpy(&ipv6h->saddr, &fl6.saddr, sizeof(struct in6_addr)); + memcpy(&ipv6h->daddr, &fl6.daddr, sizeof(struct in6_addr)); + if (orig_iph.protocol == IPPROTO_ICMP) { + ipv6h->nexthdr = IPPROTO_ICMPV6; + ptr = (u8 *)ipv6h; + ptr += sizeof(*ipv6h); + err = map_trans_icmp_v4v6(&skb, (struct icmp6hdr *)ptr, + &ipv6h->saddr, &ipv6h->daddr, &ipv6h->payload_len, m); + if (err) + goto tx_err_dst_release; + } else + ipv6h->nexthdr = orig_iph.protocol; + + pkt_len = skb->len; + + skb->local_df = 1; + + if (df) + err = ip6_local_out(skb); + else + err = ip6_fragment(skb, ip6_local_out); + + return 0; + 
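+/* Error unwind: reached when headroom reallocation or the inner ICMP translation fails after the route lookup; dst_release() below is still commented out (XXX). */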
+tx_err_dst_release: + printk(KERN_NOTICE "map_trans_forward_v4v6: tx_err_dst_release:\n"); + // dst_release(dst); /* XXX: */ +err: + map_debug_print_skb("map_trans_forward_v4v6", skb); +out: + return err; +} diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 444f08b..d403a9d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1456,6 +1456,7 @@ out: dst_free(&rt->dst); return err; } +EXPORT_SYMBOL(ip6_route_add); static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { @@ -1485,7 +1486,7 @@ int ip6_del_rt(struct rt6_info *rt) return __ip6_del_rt(rt, &info); } -static int ip6_route_del(struct fib6_config *cfg) +int ip6_route_del(struct fib6_config *cfg) { struct fib6_table *table; struct fib6_node *fn; @@ -1523,6 +1524,7 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } +EXPORT_SYMBOL(ip6_route_del); /* * Handle redirects @@ -1786,6 +1788,7 @@ void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *sad rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); } +EXPORT_SYMBOL(rt6_pmtu_discovery); /* * Misc support functions