Bedrock Computer Technologies, LLC v. Softlayer Technologies, Inc. et al
Filing
845
MOTION for Judgment as a Matter of Law Regarding Invalidity (Renewed) by Yahoo! Inc.. (Attachments: #1 Text of Proposed Order, #2 Exhibit 1 - Declaration of Alexey Kuznetsov - DX-48, #3 Exhibit 2 - Source Code - key.c - DX-37, #4 Exhibit 3 - U.S. Patent 5,121,495 - DX-65, #5 Exhibit 4 - Application Approval for Filing - DX-57, #6 Exhibit 5 - U. S. Patent 6,119,214 - DX101, #7 Exhibit 6 - U.S. Patent 4,996,663 - DX-64, #8 Exhibit 7 - Donald Knuth, Sorting and Searching, vol. 3, of The Art of Computer Programming - DX-98, #9 Exhibit 8 - Kruse, "Data Structures and Program Design" - DX-108, #10 Exhibit 9 - Daniel F. Stubbs and Neil W. Webre, Data Structures with Abstract Data Types and Pascal - DX-118, #11 Exhibit 10 - Kuznetsov email to Day re contact request - DX-436, #12 Exhibit 11 - Absher email to Kuznetsov re Linux route.c question - DX-440, #13 Exhibit 12 - Kuznetsov email to Absher re Linux route.c question - DX-441)(Doan, Jennifer)
Exhibit 1
DEF00009285
From a06606bdd748dfeba6cdc1100360d3035663e2d5 Mon Sep 17 00:00:00 2001
From: davem
Date: Fri, 17 Nov 1995 01:02:00 +0000
Subject: [PATCH 003/103] Merge to 1.3.42
--net/ipv4/route.c | 1727 +++++++++++++++++++++++++++++++++++++++++++---------1 files changed, 1380 insertions(+), 347 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6483db0..d14fead 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -35,6 +35,8 @@
*
Alan Cox
:
Aligned routing errors more closely with BSD
*
our system is still very different.
*
Alan Cox
:
Faster /proc handling
+ *
Alexey Kuznetsov :
Massive rework to support tree based routing,
+ *
routing caches and better behaviour.
*
*
This program is free software; you can redistribute it and/or
*
modify it under the terms of the GNU General Public License
@@ -42,8 +44,10 @@
*
2 of the License, or (at your option) any later version.
*/
+#include
#include
#include
+#include
#include
#include
#include
@@ -65,102 +69,246 @@
#include
/*
- *
The routing table list
+ * Forwarding Information Base definitions.
*/
-static struct rtable *rt_base = NULL;
-unsigned long rt_stamp = 1;
/* Routing table version stamp for caches
( 0 is 'unset' ) */
+struct fib_node
+{
+
struct fib_node
*fib_next;
+
__u32
fib_dst;
+
unsigned long
fib_use;
+
struct fib_info
*fib_info;
+
short
fib_metric;
+
unsigned char
fib_tos;
+};
/*
- *
Pointer to the loopback route
DEF00009286
RHT-BR00029911
+ * This structure contains data shared by many of routes.
+ */
+
+struct fib_info
+{
+
struct fib_info
*fib_next;
+
struct fib_info
*fib_prev;
+
__u32
fib_gateway;
+
struct device
*fib_dev;
+
int
fib_refcnt;
+
unsigned long
fib_window;
+
unsigned short
fib_flags;
+
unsigned short
fib_mtu;
+
unsigned short
fib_irtt;
+};
+
+struct fib_zone
+{
+
struct fib_zone
*fz_next;
+
struct fib_node
**fz_hash_table;
+
struct fib_node
*fz_list;
+
int
fz_nent;
+
int
fz_logmask;
+
__u32
fz_mask;
+};
+
+static struct fib_zone
*fib_zones[33];
+static struct fib_zone
*fib_zone_list;
+static struct fib_node
*fib_loopback = NULL;
+static struct fib_info
*fib_info_list;
+
+/*
+ * Backlogging.
*/
-static struct rtable *rt_loopback = NULL;
+
+#define RT_BH_REDIRECT
0
+#define RT_BH_GARBAGE_COLLECT
1
+#define RT_BH_FREE
2
+
+struct rt_req
+{
+
struct rt_req * rtr_next;
+
struct device *dev;
+
__u32 dst;
+
__u32 gw;
+
unsigned char tos;
+};
+
+int
ip_rt_lock;
+unsigned
ip_rt_bh_mask;
+static struct rt_req
*rt_backlog;
/*
- *
Remove a routing table entry.
+ * Route cache.
DEF00009287
RHT-BR00029912
*/
-static int rt_del(__u32 dst, __u32 mask,
char *devname, __u32 gtw, short rt_flags, short metric)
+struct rtable
*ip_rt_hash_table[RT_HASH_DIVISOR];
+static int
rt_cache_size;
+static struct rtable
*rt_free_queue;
+struct wait_queue
*rt_wait;
+
+static void rt_kick_backlog(void);
+static void rt_cache_add(unsigned hash, struct rtable * rth);
+static void rt_cache_flush(void);
+static void rt_garbage_collect_1(void);
+
+/*
+ * Evaluate mask length.
+ */
+
+static __inline__ int rt_logmask(__u32 mask)
{
struct rtable *r, **rp;
unsigned long flags;
int found=0;
+
if (!(mask = ntohl(mask)))
+
return 32;
+
return ffz(~mask);
+}
rp = &rt_base;
/*
*
This must be done with interrupts off because we could take
*
an ICMP_REDIRECT.
*/
save_flags(flags);
cli();
while((r = *rp) != NULL)
+/*
+ * Create mask from length.
+ */
+
+static __inline__ __u32 rt_mask(int logmask)
+{
+
if (logmask >= 32)
+
return 0;
+
return htonl(~((1<>logmask);
+}
+
+/*
+ * Free FIB node.
+ */
DEF00009288
RHT-BR00029913
+
+static void fib_free_node(struct fib_node * f)
+{
+
struct fib_info * fi = f->fib_info;
+
if (!--fi->fib_refcnt)
{
/*
*
Make sure the destination and netmask match.
*
metric, gateway and device are also checked
*
if they were specified.
*/
if (r->rt_dst != dst ||
(mask && r->rt_mask != mask) ||
(gtw && r->rt_gateway != gtw) ||
(metric >= 0 && r->rt_metric != metric) ||
(devname && strcmp((r->rt_dev)->name,devname) != 0) )
+#if RT_CACHE_DEBUG >= 2
+
printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway,
fi->fib_dev->name);
+#endif
+
if (fi->fib_next)
+
fi->fib_next->fib_prev = fi->fib_prev;
+
if (fi->fib_prev)
+
fi->fib_prev->fib_next = fi->fib_next;
+
if (fi == fib_info_list)
+
fib_info_list = fi->fib_next;
+
}
+
kfree_s(f, sizeof(struct fib_node));
+}
+
+/*
+ * Find gateway route by address.
+ */
+
+static struct fib_node * fib_lookup_gateway(__u32 dst)
+{
+
struct fib_zone * fz;
+
struct fib_node * f;
+
+
for (fz = fib_zone_list; fz; fz = fz->fz_next)
+
{
+
if (fz->fz_hash_table)
+
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+
else
+
f = fz->fz_list;
+
+
for ( ; f; f = f->fib_next)
{
rp = &r->rt_next;
continue;
+
if ((dst ^ f->fib_dst) & fz->fz_mask)
+
continue;
+
if (f->fib_info->fib_flags & RTF_GATEWAY)
+
return NULL;
+
return f;
}
*rp = r->rt_next;
DEF00009289
RHT-BR00029914
/*
*
If we delete the loopback route update its pointer.
*/
if (rt_loopback == r)
rt_loopback = NULL;
ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, rt_flags, metric,
r->rt_dev->name);
kfree_s(r, sizeof(struct rtable));
found=1;
}
rt_stamp++;
/* New table revision */
restore_flags(flags);
if(found)
return 0;
return -ESRCH;
+
}
+
return NULL;
}
+/*
+ * Find local route by address.
+ * FIXME: I use "longest match" principle. If destination
+ *
has some non-local route, I'll not search shorter matches.
+ *
It's possible, I'm wrong, but I wanted to prevent following
+ *
situation:
+ *
route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
+ *
route add 193.233.7.0
netmask 255.255.255.0 eth1
+ *
(Two ethernets connected by serial line, one is small and other is
large)
+ *
Host 193.233.7.129 is locally unreachable,
+ *
but old (<=1.3.37) code will send packets destined for it to eth1.
+ *
+ */
+
+static struct fib_node * fib_lookup_local(__u32 dst)
+{
+
struct fib_zone * fz;
+
struct fib_node * f;
+
+
for (fz = fib_zone_list; fz; fz = fz->fz_next)
+
{
+
int longest_match_found = 0;
+
+
if (fz->fz_hash_table)
+
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
+
else
+
f = fz->fz_list;
+
+
for ( ; f; f = f->fib_next)
+
{
+
if ((dst ^ f->fib_dst) & fz->fz_mask)
+
continue;
+
if (!(f->fib_info->fib_flags & RTF_GATEWAY))
DEF00009290
RHT-BR00029915
+
+
+
+
+
+
+
+}
return f;
longest_match_found = 1;
}
if (longest_match_found)
return NULL;
}
return NULL;
/*
- *
Remove all routing table entries for a device. This is called when
- *
a device is downed.
+ * Main lookup routine.
+ *
IMPORTANT NOTE: this algorithm has small difference from <=1.3.37
visible
+ *
by user. It doesn't route non-CIDR broadcasts by default.
+ *
+ *
F.e.
+ *
ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast
193.233.7.255
+ *
is valid, but if you really are not able (not allowed, do not want) to
+ *
use CIDR compliant broadcast 193.233.7.127, you should add host route:
+ *
route add -host 193.233.7.255 eth0
*/
-void ip_rt_flush(struct device *dev)
+
+static struct fib_node * fib_lookup(__u32 dst)
{
struct rtable *r;
struct rtable **rp;
unsigned long flags;
+
struct fib_zone * fz;
+
struct fib_node * f;
+
+
+
+
+
+
+
+
+
+
+
+
-
rp = &rt_base;
save_flags(flags);
cli();
while ((r = *rp) != NULL) {
if (r->rt_dev != dev) {
rp = &r->rt_next;
continue;
for (fz = fib_zone_list; fz; fz = fz->fz_next)
{
if (fz->fz_hash_table)
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
else
f = fz->fz_list;
for ( ; f; f = f->fib_next)
{
if ((dst ^ f->fib_dst) & fz->fz_mask)
continue;
return f;
}
*rp = r->rt_next;
if (rt_loopback == r)
DEF00009291
RHT-BR00029916
rt_loopback = NULL;
kfree_s(r, sizeof(struct rtable));
}
rt_stamp++;
/* New table revision */
restore_flags(flags);
+
}
+
return NULL;
+}
+
+static __inline__ struct device * get_gw_dev(__u32 gw)
+{
+
struct fib_node * f;
+
f = fib_lookup_gateway(gw);
+
if (f)
+
return f->fib_info->fib_dev;
+
return NULL;
}
/*
@@ -200,60 +348,181 @@ static __u32 guess_mask(__u32 dst, struct device *
dev)
/*
- *
+ *
*/
Find the route entry through which our gateway will be reached
Check if a mask is acceptable.
-static inline struct device * get_gw_dev(__u32 gw)
+static inline int bad_mask(__u32 mask, __u32 addr)
{
struct rtable * rt;
+
if (addr & (mask = ~mask))
+
return 1;
+
mask = ntohl(mask);
+
if (mask & (mask+1))
+
return 1;
+
return 0;
+}
+
+
+static int fib_del_list(struct fib_node **fp, __u32 dst,
+
struct device * dev, __u32 gtw, short flags, short metric, __u32
mask)
+{
+
struct fib_node *f;
+
int found=0;
+
+
+
+
for (rt = rt_base ; ; rt = rt->rt_next)
while((f = *fp) != NULL)
{
if (!rt)
return NULL;
if ((gw ^ rt->rt_dst) & rt->rt_mask)
struct fib_info * fi = f->fib_info;
/*
DEF00009292
RHT-BR00029917
+
+
+
+
+
+
+
+
+
+
*
Make sure the destination and netmask match.
*
metric, gateway and device are also checked
*
if they were specified.
*/
if (f->fib_dst != dst ||
(gtw && fi->fib_gateway != gtw) ||
(metric >= 0 && f->fib_metric != metric) ||
(dev && fi->fib_dev != dev) )
{
fp = &f->fib_next;
continue;
/*
*
Gateways behind gateways are a no-no
+
}
+
cli();
+
*fp = f->fib_next;
+
if (fib_loopback == f)
+
fib_loopback = NULL;
+
sti();
+
ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi>fib_dev->name);
+
fib_free_node(f);
+
found++;
+
}
+
return found;
+}
+
+static __inline__ int fib_del_1(__u32 dst, __u32 mask,
+
struct device * dev, __u32 gtw, short flags, short metric)
+{
+
struct fib_node **fp;
+
struct fib_zone *fz;
+
int found=0;
+
+
if (!mask)
+
{
+
for (fz=fib_zone_list; fz; fz = fz->fz_next)
+
{
+
int tmp;
+
if (fz->fz_hash_table)
+
fp = &fz->fz_hash_table[fz_hash_code(dst, fz>fz_logmask)];
+
else
+
fp = &fz->fz_list;
+
+
tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
+
fz->fz_nent -= tmp;
+
found += tmp;
+
}
+
}
+
else
+
{
+
if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
+
{
+
if (fz->fz_hash_table)
+
fp = &fz->fz_hash_table[fz_hash_code(dst, fz>fz_logmask)];
DEF00009293
RHT-BR00029918
+
else
+
fp = &fz->fz_list;
+
+
found = fib_del_list(fp, dst, dev, gtw, flags, metric,
mask);
+
fz->fz_nent -= found;
+
}
+
}
+
+
if (found)
+
{
+
rt_cache_flush();
+
return 0;
+
}
+
return -ESRCH;
+}
+
+
+static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
+
unsigned short flags, unsigned short mss,
+
unsigned long window, unsigned short irtt)
+{
+
struct fib_info * fi;
+
+
if (!(flags & RTF_MSS))
+
{
+
mss = dev->mtu;
+#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
+
/*
+
*
If MTU was not specified, use default.
+
*
If you want to increase MTU for some net (local subnet)
+
*
use "route add .... mss xxx".
+
*
+
*
The MTU isnt currently always used and computed as it
+
*
should be as far as I can tell. [Still verifying this is
right]
*/
if (rt->rt_flags & RTF_GATEWAY)
return NULL;
return rt->rt_dev;
+
if ((flags & RTF_GATEWAY) && mss > 576)
+
mss = 576;
+#endif
}
+
if (!(flags & RTF_WINDOW))
+
window = 0;
+
if (!(flags & RTF_IRTT))
+
irtt = 0;
+
+
for (fi=fib_info_list; fi; fi = fi->fib_next)
+
{
+
if (fi->fib_gateway != gw ||
+
fi->fib_dev != dev ||
+
fi->fib_flags != flags ||
+
fi->fib_mtu != mss ||
+
fi->fib_window != window ||
DEF00009294
RHT-BR00029919
+
fi->fib_irtt != irtt)
+
continue;
+
fi->fib_refcnt++;
+#if RT_CACHE_DEBUG >= 2
+
printk("fib_create_info: fi %08x/%s is duplicate\n", fi>fib_gateway, fi->fib_dev->name);
+#endif
+
return fi;
+
}
+
fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
+
if (!fi)
+
return NULL;
+
memset(fi, 0, sizeof(struct fib_info));
+
fi->fib_flags = flags;
+
fi->fib_dev = dev;
+
fi->fib_gateway = gw;
+
fi->fib_mtu = mss;
+
fi->fib_window = window;
+
fi->fib_refcnt++;
+
fi->fib_next = fib_info_list;
+
fi->fib_prev = NULL;
+
if (fib_info_list)
+
fib_info_list->fib_prev = fi;
+
fib_info_list = fi;
+#if RT_CACHE_DEBUG >= 2
+
printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi>fib_dev->name);
+#endif
+
return fi;
}
-/*
- *
Rewrote rt_add(), as the old one was weird - Linus
- *
- *
This routine is used to update the IP routing table, either
- *
from the kernel (ICMP_REDIRECT) or via an ioctl call issued
- *
by the superuser.
- */
-void ip_rt_add(short flags, __u32 dst, __u32 mask,
__u32 gw, struct device *dev, unsigned short mtu,
+
+static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
+
__u32 gw, struct device *dev, unsigned short mss,
unsigned long window, unsigned short irtt, short metric)
{
struct rtable *r, *rt;
struct rtable **rp;
unsigned long cpuflags;
int duplicate = 0;
+
struct fib_node *f, *f1;
+
struct fib_node **fp;
+
struct fib_node **dup_fp = NULL;
+
struct fib_zone * fz;
+
struct fib_info * fi;
+
int logmask;
DEF00009295
RHT-BR00029920
-
/*
*
*/
-
if (flags & RTF_HOST)
{
mask = 0xffffffff;
}
+
A host is a unique machine and has no network bits.
/*
*
Calculate the network mask
* If mask is not specified, try to guess it.
*/
+
else if (!mask)
else if (!mask)
{
if (!((dst ^ dev->pa_addr) & dev->pa_mask))
{
@@ -261,7 +530,7 @@ void ip_rt_add(short flags, __u32 dst, __u32 mask,
flags &= ~RTF_GATEWAY;
if (flags & RTF_DYNAMIC)
{
/*printk("Dynamic route to my own net rejected\n");*/
+
printk("Dynamic route to my own net rejected\n");
return;
}
}
@@ -295,132 +564,1027 @@ void ip_rt_add(short flags, __u32 dst, __u32 mask,
*
Allocate an entry and fill it in.
*/
+
+
+
+
+
+
+
+
+
+
+
-
rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
if (rt == NULL)
f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
if (f == NULL)
return;
memset(f, 0, sizeof(struct fib_node));
f->fib_dst = dst;
f->fib_metric = metric;
f->fib_tos
= 0;
if
{
((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
kfree_s(f, sizeof(struct fib_node));
return;
}
memset(rt, 0, sizeof(struct rtable));
rt->rt_flags = flags | RTF_UP;
rt->rt_dst = dst;
rt->rt_dev = dev;
rt->rt_gateway = gw;
rt->rt_mask = mask;
rt->rt_mss = dev->mtu - HEADER_SIZE;
rt->rt_metric = metric;
rt->rt_window = 0;
/* Default is no clamping */
DEF00009296
RHT-BR00029921
+
f->fib_info = fi;
+
+
/* Are the MSS/Window valid ? */
logmask = rt_logmask(mask);
fz = fib_zones[logmask];
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
if(rt->rt_flags & RTF_MSS)
rt->rt_mss = mtu;
+
if(rt->rt_flags & RTF_WINDOW)
rt->rt_window = window;
if(rt->rt_flags & RTF_IRTT)
rt->rt_irtt = irtt;
if (!fz)
{
int i;
fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
if (!fz)
{
fib_free_node(f);
return;
}
memset(fz, 0, sizeof(struct fib_zone));
fz->fz_logmask = logmask;
fz->fz_mask = mask;
for (i=logmask-1; i>=0; i--)
if (fib_zones[i])
break;
cli();
if (i<0)
{
fz->fz_next = fib_zone_list;
fib_zone_list = fz;
}
else
{
fz->fz_next = fib_zones[i]->fz_next;
fib_zones[i]->fz_next = fz;
}
fib_zones[logmask] = fz;
sti();
}
/*
*
*
*
*
* If
*/
What we have to do is loop though this until we have
found the first address which has a higher generality than
the one in rt. Then we can put rt in right before it.
The interrupts must be off for this process.
zone overgrows RTZ_HASHING_LIMIT, create hash table.
save_flags(cpuflags);
cli();
+
if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table &&
logmask<32)
+
{
DEF00009297
RHT-BR00029922
+
struct fib_node ** ht;
+#if RT_CACHE_DEBUG
+
printk("fib_add_1: hashing for zone %d started\n", logmask);
+#endif
+
ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*),
GFP_KERNEL);
+
+
if (ht)
+
{
+
memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
+
cli();
+
f1 = fz->fz_list;
+
while (f1)
+
{
+
struct fib_node * next;
+
unsigned hash = fz_hash_code(f1->fib_dst, logmask);
+
next = f1->fib_next;
+
f1->fib_next = ht[hash];
+
ht[hash] = f1;
+
f1 = next;
+
}
+
fz->fz_list = NULL;
+
fz->fz_hash_table = ht;
+
sti();
+
}
+
}
+
+
if (fz->fz_hash_table)
+
fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
+
else
+
fp = &fz->fz_list;
+
+
-
/*
*
Remove old route if we are getting a duplicate.
* Scan list to find the first route with the same destination
*/
rp = &rt_base;
while ((r = *rp) != NULL)
while ((f1 = *fp) != NULL)
{
if (r->rt_dst != dst ||
r->rt_mask != mask)
{
rp = &r->rt_next;
continue;
}
if (r->rt_metric != metric && r->rt_gateway != gw)
{
duplicate = 1;
rp = &r->rt_next;
continue;
}
*rp = r->rt_next;
if (rt_loopback == r)
rt_loopback = NULL;
DEF00009298
RHT-BR00029923
ip_netlink_msg(RTMSG_DELROUTE, dst,gw, mask, flags, metric, rt>rt_dev->name);
kfree_s(r, sizeof(struct rtable));
+
if (f1->fib_dst == dst)
+
break;
+
fp = &f1->fib_next;
}
+
/*
*
Add the new route
+
* Find route with the same destination and less (or equal) metric.
*/
rp = &rt_base;
while ((r = *rp) != NULL) {
/*
* When adding a duplicate route, add it before
* the route with a higher metric.
*/
if (duplicate &&
r->rt_dst == dst &&
r->rt_mask == mask &&
r->rt_metric > metric)
+
while ((f1 = *fp) != NULL && f1->fib_dst == dst)
+
{
+
if (f1->fib_metric >= metric)
break;
else
/*
* Otherwise, just add it before the
* route with a higher generality.
+
*
Record route with the same destination and gateway,
+
*
but less metric. We'll delete it
+
*
after instantiation of new route.
*/
if ((r->rt_mask & mask) != mask)
break;
rp = &r->rt_next;
+
if (f1->fib_info->fib_gateway == gw)
+
dup_fp = fp;
+
fp = &f1->fib_next;
+
}
+
+
/*
+
* Is it already present?
+
*/
+
+
if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
+
{
+
fib_free_node(f);
+
return;
}
rt->rt_next = r;
*rp = rt;
/*
DEF00009299
RHT-BR00029924
+
-
*
Update the loopback route
* Insert new entry to the list.
*/
if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
rt_loopback = rt;
rt_stamp++;
/* New table revision */
+
cli();
+
f->fib_next = f1;
+
*fp = f;
+
if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
+
fib_loopback = f;
+
sti();
+
fz->fz_nent++;
+
ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi>fib_dev->name);
+
/*
*
Restore the interrupts and return
+
*
Delete route with the same destination and gateway.
+
*
Note that we should have at most one such route.
*/
+
if (dup_fp)
+
fp = dup_fp;
+
else
+
fp = &f->fib_next;
restore_flags(cpuflags);
ip_netlink_msg(RTMSG_NEWROUTE, dst,gw, mask, flags, metric, rt->rt_dev>name);
+
while ((f1 = *fp) != NULL && f1->fib_dst == dst)
+
{
+
if (f1->fib_info->fib_gateway == gw)
+
{
+
cli();
+
*fp = f1->fib_next;
+
if (fib_loopback == f1)
+
fib_loopback = NULL;
+
sti();
+
ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags,
metric, f1->fib_info->fib_dev->name);
+
fib_free_node(f1);
+
fz->fz_nent--;
+
break;
+
}
+
fp = &f1->fib_next;
+
}
+
rt_cache_flush();
return;
}
+static int rt_flush_list(struct fib_node ** fp, struct device *dev)
+{
+
int found = 0;
+
struct fib_node *f;
DEF00009300
RHT-BR00029925
-/*
- *
Check if a mask is acceptable.
+
while ((f = *fp) != NULL) {
+
if (f->fib_info->fib_dev != dev) {
+
fp = &f->fib_next;
+
continue;
+
}
+
cli();
+
*fp = f->fib_next;
+
if (fib_loopback == f)
+
fib_loopback = NULL;
+
sti();
+
fib_free_node(f);
+
found++;
+
}
+
return found;
+}
+
+static __inline__ void fib_flush_1(struct device *dev)
+{
+
struct fib_zone *fz;
+
int found = 0;
+
+
for (fz = fib_zone_list; fz; fz = fz->fz_next)
+
{
+
if (fz->fz_hash_table)
+
{
+
int i;
+
int tmp = 0;
+
for (i=0; ifz_hash_table[i], dev);
+
fz->fz_nent -= tmp;
+
found += tmp;
+
}
+
else
+
{
+
int tmp;
+
tmp = rt_flush_list(&fz->fz_list, dev);
+
fz->fz_nent -= tmp;
+
found += tmp;
+
}
+
}
+
+
if (found)
+
rt_cache_flush();
+}
+
+
+/*
+ *
Called from the PROCfs module. This outputs /proc/net/route.
+ *
+ *
We preserve the old format but pad the buffers out. This means that
+ *
we can spin over the other entries as we read them. Remember the
+ *
gated BGP4 code could need to read 60,000+ routes on occasion (thats
+ *
about 7Mb of data). To do that ok we will need to also cache the
+ *
last route we got to (reads will generally be following on from
DEF00009301
RHT-BR00029926
+ *
*/
one another without gaps).
-static inline int bad_mask(__u32 mask, __u32 addr)
+int rt_get_info(char *buffer, char **start, off_t offset, int length, int
dummy)
{
if (addr & (mask = ~mask))
return 1;
mask = ntohl(mask);
if (mask & (mask+1))
return 1;
return 0;
+
struct fib_zone *fz;
+
struct fib_node *f;
+
int len=0;
+
off_t pos=0;
+
char temp[129];
+
int i;
+
+
pos = 128;
+
+
if (offset<128)
+
{
+
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
+
len = 128;
+
}
+
+
while (ip_rt_lock)
+
sleep_on(&rt_wait);
+
ip_rt_fast_lock();
+
+
for (fz=fib_zone_list; fz; fz = fz->fz_next)
+
{
+
int maxslot;
+
struct fib_node ** fp;
+
+
if (fz->fz_nent == 0)
+
continue;
+
+
if (pos + 128*fz->fz_nent <= offset)
+
{
+
pos += 128*fz->fz_nent;
+
len = 0;
+
continue;
+
}
+
+
if (fz->fz_hash_table)
+
{
+
maxslot = RTZ_HASH_DIVISOR;
+
fp
= fz->fz_hash_table;
+
}
+
else
+
{
+
maxslot
= 1;
+
fp
= &fz->fz_list;
DEF00009302
RHT-BR00029927
+
}
+
+
for (i=0; i < maxslot; i++, fp++)
+
{
+
+
for (f = *fp; f; f = f->fib_next)
+
{
+
struct fib_info * fi;
+
/*
+
*
Spin through entries until we are ready
+
*/
+
pos += 128;
+
+
if (pos <= offset)
+
{
+
len=0;
+
continue;
+
}
+
+
fi = f->fib_info;
+
sprintf(temp,
"%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
+
fi->fib_dev->name, (unsigned long)f->fib_dst,
(unsigned long)fi->fib_gateway,
+
fi->fib_flags, 0, f->fib_use, f->fib_metric,
+
(unsigned long)fz->fz_mask, (int)fi->fib_mtu,
fi->fib_window, (int)fi->fib_irtt);
+
sprintf(buffer+len,"%-127s\n",temp);
+
+
len += 128;
+
if (pos >= offset+length)
+
goto done;
+
}
+
}
+
}
+
+done:
+
ip_rt_unlock();
+
wake_up(&rt_wait);
+
+
*start = buffer+len-(pos-offset);
+
len = pos - offset;
+
if (len>length)
+
len = length;
+
return len;
+}
+
+int rt_cache_get_info(char *buffer, char **start, off_t offset, int length,
int dummy)
+{
+
int len=0;
+
off_t pos=0;
+
char temp[129];
+
struct rtable *r;
+
int i;
+
+
pos = 128;
DEF00009303
RHT-BR00029928
+
+
if (offset<128)
+
{
+
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\n");
+
len = 128;
+
}
+
+
+
while (ip_rt_lock)
+
sleep_on(&rt_wait);
+
ip_rt_fast_lock();
+
+
for (i = 0; irt_next)
+
{
+
/*
+
*
Spin through entries until we are ready
+
*/
+
pos += 128;
+
+
if (pos <= offset)
+
{
+
len = 0;
+
continue;
+
}
+
+
sprintf(temp,
"%s\t%08lX\t%08lX\t%02X\t%ld\t%lu\t%d\t%08lX\t%d\t%lu\t%u\t%ld\t%1d",
+
r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned
long)r->rt_gateway,
+
r->rt_flags, r->rt_refcnt, r->rt_use, 0,
+
(unsigned long)r->rt_src, (int)r->rt_mtu, r>rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ?
r->rt_hh->hh_uptodate : 0);
+
sprintf(buffer+len,"%-127s\n",temp);
+
len += 128;
+
if (pos >= offset+length)
+
goto done;
+
}
+
}
+
+done:
+
ip_rt_unlock();
+
wake_up(&rt_wait);
+
+
*start = buffer+len-(pos-offset);
+
len = pos-offset;
+
if (len>length)
+
len = length;
+
return len;
+}
+
+
+static void rt_free(struct rtable * rt)
+{
DEF00009304
RHT-BR00029929
+
unsigned long flags;
+
+
save_flags(flags);
+
cli();
+
if (!rt->rt_refcnt)
+
{
+
struct hh_cache * hh = rt->rt_hh;
+
rt->rt_hh = NULL;
+
if (hh && !--hh->hh_refcnt)
+
{
+
restore_flags(flags);
+
kfree_s(hh, sizeof(struct hh_cache));
+
}
+
restore_flags(flags);
+
kfree_s(rt, sizeof(struct rt_table));
+
return;
+
}
+
rt->rt_next = rt_free_queue;
+
rt->rt_flags &= ~RTF_UP;
+
rt_free_queue = rt;
+
ip_rt_bh_mask |= RT_BH_FREE;
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_free: %08x\n", rt->rt_dst);
+#endif
+
restore_flags(flags);
+}
+
+/*
+ * RT "bottom half" handlers. Called with masked inetrrupts.
+ */
+
+static __inline__ void rt_kick_free_queue(void)
+{
+
struct rtable *rt, **rtp;
+
+
rtp = &rt_free_queue;
+
+
while ((rt = *rtp) != NULL)
+
{
+
if (!rt->rt_refcnt)
+
{
+
struct hh_cache * hh = rt->rt_hh;
+#if RT_CACHE_DEBUG >= 2
+
__u32 daddr = rt->rt_dst;
+#endif
+
*rtp = rt->rt_next;
+
rt->rt_hh = NULL;
+
if (hh && !--hh->hh_refcnt)
+
{
+
sti();
+
kfree_s(hh, sizeof(struct hh_cache));
+
}
+
sti();
+
kfree_s(rt, sizeof(struct rt_table));
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_kick_free_queue: %08x is free\n", daddr);
+#endif
DEF00009305
RHT-BR00029930
+
+
+
+
+
+}
+
+void
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+}
+
+
+void
+{
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
cli();
continue;
}
rtp = &rt->rt_next;
}
ip_rt_run_bh() {
unsigned long flags;
save_flags(flags);
cli();
if (ip_rt_bh_mask && !ip_rt_lock)
{
if (ip_rt_bh_mask & RT_BH_REDIRECT)
rt_kick_backlog();
if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
{
ip_rt_fast_lock();
ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
sti();
rt_garbage_collect_1();
cli();
ip_rt_fast_unlock();
}
if (ip_rt_bh_mask & RT_BH_FREE)
rt_kick_free_queue();
}
restore_flags(flags);
ip_rt_check_expire()
ip_rt_fast_lock();
if (ip_rt_lock == 1)
{
int i;
struct rtable *rth, **rthp;
unsigned long flags;
unsigned long now = jiffies;
save_flags(flags);
for (i=0; irt_next;
/*
* Cleanup aged off entries.
*/
cli();
DEF00009306
RHT-BR00029931
+
if (!rth->rt_refcnt && rth->rt_lastuse +
RT_CACHE_TIMEOUT < now)
+
{
+
*rthp = rth_next;
+
sti();
+
rt_cache_size--;
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_check_expire clean %02x@%08x\n", i,
rth->rt_dst);
+#endif
+
rt_free(rth);
+
continue;
+
}
+
sti();
+
+
if (!rth_next)
+
break;
+
+
/*
+
* LRU ordering.
+
*/
+
+
if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD <
rth_next->rt_lastuse ||
+
(rth->rt_lastuse < rth_next->rt_lastuse &&
+
rth->rt_use < rth_next->rt_use))
+
{
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_check_expire bubbled %02x@%08x<>%08x\n", i, rth->rt_dst, rth_next->rt_dst);
+#endif
+
cli();
+
*rthp = rth_next;
+
rth->rt_next = rth_next->rt_next;
+
rth_next->rt_next = rth;
+
sti();
+
rthp = &rth_next->rt_next;
+
continue;
+
}
+
rthp = &rth->rt_next;
+
}
+
}
+
restore_flags(flags);
+
rt_kick_free_queue();
+
}
+
ip_rt_unlock();
+}
+
+static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
+{
+
struct rtable *rt;
+
unsigned long hash = ip_rt_hash_code(dst);
+
+
if (gw == dev->pa_addr)
+
return;
+
if (dev != get_gw_dev(gw))
+
return;
DEF00009307
RHT-BR00029932
+
rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
+
if (rt == NULL)
+
return;
+
memset(rt, 0, sizeof(struct rtable));
+
rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY |
RTF_UP;
+
rt->rt_dst = dst;
+
rt->rt_dev = dev;
+
rt->rt_gateway = gw;
+
rt->rt_src = dev->pa_addr;
+
rt->rt_mtu = dev->mtu;
+#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
+
if (dev->mtu > 576)
+
rt->rt_mtu = 576;
+#endif
+
rt->rt_lastuse = jiffies;
+
rt->rt_refcnt = 1;
+
rt_cache_add(hash, rt);
+
ip_rt_put(rt);
+
return;
+}
+
+static void rt_cache_flush(void)
+{
+
int i;
+
struct rtable * rth, * next;
+
+
for (i=0; irt_next;
+
rt_cache_size--;
+
nr++;
+
rth->rt_next = NULL;
+
rt_free(rth);
+
}
+#if RT_CACHE_DEBUG >= 2
+
if (nr > 0)
+
printk("rt_cache_flush: %d@%02x\n", nr, i);
+#endif
+
}
+#if RT_CACHE_DEBUG >= 1
+
if (rt_cache_size)
+
{
DEF00009308
RHT-BR00029933
+
printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
+
rt_cache_size = 0;
+
}
+#endif
+}
+
+static void rt_garbage_collect_1(void)
+{
+
int i;
+
unsigned expire = RT_CACHE_TIMEOUT>>1;
+
struct rtable * rth, **rthp;
+
unsigned long now = jiffies;
+
+
for (;;)
+
{
+
for (i=0; irt_next)
+
{
+
if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) >
now)
+
continue;
+
rt_cache_size--;
+
cli();
+
*rthp=rth->rt_next;
+
rth->rt_next = NULL;
+
sti();
+
rt_free(rth);
+
break;
+
}
+
}
+
if (rt_cache_size < RT_CACHE_SIZE_MAX)
+
return;
+
expire >>= 1;
+
}
+}
+
+static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
+{
+
unsigned long flags;
+
struct rt_req * tail;
+
+
save_flags(flags);
+
cli();
+
tail = *q;
+
if (!tail)
+
rtr->rtr_next = rtr;
+
else
+
{
+
rtr->rtr_next = tail->rtr_next;
+
tail->rtr_next = rtr;
+
}
+
*q = rtr;
+
restore_flags(flags);
DEF00009309
RHT-BR00029934
+
return;
+}
+
+/*
+ * Caller should mask interrupts.
+ */
+
+static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
+{
+
struct rt_req * rtr;
+
+
if (*q)
+
{
+
rtr = (*q)->rtr_next;
+
(*q)->rtr_next = rtr->rtr_next;
+
if (rtr->rtr_next == rtr)
+
*q = NULL;
+
rtr->rtr_next = NULL;
+
return rtr;
+
}
+
return NULL;
+}
+
+/*
+
Called with masked interrupts
+ */
+
+static void rt_kick_backlog()
+{
+
if (!ip_rt_lock)
+
{
+
struct rt_req * rtr;
+
+
ip_rt_fast_lock();
+
+
while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
+
{
+
sti();
+
rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
+
kfree_s(rtr, sizeof(struct rt_req));
+
cli();
+
}
+
+
ip_rt_bh_mask &= ~RT_BH_REDIRECT;
+
+
ip_rt_fast_unlock();
+
}
+}
+
+/*
+ * rt_{del|add|flush} called only from USER process. Waiting is OK.
+ */
+
+static int rt_del(__u32 dst, __u32 mask,
+
struct device * dev, __u32 gtw, short rt_flags, short metric)
+{
+
int retval;
DEF00009310
RHT-BR00029935
+
+
while (ip_rt_lock)
+
sleep_on(&rt_wait);
+
ip_rt_fast_lock();
+
retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
+
ip_rt_unlock();
+
wake_up(&rt_wait);
+
return retval;
+}
+
+static void rt_add(short flags, __u32 dst, __u32 mask,
+
__u32 gw, struct device *dev, unsigned short mss,
+
unsigned long window, unsigned short irtt, short metric)
+{
+
while (ip_rt_lock)
+
sleep_on(&rt_wait);
+
ip_rt_fast_lock();
+
fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
+
ip_rt_unlock();
+
wake_up(&rt_wait);
+}
+
+void ip_rt_flush(struct device *dev)
+{
+
while (ip_rt_lock)
+
sleep_on(&rt_wait);
+
ip_rt_fast_lock();
+
fib_flush_1(dev);
+
ip_rt_unlock();
+
wake_up(&rt_wait);
+}
+
+/*
+
Called by ICMP module.
+ */
+
+void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
+{
+
struct rt_req * rtr;
+
struct rtable * rt;
+
+
rt = ip_rt_route(dst, 0);
+
if (!rt)
+
return;
+
+
if (rt->rt_gateway != src ||
+
rt->rt_dev != dev ||
+
((gw^dev->pa_addr)&dev->pa_mask) ||
+
ip_chk_addr(gw))
+
{
+
ip_rt_put(rt);
+
return;
+
}
+
ip_rt_put(rt);
+
+
ip_rt_fast_lock();
+
if (ip_rt_lock == 1)
DEF00009311
RHT-BR00029936
+
{
+
rt_redirect_1(dst, gw, dev);
+
ip_rt_unlock();
+
return;
+
}
+
+
rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
+
if (rtr)
+
{
+
rtr->dst = dst;
+
rtr->gw = gw;
+
rtr->dev = dev;
+
rt_req_enqueue(&rt_backlog, rtr);
+
ip_rt_bh_mask |= RT_BH_REDIRECT;
+
}
+
ip_rt_unlock();
+}
+
+
+static __inline__ void rt_garbage_collect(void)
+{
+
if (ip_rt_lock == 1)
+
{
+
rt_garbage_collect_1();
+
return;
+
}
+
ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
+}
+
+static void rt_cache_add(unsigned hash, struct rtable * rth)
+{
+
unsigned long
flags;
+
struct rtable
**rthp;
+
__u32
daddr = rth->rt_dst;
+
unsigned long
now = jiffies;
+
+#if RT_CACHE_DEBUG >= 2
+
if (ip_rt_lock != 1)
+
{
+
printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
+
return;
+
}
+#endif
+
+
save_flags(flags);
+
+
if (rth->rt_dev->header_cache_bind)
+
{
+
struct rtable * rtg = rth;
+
+
if (rth->rt_gateway != daddr)
+
{
+
ip_rt_fast_unlock();
+
rtg = ip_rt_route(rth->rt_gateway, 0);
+
ip_rt_fast_lock();
+
}
+
DEF00009312
RHT-BR00029937
+
if (rtg)
+
{
+
if (rtg == rth)
+
rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg>rt_dev, ETH_P_IP, rtg->rt_dst);
+
else
+
{
+
if (rtg->rt_hh)
+
ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
+
rth->rt_hh = rtg->rt_hh;
+
ip_rt_put(rtg);
+
}
+
}
+
}
+
+
if (rt_cache_size >= RT_CACHE_SIZE_MAX)
+
rt_garbage_collect();
+
+
cli();
+
rth->rt_next = ip_rt_hash_table[hash];
+#if RT_CACHE_DEBUG >= 2
+
if (rth->rt_next)
+
{
+
struct rtable * trth;
+
printk("rt_cache @%02x: %08x", hash, daddr);
+
for (trth=rth->rt_next; trth; trth=trth->rt_next)
+
printk(" . %08x", trth->rt_dst);
+
printk("\n");
+
}
+#endif
+
ip_rt_hash_table[hash] = rth;
+
rthp = &rth->rt_next;
+
sti();
+
rt_cache_size++;
+
+
/*
+
* Cleanup duplicate (and aged off) entries.
+
*/
+
+
while ((rth = *rthp) != NULL)
+
{
+
+
cli();
+
if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
+
|| rth->rt_dst == daddr)
+
{
+
*rthp = rth->rt_next;
+
rt_cache_size--;
+
sti();
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
+#endif
+
rt_free(rth);
+
continue;
+
}
+
sti();
+
rthp = &rth->rt_next;
DEF00009313
RHT-BR00029938
+
+
}
}
restore_flags(flags);
/*
- *
Process a route add request from the user
+
RT should be already locked.
+
+
We could improve this by keeping a chain of say 32 struct rtable's
+
last freed for fast recycling.
+
+ */
+
+struct rtable * ip_rt_slow_route (__u32 daddr, int local)
+{
+
unsigned hash = ip_rt_hash_code(daddr)^local;
+
struct rtable * rth;
+
struct fib_node * f;
+
struct fib_info * fi;
+
__u32 saddr;
+
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_cache miss @%08x\n", daddr);
+#endif
+
+
rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
+
if (!rth)
+
{
+
ip_rt_unlock();
+
return NULL;
+
}
+
+
if (local)
+
f = fib_lookup_local(daddr);
+
else
+
f = fib_lookup (daddr);
+
+
if (f)
+
{
+
fi = f->fib_info;
+
f->fib_use++;
+
}
+
+
if (!f || (fi->fib_flags & RTF_REJECT))
+
{
+#if RT_CACHE_DEBUG >= 2
+
printk("rt_route failed @%08x\n", daddr);
+#endif
+
ip_rt_unlock();
+
kfree_s(rth, sizeof(struct rtable));
+
return NULL;
+
}
+
+
saddr = fi->fib_dev->pa_addr;
+
+
if (daddr == fi->fib_dev->pa_addr)
+
{
DEF00009314
RHT-BR00029939
+
f->fib_use--;
+
if ((f = fib_loopback) != NULL)
+
{
+
f->fib_use++;
+
fi = f->fib_info;
+
}
+
}
+
+
if (!f)
+
{
+
ip_rt_unlock();
+
kfree_s(rth, sizeof(struct rtable));
+
return NULL;
+
}
+
+
rth->rt_dst = daddr;
+
rth->rt_src = saddr;
+
rth->rt_lastuse
= jiffies;
+
rth->rt_refcnt
= 1;
+
rth->rt_use = 1;
+
rth->rt_next
= NULL;
+
rth->rt_hh = NULL;
+
rth->rt_gateway
= fi->fib_gateway;
+
rth->rt_dev = fi->fib_dev;
+
rth->rt_mtu = fi->fib_mtu;
+
rth->rt_window
= fi->fib_window;
+
rth->rt_irtt
= fi->fib_irtt;
+
rth->rt_tos = f->fib_tos;
+
rth->rt_flags
= fi->fib_flags | RTF_HOST;
+
if (local)
+
rth->rt_flags
|= RTF_LOCAL;
+
+
if (!(rth->rt_flags & RTF_GATEWAY))
+
rth->rt_gateway = rth->rt_dst;
+
+
if (ip_rt_lock == 1)
+
rt_cache_add(hash, rth);
+
else
+
{
+
rt_free(rth);
+#if RT_CACHE_DEBUG >= 1
+
printk("rt_cache: route to %08x was born dead\n", daddr);
+#endif
+
}
+
+
ip_rt_unlock();
+
return rth;
+}
+
+void ip_rt_put(struct rtable * rt)
+{
+
if (rt)
+
ATOMIC_DECR(&rt->rt_refcnt);
+}
+
+struct rtable * ip_rt_route(__u32 daddr, int local)
+{
DEF00009315
RHT-BR00029940
+
struct rtable * rth;
+
+
ip_rt_fast_lock();
+
+
for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth>rt_next)
+
{
+
if (rth->rt_dst == daddr)
+
{
+
rth->rt_lastuse = jiffies;
+
ATOMIC_INCR(&rth->rt_use);
+
ATOMIC_INCR(&rth->rt_refcnt);
+
ip_rt_unlock();
+
return rth;
+
}
+
}
+
return ip_rt_slow_route (daddr, local);
+}
+
+
+/*
+ *
Process a route add request from the user, or from a kernel
+ *
task.
*/
-static int rt_new(struct rtentry *r)
+int ip_rt_new(struct rtentry *r)
{
int err;
char * devname;
@@ -465,7 +1629,7 @@ static int rt_new(struct rtentry *r)
/*
*
BSD emulation: Permits route add someroute gw one-of-my-addresses
*
to indicate which iface. Not as clean as the nice Linux dev
technique
*
but people keep using it...
+
*
but people keep using it... (and gated likes it ;))
*/
if (!dev && (flags & RTF_GATEWAY))
@@ -522,8 +1686,8 @@ static int rt_new(struct rtentry *r)
/*
*
Add the route
*/
ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r>rt_irtt, metric);
+
+
rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r>rt_irtt, metric);
return 0;
}
@@ -539,6 +1703,7 @@ static int rt_kill(struct rtentry *r)
struct sockaddr_in *gtw;
char *devname;
int err;
DEF00009316
RHT-BR00029941
+
struct device * dev = NULL;
trg = (struct sockaddr_in *) &r->rt_dst;
msk = (struct sockaddr_in *) &r->rt_genmask;
@@ -548,159 +1713,20 @@ static int rt_kill(struct rtentry *r)
err = getname(devname, &devname);
if (err)
return err;
+
dev = dev_get(devname);
+
putname(devname);
+
if (!dev)
+
return -ENODEV;
}
/*
* metric can become negative here if it wasn't filled in
* but that's a fortunate accident; we really use that in rt_del.
*/
err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr,
devname,
+
err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr,
dev,
(__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
if ( devname != NULL )
putname(devname);
return err;
}
-/*
- *
Called from the PROCfs module. This outputs /proc/net/route.
- *
- *
We preserve the old format but pad the buffers out. This means that
- *
we can spin over the other entries as we read them. Remember the
- *
gated BGP4 code could need to read 60,000+ routes on occasion (thats
- *
about 7Mb of data). To do that ok we will need to also cache the
- *
last route we got to (reads will generally be following on from
- *
one another without gaps).
- */
-int rt_get_info(char *buffer, char **start, off_t offset, int length, int
dummy)
-{
struct rtable *r;
int len=128;
off_t pos=0;
off_t begin=0;
char temp[129];
if(offset<128)
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
pos=128;
for (r = rt_base; r != NULL; r = r->rt_next)
{
/*
*
Spin through entries until we are ready
DEF00009317
RHT-BR00029942
*/
if(pos+128rt_dev->name, (unsigned long)r->rt_dst, (unsigned
long)r->rt_gateway,
r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
(unsigned long)r->rt_mask, (int)r->rt_mss, r->rt_window,
(int)r->rt_irtt);
sprintf(buffer+len,"%-127s\n",temp);
len+=128;
pos+=128;
if(posoffset+length)
break;
}
*start=buffer+(offset-begin);
len-=(offset-begin);
if(len>length)
len=length;
return len;
-}
-/*
- *
This is hackish, but results in better code. Use "-S" to see why.
- */
-#define early_out ({ goto no_route; 1; })
-/*
- *
Route a packet. This needs to be fairly quick. Florian & Co.
- *
suggested a unified ARP and IP routing cache. Done right its
- *
probably a brilliant idea. I'd actually suggest a unified
- *
ARP/IP routing/Socket pointer cache. Volunteers welcome
- */
-struct rtable * ip_rt_route(__u32 daddr, struct options *opt, __u32
*src_addr)
-{
struct rtable *rt;
for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
{
if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
break;
/*
*
broadcast addresses can be special cases..
DEF00009318
RHT-BR00029943
*/
if (rt->rt_flags & RTF_GATEWAY)
continue;
if ((rt->rt_dev->flags & IFF_BROADCAST) &&
(rt->rt_dev->pa_brdaddr == daddr))
break;
}
if(rt->rt_flags&RTF_REJECT)
return NULL;
if(src_addr!=NULL)
*src_addr= rt->rt_dev->pa_addr;
if (daddr == rt->rt_dev->pa_addr) {
if ((rt = rt_loopback) == NULL)
goto no_route;
}
rt->rt_use++;
return rt;
-no_route:
return NULL;
-}
-struct rtable * ip_rt_local(__u32 daddr, struct options *opt, __u32
*src_addr)
-{
struct rtable *rt;
for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
{
/*
*
No routed addressing.
*/
if (rt->rt_flags&RTF_GATEWAY)
continue;
if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
break;
/*
*
broadcast addresses can be special cases..
*/
if ((rt->rt_dev->flags & IFF_BROADCAST) &&
rt->rt_dev->pa_brdaddr == daddr)
break;
}
if(src_addr!=NULL)
*src_addr= rt->rt_dev->pa_addr;
if (daddr == rt->rt_dev->pa_addr) {
if ((rt = rt_loopback) == NULL)
goto no_route;
}
rt->rt_use++;
return rt;
DEF00009319
RHT-BR00029944
-no_route:
return NULL;
-}
/*
*
Handle IP routing ioctl calls. These are used to manipulate the routing
tables
*/
@@ -720,8 +1746,15 @@ int ip_rt_ioctl(unsigned int cmd, void *arg)
if (err)
return err;
memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
+
return (cmd == SIOCDELRT) ? rt_kill(&rt) : ip_rt_new(&rt);
}
return -EINVAL;
}
+
+void ip_rt_advice(struct rtable **rp, int advice)
+{
+
/* Thanks! */
+
return;
+}
+
-1.6.5
DEF00009320
RHT-BR00029945
Linux 1.3.42 - route.c
/*
* INET
An implementation of the TCP/IP protocol suite for the LINUX
*
operating system. INET is implemented using the BSD Socket
*
interface as the means of communication with the user level.
*
*
ROUTE - implementation of the IP router.
*
* Version:
@(#)route.c
1.0.14 05/31/93
*
* Authors:
Ross Biro,
*
Fred N. van Kempen,
*
Alan Cox,
*
Linus Torvalds,
*
* Fixes:
*
Alan Cox
:
Verify area fixes.
*
Alan Cox
:
cli() protects routing changes
*
Rui Oliveira
:
ICMP routing table updates
*
(rco@di.uminho.pt)
Routing table insertion and update
*
Linus Torvalds :
Rewrote bits to be sensible
*
Alan Cox
:
Added BSD route gw semantics
*
Alan Cox
:
Super /proc >4K
*
Alan Cox
:
MTU in route table
*
Alan Cox
:
MSS actually. Also added the window
*
clamper.
*
Sam Lantinga
:
Fixed route matching in rt_del()
*
Alan Cox
:
Routing cache support.
*
Alan Cox
:
Removed compatibility cruft.
*
Alan Cox
:
RTF_REJECT support.
*
Alan Cox
:
TCP irtt support.
*
Jonathan Naylor :
Added Metric support.
*
Miquel van Smoorenburg :
BSD API fixes.
*
Miquel van Smoorenburg :
Metrics.
*
Alan Cox
:
Use __u32 properly
*
Alan Cox
:
Aligned routing errors more closely with BSD
*
our system is still very different.
*
Alan Cox
:
Faster /proc handling
*
Alexey Kuznetsov
:
Massive rework to support tree based
routing,
*
routing caches and better behaviour.
*
*
This program is free software; you can redistribute it and/or
*
modify it under the terms of the GNU General Public License
*
as published by the Free Software Foundation; either version
*
2 of the License, or (at your option) any later version.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
Page 1
DEF00009321
KS-DEF-0000686
Linux 1.3.42 - route.c
#include
#include
#include
#include
#include
#include
#include
/*
* Forwarding Information Base definitions.
*/
struct fib_node
{
struct fib_node
__u32
unsigned long
struct fib_info
short
unsigned char
};
*fib_next;
fib_dst;
fib_use;
*fib_info;
fib_metric;
fib_tos;
/*
* This structure contains data shared by many of routes.
*/
struct fib_info
{
struct fib_info
struct fib_info
__u32
struct device
int
unsigned long
unsigned short
unsigned short
unsigned short
};
*fib_next;
*fib_prev;
fib_gateway;
*fib_dev;
fib_refcnt;
fib_window;
fib_flags;
fib_mtu;
fib_irtt;
struct fib_zone
{
struct fib_zone
struct fib_node
struct fib_node
int
int
__u32
};
*fz_next;
**fz_hash_table;
*fz_list;
fz_nent;
fz_logmask;
fz_mask;
static
static
static
static
*fib_zones[33];
*fib_zone_list;
*fib_loopback = NULL;
*fib_info_list;
struct
struct
struct
struct
fib_zone
fib_zone
fib_node
fib_info
/*
* Backlogging.
*/
#define RT_BH_REDIRECT
#define RT_BH_GARBAGE_COLLECT
#define RT_BH_FREE
0
1
2
struct rt_req
Page 2
DEF00009322
KS-DEF-0000687
Linux 1.3.42 - route.c
{
struct rt_req * rtr_next;
struct device *dev;
__u32 dst;
__u32 gw;
unsigned char tos;
};
int
unsigned
static struct rt_req
ip_rt_lock;
ip_rt_bh_mask;
*rt_backlog;
/*
* Route cache.
*/
struct
static
static
struct
rtable
int
struct rtable
wait_queue
static
static
static
static
void
void
void
void
*ip_rt_hash_table[RT_HASH_DIVISOR];
rt_cache_size;
*rt_free_queue;
*rt_wait;
rt_kick_backlog(void);
rt_cache_add(unsigned hash, struct rtable * rth);
rt_cache_flush(void);
rt_garbage_collect_1(void);
/*
* Evaluate mask length.
*/
static __inline__ int rt_logmask(__u32 mask)
{
if (!(mask = ntohl(mask)))
return 32;
return ffz(~mask);
}
/*
* Create mask from length.
*/
static __inline__ __u32 rt_mask(int logmask)
{
if (logmask >= 32)
return 0;
return htonl(~((1<>logmask);
}
/*
* Free FIB node.
*/
static void fib_free_node(struct fib_node * f)
{
struct fib_info * fi = f->fib_info;
if (!--fi->fib_refcnt)
{
#if RT_CACHE_DEBUG >= 2
Page 3
DEF00009323
KS-DEF-0000688
Linux 1.3.42 - route.c
printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway,
fi->fib_dev->name);
#endif
if (fi->fib_next)
fi->fib_next->fib_prev = fi->fib_prev;
if (fi->fib_prev)
fi->fib_prev->fib_next = fi->fib_next;
if (fi == fib_info_list)
fib_info_list = fi->fib_next;
}
kfree_s(f, sizeof(struct fib_node));
}
/*
* Find gateway route by address.
*/
static struct fib_node * fib_lookup_gateway(__u32 dst)
{
struct fib_zone * fz;
struct fib_node * f;
for (fz = fib_zone_list; fz; fz = fz->fz_next)
{
if (fz->fz_hash_table)
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
else
f = fz->fz_list;
for ( ; f; f = f->fib_next)
{
if ((dst ^ f->fib_dst) & fz->fz_mask)
continue;
if (f->fib_info->fib_flags & RTF_GATEWAY)
return NULL;
return f;
}
}
return NULL;
}
/*
* Find local route by address.
* FIXME: I use "longest match" principle. If destination
*
has some non-local route, I'll not search shorter matches.
*
It's possible, I'm wrong, but I wanted to prevent following
*
situation:
*
route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
*
route add 193.233.7.0
netmask 255.255.255.0 eth1
*
(Two ethernets connected by serial line, one is small and other is large)
*
Host 193.233.7.129 is locally unreachable,
*
but old (<=1.3.37) code will send packets destined for it to eth1.
*
*/
static struct fib_node * fib_lookup_local(__u32 dst)
{
struct fib_zone * fz;
struct fib_node * f;
for (fz = fib_zone_list; fz; fz = fz->fz_next)
{
int longest_match_found = 0;
Page 4
DEF00009324
KS-DEF-0000689
Linux 1.3.42 - route.c
if (fz->fz_hash_table)
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
else
f = fz->fz_list;
for ( ; f; f = f->fib_next)
{
if ((dst ^ f->fib_dst) & fz->fz_mask)
continue;
if (!(f->fib_info->fib_flags & RTF_GATEWAY))
return f;
longest_match_found = 1;
}
if (longest_match_found)
return NULL;
}
return NULL;
}
/*
* Main lookup routine.
*
IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
*
by user. It doesn't route non-CIDR broadcasts by default.
*
*
F.e.
*
ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast
193.233.7.255
*
is valid, but if you really are not able (not allowed, do not want) to
*
use CIDR compliant broadcast 193.233.7.127, you should add host route:
*
route add -host 193.233.7.255 eth0
*/
static struct fib_node * fib_lookup(__u32 dst)
{
struct fib_zone * fz;
struct fib_node * f;
for (fz = fib_zone_list; fz; fz = fz->fz_next)
{
if (fz->fz_hash_table)
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
else
f = fz->fz_list;
for ( ; f; f = f->fib_next)
{
if ((dst ^ f->fib_dst) & fz->fz_mask)
continue;
return f;
}
}
return NULL;
}
static __inline__ struct device * get_gw_dev(__u32 gw)
{
struct fib_node * f;
f = fib_lookup_gateway(gw);
if (f)
return f->fib_info->fib_dev;
return NULL;
}
Page 5
DEF00009325
KS-DEF-0000690
Linux 1.3.42 - route.c
/*
*
*
*
*
*
*/
Used by 'rt_add()' when we can't get the netmask any other way..
If the lower byte or two are zero, we guess the mask based on the
number of zero 8-bit net numbers, otherwise we use the "default"
masks judging by the destination address and our device netmask.
static __u32 unsigned long default_mask(__u32 dst)
{
dst = ntohl(dst);
if (IN_CLASSA(dst))
return htonl(IN_CLASSA_NET);
if (IN_CLASSB(dst))
return htonl(IN_CLASSB_NET);
return htonl(IN_CLASSC_NET);
}
/*
*
*/
If no mask is specified then generate a default entry.
static __u32 guess_mask(__u32 dst, struct device * dev)
{
__u32 mask;
if (!dst)
return 0;
mask = default_mask(dst);
if ((dst ^ dev->pa_addr) & mask)
return mask;
return dev->pa_mask;
}
/*
*
*/
Check if a mask is acceptable.
static inline int bad_mask(__u32 mask, __u32 addr)
{
if (addr & (mask = ~mask))
return 1;
mask = ntohl(mask);
if (mask & (mask+1))
return 1;
return 0;
}
static int fib_del_list(struct fib_node **fp, __u32 dst,
struct device * dev, __u32 gtw, short flags, short metric, __u32
mask)
{
struct fib_node *f;
int found=0;
while((f = *fp) != NULL)
{
struct fib_info * fi = f->fib_info;
Page 6
DEF00009326
KS-DEF-0000691
Linux 1.3.42 - route.c
/*
*
Make sure the destination and netmask match.
*
metric, gateway and device are also checked
*
if they were specified.
*/
if (f->fib_dst != dst ||
(gtw && fi->fib_gateway != gtw) ||
(metric >= 0 && f->fib_metric != metric) ||
(dev && fi->fib_dev != dev) )
{
fp = &f->fib_next;
continue;
}
cli();
*fp = f->fib_next;
if (fib_loopback == f)
fib_loopback = NULL;
sti();
ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric,
fi->fib_dev->name);
fib_free_node(f);
found++;
}
return found;
}
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
struct device * dev, __u32 gtw, short flags, short metric)
{
struct fib_node **fp;
struct fib_zone *fz;
int found=0;
if (!mask)
{
for (fz=fib_zone_list; fz; fz = fz->fz_next)
{
int tmp;
if (fz->fz_hash_table)
fp = &fz->fz_hash_table[fz_hash_code(dst,
fz->fz_logmask)];
else
fp = &fz->fz_list;
tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
fz->fz_nent -= tmp;
found += tmp;
}
}
else
{
if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
{
if (fz->fz_hash_table)
fp = &fz->fz_hash_table[fz_hash_code(dst,
fz->fz_logmask)];
else
fp = &fz->fz_list;
found = fib_del_list(fp, dst, dev, gtw, flags, metric,
mask);
fz->fz_nent -= found;
}
Page 7
DEF00009327
KS-DEF-0000692
Linux 1.3.42 - route.c
}
if (found)
{
rt_cache_flush();
return 0;
}
return -ESRCH;
}
static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
unsigned short flags, unsigned short mss,
unsigned long window, unsigned short irtt)
{
struct fib_info * fi;
if (!(flags & RTF_MSS))
{
mss = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
/*
*
If MTU was not specified, use default.
*
If you want to increase MTU for some net (local subnet)
*
use "route add .... mss xxx".
*
*
The MTU isnt currently always used and computed as it
*
should be as far as I can tell. [Still verifying this is
right]
*/
if ((flags & RTF_GATEWAY) && mss > 576)
mss = 576;
#endif
}
if (!(flags & RTF_WINDOW))
window = 0;
if (!(flags & RTF_IRTT))
irtt = 0;
for (fi=fib_info_list; fi; fi = fi->fib_next)
{
if (fi->fib_gateway != gw ||
fi->fib_dev != dev ||
fi->fib_flags != flags ||
fi->fib_mtu != mss ||
fi->fib_window != window ||
fi->fib_irtt != irtt)
continue;
fi->fib_refcnt++;
#if RT_CACHE_DEBUG >= 2
printk("fib_create_info: fi %08x/%s is duplicate\n",
fi->fib_gateway, fi->fib_dev->name);
#endif
return fi;
}
fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
if (!fi)
return NULL;
memset(fi, 0, sizeof(struct fib_info));
fi->fib_flags = flags;
fi->fib_dev = dev;
fi->fib_gateway = gw;
fi->fib_mtu = mss;
Page 8
DEF00009328
KS-DEF-0000693
Linux 1.3.42 - route.c
fi->fib_window = window;
fi->fib_refcnt++;
fi->fib_next = fib_info_list;
fi->fib_prev = NULL;
if (fib_info_list)
fib_info_list->fib_prev = fi;
fib_info_list = fi;
#if RT_CACHE_DEBUG >= 2
printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway,
fi->fib_dev->name);
#endif
return fi;
}
static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
__u32 gw, struct device *dev, unsigned short mss,
unsigned long window, unsigned short irtt, short metric)
{
struct fib_node *f, *f1;
struct fib_node **fp;
struct fib_node **dup_fp = NULL;
struct fib_zone * fz;
struct fib_info * fi;
int logmask;
if (flags & RTF_HOST)
mask = 0xffffffff;
/*
* If mask is not specified, try to guess it.
*/
else if (!mask)
{
if (!((dst ^ dev->pa_addr) & dev->pa_mask))
{
mask = dev->pa_mask;
flags &= ~RTF_GATEWAY;
if (flags & RTF_DYNAMIC)
{
printk("Dynamic route to my own net rejected\n");
return;
}
}
else
mask = guess_mask(dst, dev);
dst &= mask;
}
/*
*
*/
A gateway must be reachable and not a local address
if (gw == dev->pa_addr)
flags &= ~RTF_GATEWAY;
if (flags & RTF_GATEWAY)
{
/*
*
Don't try to add a gateway we can't reach..
*/
if (dev != get_gw_dev(gw))
return;
Page 9
DEF00009329
KS-DEF-0000694
Linux 1.3.42 - route.c
flags |= RTF_GATEWAY;
}
else
gw = 0;
/*
*
*/
Allocate an entry and fill it in.
f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
if (f == NULL)
return;
memset(f, 0, sizeof(struct fib_node));
f->fib_dst = dst;
f->fib_metric = metric;
f->fib_tos
= 0;
if
{
((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
kfree_s(f, sizeof(struct fib_node));
return;
}
f->fib_info = fi;
logmask = rt_logmask(mask);
fz = fib_zones[logmask];
if (!fz)
{
int i;
fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
if (!fz)
{
fib_free_node(f);
return;
}
memset(fz, 0, sizeof(struct fib_zone));
fz->fz_logmask = logmask;
fz->fz_mask = mask;
for (i=logmask-1; i>=0; i--)
if (fib_zones[i])
break;
cli();
if (i<0)
{
fz->fz_next = fib_zone_list;
fib_zone_list = fz;
}
else
{
fz->fz_next = fib_zones[i]->fz_next;
fib_zones[i]->fz_next = fz;
}
fib_zones[logmask] = fz;
sti();
}
/*
* If zone overgrows RTZ_HASHING_LIMIT, create hash table.
*/
Page 10
DEF00009330
KS-DEF-0000695
Linux 1.3.42 - route.c
if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
{
struct fib_node ** ht;
#if RT_CACHE_DEBUG
printk("fib_add_1: hashing for zone %d started\n", logmask);
#endif
ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
if (ht)
{
memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
cli();
f1 = fz->fz_list;
while (f1)
{
struct fib_node * next;
unsigned hash = fz_hash_code(f1->fib_dst, logmask);
next = f1->fib_next;
f1->fib_next = ht[hash];
ht[hash] = f1;
f1 = next;
}
fz->fz_list = NULL;
fz->fz_hash_table = ht;
sti();
}
}
if (fz->fz_hash_table)
fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
else
fp = &fz->fz_list;
/*
* Scan list to find the first route with the same destination
*/
while ((f1 = *fp) != NULL)
{
if (f1->fib_dst == dst)
break;
fp = &f1->fib_next;
}
/*
* Find route with the same destination and less (or equal) metric.
*/
while ((f1 = *fp) != NULL && f1->fib_dst == dst)
{
if (f1->fib_metric >= metric)
break;
/*
*
Record route with the same destination and gateway,
*
but less metric. We'll delete it
*
after instantiation of new route.
*/
if (f1->fib_info->fib_gateway == gw)
dup_fp = fp;
fp = &f1->fib_next;
}
/*
* Is it already present?
Page 11
DEF00009331
KS-DEF-0000696
Linux 1.3.42 - route.c
*/
if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
{
fib_free_node(f);
return;
}
/*
* Insert new entry to the list.
*/
cli();
f->fib_next = f1;
*fp = f;
if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
fib_loopback = f;
sti();
fz->fz_nent++;
ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric,
fi->fib_dev->name);
/*
*
Delete route with the same destination and gateway.
*
Note that we should have at most one such route.
*/
if (dup_fp)
fp = dup_fp;
else
fp = &f->fib_next;
while ((f1 = *fp) != NULL && f1->fib_dst == dst)
{
if (f1->fib_info->fib_gateway == gw)
{
cli();
*fp = f1->fib_next;
if (fib_loopback == f1)
fib_loopback = NULL;
sti();
ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric,
f1->fib_info->fib_dev->name);
fib_free_node(f1);
fz->fz_nent--;
break;
}
fp = &f1->fib_next;
}
rt_cache_flush();
return;
}
static int rt_flush_list(struct fib_node ** fp, struct device *dev)
{
int found = 0;
struct fib_node *f;
while ((f = *fp) != NULL) {
if (f->fib_info->fib_dev != dev) {
fp = &f->fib_next;
continue;
}
cli();
Page 12
DEF00009332
KS-DEF-0000697
Linux 1.3.42 - route.c
*fp = f->fib_next;
if (fib_loopback == f)
fib_loopback = NULL;
sti();
fib_free_node(f);
found++;
}
return found;
}
static __inline__ void fib_flush_1(struct device *dev)
{
struct fib_zone *fz;
int found = 0;
for (fz = fib_zone_list; fz; fz = fz->fz_next)
{
if (fz->fz_hash_table)
{
int i;
int tmp = 0;
for (i=0; ifz_hash_table[i], dev);
fz->fz_nent -= tmp;
found += tmp;
}
else
{
int tmp;
tmp = rt_flush_list(&fz->fz_list, dev);
fz->fz_nent -= tmp;
found += tmp;
}
}
if (found)
rt_cache_flush();
}
/*
*
*
*
*
*
*
*
*
*/
Called from the PROCfs module. This outputs /proc/net/route.
We preserve the old format but pad the buffers out. This means that
we can spin over the other entries as we read them. Remember the
gated BGP4 code could need to read 60,000+ routes on occasion (thats
about 7Mb of data). To do that ok we will need to also cache the
last route we got to (reads will generally be following on from
one another without gaps).
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
struct fib_zone *fz;
struct fib_node *f;
int len=0;
off_t pos=0;
char temp[129];
int i;
pos = 128;
if (offset<128)
Page 13
DEF00009333
KS-DEF-0000698
Linux 1.3.42 - route.c
{
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
len = 128;
}
while
(ip_rt_lock)
sleep_on(&rt_wait);
ip_rt_fast_lock();
for (fz=fib_zone_list; fz; fz = fz->fz_next)
{
int maxslot;
struct fib_node ** fp;
if (fz->fz_nent == 0)
continue;
if (pos + 128*fz->fz_nent <= offset)
{
pos += 128*fz->fz_nent;
len = 0;
continue;
}
if (fz->fz_hash_table)
{
maxslot = RTZ_HASH_DIVISOR;
fp
= fz->fz_hash_table;
}
else
{
maxslot = 1;
fp
= &fz->fz_list;
}
for (i=0; i < maxslot; i++, fp++)
{
for (f = *fp; f; f = f->fib_next)
{
struct fib_info * fi;
/*
*
Spin through entries until we are ready
*/
pos += 128;
if (pos <= offset)
{
len=0;
continue;
}
fi = f->fib_info;
sprintf(temp,
"%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
fi->fib_dev->name, (unsigned
long)f->fib_dst, (unsigned long)fi->fib_gateway,
fi->fib_flags, 0, f->fib_use, f->fib_metric,
(unsigned long)fz->fz_mask,
(int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
sprintf(buffer+len,"%-127s\n",temp);
Page 14
DEF00009334
KS-DEF-0000699
Linux 1.3.42 - route.c
len += 128;
if (pos >= offset+length)
goto done;
}
}
}
done:
ip_rt_unlock();
wake_up(&rt_wait);
*start = buffer+len-(pos-offset);
len = pos - offset;
if (len>length)
len = length;
return len;
}
int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int
dummy)
{
int len=0;
off_t pos=0;
char temp[129];
struct rtable *r;
int i;
pos = 128;
if (offset<128)
{
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\n");
len = 128;
}
while
(ip_rt_lock)
sleep_on(&rt_wait);
ip_rt_fast_lock();
for (i = 0; irt_next)
{
/*
*
Spin through entries until we are ready
*/
pos += 128;
if (pos <= offset)
{
len = 0;
continue;
}
sprintf(temp,
"%s\t%08lX\t%08lX\t%02X\t%ld\t%lu\t%d\t%08lX\t%d\t%lu\t%u\t%ld\t%1d",
r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned
long)r->rt_gateway,
r->rt_flags, r->rt_refcnt, r->rt_use, 0,
(unsigned long)r->rt_src, (int)r->rt_mtu,
r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ?
Page 15
DEF00009335
KS-DEF-0000700
Linux 1.3.42 - route.c
r->rt_hh->hh_uptodate : 0);
sprintf(buffer+len,"%-127s\n",temp);
len += 128;
if (pos >= offset+length)
goto done;
}
}
done:
ip_rt_unlock();
wake_up(&rt_wait);
*start = buffer+len-(pos-offset);
len = pos-offset;
if (len>length)
len = length;
return len;
}
static void rt_free(struct rtable * rt)
{
unsigned long flags;
save_flags(flags);
cli();
if (!rt->rt_refcnt)
{
struct hh_cache * hh = rt->rt_hh;
rt->rt_hh = NULL;
if (hh && !--hh->hh_refcnt)
{
restore_flags(flags);
kfree_s(hh, sizeof(struct hh_cache));
}
restore_flags(flags);
kfree_s(rt, sizeof(struct rt_table));
return;
}
rt->rt_next = rt_free_queue;
rt->rt_flags &= ~RTF_UP;
rt_free_queue = rt;
ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
printk("rt_free: %08x\n", rt->rt_dst);
#endif
restore_flags(flags);
}
/*
* RT "bottom half" handlers. Called with masked inetrrupts.
*/
static __inline__ void rt_kick_free_queue(void)
{
struct rtable *rt, **rtp;
rtp = &rt_free_queue;
while ((rt = *rtp) != NULL)
{
if (!rt->rt_refcnt)
{
Page 16
DEF00009336
KS-DEF-0000701
Linux 1.3.42 - route.c
struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
__u32 daddr = rt->rt_dst;
#endif
*rtp = rt->rt_next;
rt->rt_hh = NULL;
if (hh && !--hh->hh_refcnt)
{
sti();
kfree_s(hh, sizeof(struct hh_cache));
}
sti();
kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
cli();
continue;
}
rtp = &rt->rt_next;
}
}
void ip_rt_run_bh() {
unsigned long flags;
save_flags(flags);
cli();
if (ip_rt_bh_mask && !ip_rt_lock)
{
if (ip_rt_bh_mask & RT_BH_REDIRECT)
rt_kick_backlog();
if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
{
ip_rt_fast_lock();
ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
sti();
rt_garbage_collect_1();
cli();
ip_rt_fast_unlock();
}
if (ip_rt_bh_mask & RT_BH_FREE)
rt_kick_free_queue();
}
restore_flags(flags);
}
void ip_rt_check_expire()
{
ip_rt_fast_lock();
if (ip_rt_lock == 1)
{
int i;
struct rtable *rth, **rthp;
unsigned long flags;
unsigned long now = jiffies;
save_flags(flags);
for (i=0; irt_next;
/*
* Cleanup aged off entries.
*/
cli();
if (!rth->rt_refcnt && rth->rt_lastuse +
RT_CACHE_TIMEOUT < now)
{
*rthp = rth_next;
sti();
rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
printk("rt_check_expire clean %02x@%08x\n",
i, rth->rt_dst);
#endif
rt_free(rth);
continue;
}
sti();
if (!rth_next)
break;
/*
* LRU ordering.
*/
if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD <
rth_next->rt_lastuse ||
(rth->rt_lastuse < rth_next->rt_lastuse &&
rth->rt_use < rth_next->rt_use))
{
#if RT_CACHE_DEBUG >= 2
printk("rt_check_expire bubbled
%02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
cli();
*rthp = rth_next;
rth->rt_next = rth_next->rt_next;
rth_next->rt_next = rth;
sti();
rthp = &rth_next->rt_next;
continue;
}
rthp = &rth->rt_next;
}
}
restore_flags(flags);
rt_kick_free_queue();
}
ip_rt_unlock();
}
static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
{
struct rtable *rt;
unsigned long hash = ip_rt_hash_code(dst);
Page 18
DEF00009338
KS-DEF-0000703
Linux 1.3.42 - route.c
if (gw == dev->pa_addr)
return;
if (dev != get_gw_dev(gw))
return;
rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
if (rt == NULL)
return;
memset(rt, 0, sizeof(struct rtable));
rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
rt->rt_dst = dst;
rt->rt_dev = dev;
rt->rt_gateway = gw;
rt->rt_src = dev->pa_addr;
rt->rt_mtu = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
if (dev->mtu > 576)
rt->rt_mtu = 576;
#endif
rt->rt_lastuse = jiffies;
rt->rt_refcnt = 1;
rt_cache_add(hash, rt);
ip_rt_put(rt);
return;
}
static void rt_cache_flush(void)
{
int i;
struct rtable * rth, * next;
for (i=0; irt_next;
rt_cache_size--;
nr++;
rth->rt_next = NULL;
rt_free(rth);
}
#if RT_CACHE_DEBUG >= 2
if (nr > 0)
printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
}
#if RT_CACHE_DEBUG >= 1
if (rt_cache_size)
{
printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
rt_cache_size = 0;
}
Page 19
DEF00009339
KS-DEF-0000704
Linux 1.3.42 - route.c
#endif
}
static void rt_garbage_collect_1(void)
{
int i;
unsigned expire = RT_CACHE_TIMEOUT>>1;
struct rtable * rth, **rthp;
unsigned long now = jiffies;
for (;;)
{
for (i=0; irt_next)
{
if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) >
now)
continue;
rt_cache_size--;
cli();
*rthp=rth->rt_next;
rth->rt_next = NULL;
sti();
rt_free(rth);
break;
}
}
if (rt_cache_size < RT_CACHE_SIZE_MAX)
return;
expire >>= 1;
}
}
static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
{
unsigned long flags;
struct rt_req * tail;
save_flags(flags);
cli();
tail = *q;
if (!tail)
rtr->rtr_next = rtr;
else
{
rtr->rtr_next = tail->rtr_next;
tail->rtr_next = rtr;
}
*q = rtr;
restore_flags(flags);
return;
}
/*
* Caller should mask interrupts.
*/
static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
{
Page 20
DEF00009340
KS-DEF-0000705
Linux 1.3.42 - route.c
struct rt_req * rtr;
if (*q)
{
rtr = (*q)->rtr_next;
(*q)->rtr_next = rtr->rtr_next;
if (rtr->rtr_next == rtr)
*q = NULL;
rtr->rtr_next = NULL;
return rtr;
}
return NULL;
}
/*
Called with masked interrupts
*/
/*
 * Drain the deferred-redirect backlog, applying each queued redirect
 * via rt_redirect_1 and freeing the request.  Skipped entirely while
 * the route tables are locked; the RT_BH_REDIRECT bit stays set so the
 * bottom half retries later.  Per the comment above, the caller enters
 * with interrupts masked.
 */
static void rt_kick_backlog()
{
if (!ip_rt_lock)
{
struct rt_req * rtr;
ip_rt_fast_lock();
/*
 * Interrupts are re-enabled around the redirect work and masked
 * again before the shared queue is touched on the next iteration.
 */
while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
{
sti();
rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
kfree_s(rtr, sizeof(struct rt_req));
cli();
}
ip_rt_bh_mask &= ~RT_BH_REDIRECT;
ip_rt_fast_unlock();
}
}
/*
* rt_{del|add|flush} called only from USER process. Waiting is OK.
*/
/*
 * Delete a FIB entry matching dst/mask (and, when given, dev/gtw/metric).
 * USER (process) context only: sleeps until the route lock is free,
 * then runs fib_del_1 under the fast lock and wakes other waiters.
 * Returns fib_del_1's result (0 on success, -ESRCH if nothing matched).
 */
static int rt_del(__u32 dst, __u32 mask,
struct device * dev, __u32 gtw, short rt_flags, short metric)
{
int retval;
/* Waiting is OK here -- see the "called only from USER process" note. */
while (ip_rt_lock)
sleep_on(&rt_wait);
ip_rt_fast_lock();
retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
ip_rt_unlock();
wake_up(&rt_wait);
return retval;
}
/*
 * Add a FIB entry.  USER (process) context only: sleeps until the
 * route lock is free, then runs fib_add_1 under the fast lock and
 * wakes other waiters.  No result is returned (fib_add_1 is void).
 *
 * NOTE(review): document-extraction page markers were embedded inside
 * this function in the exhibit text; they are removed here, the code is
 * otherwise unchanged.
 */
static void rt_add(short flags, __u32 dst, __u32 mask,
	__u32 gw, struct device *dev, unsigned short mss,
	unsigned long window, unsigned short irtt, short metric)
{
	while (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();
	fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
	ip_rt_unlock();
	wake_up(&rt_wait);
}
/*
 * Flush all FIB entries that reference 'dev' (via fib_flush_1).
 * USER (process) context only: sleeps until the route lock is free,
 * then flushes under the fast lock and wakes other waiters.
 */
void ip_rt_flush(struct device *dev)
{
while (ip_rt_lock)
sleep_on(&rt_wait);
ip_rt_fast_lock();
fib_flush_1(dev);
ip_rt_unlock();
wake_up(&rt_wait);
}
/*
Called by ICMP module.
*/
/*
 * Process an ICMP redirect: "to reach dst, use gateway gw instead".
 * Sanity checks first: the redirect must come from the gateway we are
 * currently using for dst (rt_gateway == src), on the same device, the
 * new gateway must lie on the device's own subnet, and must not be an
 * address ip_chk_addr recognizes (presumably a local/special address --
 * confirm ip_chk_addr semantics).  If the route lock is uncontended the
 * redirect is applied immediately; otherwise it is queued on rt_backlog
 * for the bottom half (RT_BH_REDIRECT).
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
struct rt_req * rtr;
struct rtable * rt;
rt = ip_rt_route(dst, 0);
if (!rt)
return;
/* Validate the redirect against the route currently in use. */
if (rt->rt_gateway != src ||
rt->rt_dev != dev ||
((gw^dev->pa_addr)&dev->pa_mask) ||
ip_chk_addr(gw))
{
ip_rt_put(rt);
return;
}
ip_rt_put(rt);
ip_rt_fast_lock();
/* Lock uncontended (we are the only holder): apply it right now. */
if (ip_rt_lock == 1)
{
rt_redirect_1(dst, gw, dev);
ip_rt_unlock();
return;
}
/* Contended: defer to the bottom half via the backlog queue.
   Allocation failure just drops the redirect (best effort). */
rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
if (rtr)
{
rtr->dst = dst;
rtr->gw = gw;
rtr->dev = dev;
rt_req_enqueue(&rt_backlog, rtr);
ip_rt_bh_mask |= RT_BH_REDIRECT;
}
ip_rt_unlock();
}
static __inline__ void rt_garbage_collect(void)
{
Page 22
DEF00009342
KS-DEF-0000707
Linux 1.3.42 - route.c
if (ip_rt_lock == 1)
{
rt_garbage_collect_1();
return;
}
ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
}
/*
 * Insert 'rth' at the head of cache bucket 'hash' and prune the rest of
 * the chain of duplicates (same rt_dst) and aged-off, unreferenced
 * entries.  Entered with the route fast-lock held (the debug build
 * checks ip_rt_lock == 1).
 *
 * NOTE(review): document-extraction page markers were embedded inside
 * the debug branch and several declarations were split across lines in
 * the exhibit text; this body restores the original layout, code
 * otherwise unchanged.
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
	unsigned long flags;
	struct rtable **rthp;
	__u32 daddr = rth->rt_dst;
	unsigned long now = jiffies;

#if RT_CACHE_DEBUG >= 2
	if (ip_rt_lock != 1)
	{
		printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
		return;
	}
#endif

	save_flags(flags);

	/*
	 * Pre-bind a hardware-header cache entry.  A route via a gateway
	 * shares the hh of the route to the gateway itself.
	 */
	if (rth->rt_dev->header_cache_bind)
	{
		struct rtable * rtg = rth;

		if (rth->rt_gateway != daddr)
		{
			/* Drop/retake the fast lock around the nested lookup. */
			ip_rt_fast_unlock();
			rtg = ip_rt_route(rth->rt_gateway, 0);
			ip_rt_fast_lock();
		}

		if (rtg)
		{
			if (rtg == rth)
				rtg->rt_dev->header_cache_bind(&rtg->rt_hh,
					rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
			else
			{
				if (rtg->rt_hh)
					ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
				rth->rt_hh = rtg->rt_hh;
				ip_rt_put(rtg);
			}
		}
	}

	if (rt_cache_size >= RT_CACHE_SIZE_MAX)
		rt_garbage_collect();

	/* Link at the bucket head with interrupts masked. */
	cli();
	rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
	if (rth->rt_next)
	{
		struct rtable * trth;
		printk("rt_cache @%02x: %08x", hash, daddr);
		for (trth=rth->rt_next; trth; trth=trth->rt_next)
			printk(" . %08x", trth->rt_dst);
		printk("\n");
	}
#endif
	ip_rt_hash_table[hash] = rth;
	rthp = &rth->rt_next;
	sti();
	rt_cache_size++;

	/*
	 * Cleanup duplicate (and aged off) entries.
	 */
	while ((rth = *rthp) != NULL)
	{
		cli();
		if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
			|| rth->rt_dst == daddr)
		{
			*rthp = rth->rt_next;
			rt_cache_size--;
			sti();
#if RT_CACHE_DEBUG >= 2
			printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
			rt_free(rth);
			continue;
		}
		sti();
		rthp = &rth->rt_next;
	}
	restore_flags(flags);
}
/*
RT should be already locked.
We could improve this by keeping a chain of say 32 struct rtable's
last freed for fast recycling.
*/
/*
 * Cache-miss path of the route lookup (see ip_rt_route).  Entered with
 * the fast lock held; always releases it (ip_rt_unlock) before
 * returning.  Builds a new rtable from the FIB entry for daddr,
 * rerouting local destinations through the loopback entry, and inserts
 * it into the cache if we are still the sole lock holder.  Returns the
 * new entry with refcnt == 1 (caller must ip_rt_put), or NULL.
 *
 * NOTE(review): document-extraction page markers were embedded inside
 * this function and assignments were split across lines in the exhibit
 * text; this body restores the original layout, code otherwise
 * unchanged.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
	unsigned hash = ip_rt_hash_code(daddr)^local;
	struct rtable * rth;
	struct fib_node * f;
	struct fib_info * fi;
	__u32 saddr;

#if RT_CACHE_DEBUG >= 2
	printk("rt_cache miss @%08x\n", daddr);
#endif

	rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
	if (!rth)
	{
		ip_rt_unlock();
		return NULL;
	}

	if (local)
		f = fib_lookup_local(daddr);
	else
		f = fib_lookup (daddr);

	if (f)
	{
		fi = f->fib_info;
		f->fib_use++;
	}

	if (!f || (fi->fib_flags & RTF_REJECT))
	{
#if RT_CACHE_DEBUG >= 2
		printk("rt_route failed @%08x\n", daddr);
#endif
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
		return NULL;
	}

	saddr = fi->fib_dev->pa_addr;

	/* Destination is one of our own addresses: use the loopback route. */
	if (daddr == fi->fib_dev->pa_addr)
	{
		f->fib_use--;
		if ((f = fib_loopback) != NULL)
		{
			f->fib_use++;
			fi = f->fib_info;
		}
	}

	if (!f)
	{
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
		return NULL;
	}

	rth->rt_dst	= daddr;
	rth->rt_src	= saddr;
	rth->rt_lastuse	= jiffies;
	rth->rt_refcnt	= 1;
	rth->rt_use	= 1;
	rth->rt_next	= NULL;
	rth->rt_hh	= NULL;
	rth->rt_gateway	= fi->fib_gateway;
	rth->rt_dev	= fi->fib_dev;
	rth->rt_mtu	= fi->fib_mtu;
	rth->rt_window	= fi->fib_window;
	rth->rt_irtt	= fi->fib_irtt;
	rth->rt_tos	= f->fib_tos;
	rth->rt_flags	= fi->fib_flags | RTF_HOST;
	if (local)
		rth->rt_flags	|= RTF_LOCAL;

	/* Directly reachable destinations are their own gateway. */
	if (!(rth->rt_flags & RTF_GATEWAY))
		rth->rt_gateway = rth->rt_dst;

	if (ip_rt_lock == 1)
		rt_cache_add(hash, rth);
	else
	{
		/* Lock became contended: the cache may be flushing; do not
		   insert a possibly-stale entry, just hand it to the caller. */
		rt_free(rth);
#if RT_CACHE_DEBUG >= 1
		printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
	}

	ip_rt_unlock();
	return rth;
}
void ip_rt_put(struct rtable * rt)
{
if (rt)
ATOMIC_DECR(&rt->rt_refcnt);
}
/*
 * Main route lookup.  Takes the fast lock and scans the cache bucket
 * for daddr (the hash is XORed with 'local' so local and non-local
 * lookups live in different buckets).  On a hit: refresh rt_lastuse,
 * bump use/refcnt atomically, unlock, and return the entry -- caller
 * must release it with ip_rt_put.  On a miss, fall through to
 * ip_rt_slow_route, which is entered with the fast lock still held and
 * is responsible for releasing it.
 */
struct rtable * ip_rt_route(__u32 daddr, int local)
{
struct rtable * rth;
ip_rt_fast_lock();
for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth;
rth=rth->rt_next)
{
if (rth->rt_dst == daddr)
{
rth->rt_lastuse = jiffies;
ATOMIC_INCR(&rth->rt_use);
ATOMIC_INCR(&rth->rt_refcnt);
ip_rt_unlock();
return rth;
}
}
/* Cache miss: slow path inherits the lock and unlocks before return. */
return ip_rt_slow_route (daddr, local);
}
/*
*
*
*/
Process a route add request from the user, or from a kernel
task.
int ip_rt_new(struct rtentry *r)
{
int err;
char * devname;
struct device * dev = NULL;
unsigned long flags;
__u32 daddr, mask, gw;
short metric;
/*
*
*/
If a device is specified find it.
if ((devname = r->rt_dev) != NULL)
{
err = getname(devname, &devname);
if (err)
return err;
dev = dev_get(devname);
putname(devname);
if (!dev)
Page 26
DEF00009346
KS-DEF-0000711
Linux 1.3.42 - route.c
return -ENODEV;
}
/*
*
*/
If the device isn't INET, don't allow it
if (r->rt_dst.sa_family != AF_INET)
return -EAFNOSUPPORT;
/*
*
*
*/
Make local copies of the important bits
We decrement the metric by one for BSD compatibility.
flags = r->rt_flags;
daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
mask = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
gw
= (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
/*
*
*
technique
*
*/
BSD emulation: Permits route add someroute gw one-of-my-addresses
to indicate which iface. Not as clean as the nice Linux dev
but people keep using it...
(and gated likes it ;))
if (!dev && (flags & RTF_GATEWAY))
{
struct device *dev2;
for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next)
{
if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw)
{
flags &= ~RTF_GATEWAY;
dev = dev2;
break;
}
}
}
/*
*
*/
Ignore faulty masks
if (bad_mask(mask, daddr))
mask=0;
/*
*
*/
Set the mask to nothing for host routes.
if (flags & RTF_HOST)
mask = 0xffffffff;
else if (mask && r->rt_genmask.sa_family != AF_INET)
return -EAFNOSUPPORT;
/*
*
*/
You can only gateway IP via IP..
if (flags & RTF_GATEWAY)
Page 27
DEF00009347
KS-DEF-0000712
Linux 1.3.42 - route.c
{
if (r->rt_gateway.sa_family != AF_INET)
return -EAFNOSUPPORT;
if (!dev)
dev = get_gw_dev(gw);
}
else if (!dev)
dev = ip_dev_check(daddr);
/*
*
*/
Unknown device.
if (dev == NULL)
return -ENETUNREACH;
/*
*
*/
Add the route
rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt,
metric);
return 0;
}
/*
*
*/
Remove a route, as requested by the user.
static int rt_kill(struct rtentry *r)
{
struct sockaddr_in *trg;
struct sockaddr_in *msk;
struct sockaddr_in *gtw;
char *devname;
int err;
struct device * dev = NULL;
trg = (struct sockaddr_in *) &r->rt_dst;
msk = (struct sockaddr_in *) &r->rt_genmask;
gtw = (struct sockaddr_in *) &r->rt_gateway;
if ((devname = r->rt_dev) != NULL)
{
err = getname(devname, &devname);
if (err)
return err;
dev = dev_get(devname);
putname(devname);
if (!dev)
return -ENODEV;
}
/*
* metric can become negative here if it wasn't filled in
* but that's a fortunate accident; we really use that in rt_del.
*/
err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
(__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
return err;
}
/*
 *	Handle IP routing ioctl calls. These are used to manipulate the
 *	routing tables
 *
 * Only SIOCADDRT/SIOCDELRT are handled; both require superuser.  The
 * rtentry is verified and copied in from user space, then dispatched to
 * ip_rt_new or rt_kill.  Returns 0/errno.
 *
 * NOTE(review): in the exhibit text the header comment was garbled and
 * page markers were embedded inside it; restored here, code otherwise
 * unchanged.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	int err;
	struct rtentry rt;

	switch(cmd)
	{
		case SIOCADDRT:		/* Add a route */
		case SIOCDELRT:		/* Delete a route */
			if (!suser())
				return -EPERM;
			err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
			if (err)
				return err;
			memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
			return (cmd == SIOCDELRT) ? rt_kill(&rt) : ip_rt_new(&rt);
	}
	return -EINVAL;
}
/*
 * Accept routing advice about the route in *rp.  Currently a stub: the
 * advice is acknowledged and discarded, and the route is left alone.
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
	/* Thanks! */
}
Page 29
DEF00009349
KS-DEF-0000714
DEF00009350
DEF00009351
DEF00009352
DEF00009353
DEF00009354
DEF00009355
DEF00009356
DEF00009357
DEF00009358
DEF00009359
DEF00009360
DEF00009361
DEF00009362
DEF00009363
DEF00009364
DEF00009365
DEF00009366
DEF00009367
DEF00009368
DEF00009369
DEF00009370
DEF00009371
DEF00009372
DEF00009373
DEF00009374
DEF00009375
DEF00009376
DEF00009377
DEF00009378
DEF00009379
DEF00009380
DEF00009381
DEF00009382
DEF00009383
DEF00009384
DEF00009385
DEF00009386
DEF00009387
DEF00009388
DEF00009389
DEF00009390
DEF00009391
DEF00009392
DEF00009393
DEF00009394
DEF00009395
DEF00009396
DEF00009397
DEF00009398
DEF00009399
DEF00009400
DEF00009401
DEF00009402
DEF00009403
DEF00009404
DEF00009405
DEF00009406
DEF00009407
DEF00009408
DEF00009409
DEF00009410
DEF00009411
DEF00009412
DEF00009413
DEF00009414
DEF00009415
1/*
2 * INET
An implementation of the TCP/IP protocol suite for the LINUX
3*
operating system. INET is implemented using the BSD Socket
4*
interface as the means of communication with the user level.
5*
6*
ROUTE - implementation of the IP router.
7*
8 * Version: @(#)route.c 1.0.14 05/31/93
9*
10 * Authors: Ross Biro,
11 *
Fred N. van Kempen,
12 *
Alan Cox,
13 *
Linus Torvalds,
14 *
15 * Fixes:
16 *
Alan Cox
:
Verify area fixes.
17 *
Alan Cox
:
cli() protects routing changes
18 *
Rui Oliveira :
ICMP routing table updates
19 *
(rco@di.uminho.pt)
Routing table insertion and update
20 *
Linus Torvalds :
Rewrote bits to be sensible
21 *
Alan Cox
:
Added BSD route gw semantics
22 *
Alan Cox
:
Super /proc >4K
23 *
Alan Cox
:
MTU in route table
24 *
Alan Cox
:
MSS actually. Also added the window
25 *
clamper.
26 *
Sam Lantinga :
Fixed route matching in rt_del()
27 *
Alan Cox
:
Routing cache support.
28 *
Alan Cox
:
Removed compatibility cruft.
29 *
Alan Cox
:
RTF_REJECT support.
30 *
Alan Cox
:
TCP irtt support.
31 *
Jonathan Naylor :
Added Metric support.
32 *
Miquel van Smoorenburg :
BSD API fixes.
33 *
Miquel van Smoorenburg :
Metrics.
34 *
Alan Cox
:
Use __u32 properly
35 *
Alan Cox
:
Aligned routing errors more closely with BSD
36 *
our system is still very different.
37 *
Alan Cox
:
Faster /proc handling
38 *
Alexey Kuznetsov
:
Massive rework to support tree based routing,
39 *
routing caches and better behaviour.
40 *
41 *
Olaf Erb
:
irtt wasn't being copied right.
42 *
Bjorn Ekwall :
Kerneld route support.
43 *
Alan Cox
:
Multicast fixed (I hope)
44 *
Pavel Krauz :
Limited broadcast fixed
45 *
46 *
This program is free software; you can redistribute it and/or
47 *
modify it under the terms of the GNU General Public License
48 *
as published by the Free Software Foundation; either version
49 *
2 of the License, or (at your option) any later version.
50 */
51
DEF00009416
DEF00008567
52#include
53#include
54#include
55#include
56#include
57#include
58#include
59#include
60#include
61#include
62#include
63#include
64#include
65#include
66#include
67#include
68#include
69#include
70#include
71#include
72#include
73#include
74#include
75#include
76#ifdef CONFIG_KERNELD
77#include
78#endif
79
80/*
81 * Forwarding Information Base definitions.
82 */
83
84struct fib_node
85{
86
struct fib_node
*fib_next;
87
__u32
fib_dst;
88
unsigned long
fib_use;
89
struct fib_info
*fib_info;
90
short
fib_metric;
91
unsigned char
fib_tos;
92};
93
94/*
95 * This structure contains data shared by many of routes.
96 */
97
98struct fib_info
99{
100
struct fib_info
*fib_next;
101
struct fib_info
*fib_prev;
102
__u32
fib_gateway;
DEF00009417
DEF00008568
103
struct device
*fib_dev;
104
int
fib_refcnt;
105
unsigned long
fib_window;
106
unsigned short
fib_flags;
107
unsigned short
fib_mtu;
108
unsigned short
fib_irtt;
109};
110
111struct fib_zone
112{
113
struct fib_zone *fz_next;
114
struct fib_node **fz_hash_table;
115
struct fib_node *fz_list;
116
int
fz_nent;
117
int
fz_logmask;
118
__u32
fz_mask;
119};
120
121static struct fib_zone *fib_zones[33];
122static struct fib_zone *fib_zone_list;
123static struct fib_node *fib_loopback = NULL;
124static struct fib_info *fib_info_list;
125
126/*
127 * Backlogging.
128 */
129
130#define RT_BH_REDIRECT
0
131#define RT_BH_GARBAGE_COLLECT 1
132#define RT_BH_FREE
2
133
134struct rt_req
135{
136
struct rt_req * rtr_next;
137
struct device *dev;
138
__u32 dst;
139
__u32 gw;
140
unsigned char tos;
141};
142
143int
ip_rt_lock;
144unsigned
ip_rt_bh_mask;
145static struct rt_req *rt_backlog;
146
147/*
148 * Route cache.
149 */
150
151struct rtable
*ip_rt_hash_table[RT_HASH_DIVISOR];
152static int
rt_cache_size;
153static struct rtable *rt_free_queue;
DEF00009418
DEF00008569
154struct wait_queue
*rt_wait;
155
156static void rt_kick_backlog(void);
157static void rt_cache_add(unsigned hash, struct rtable * rth);
158static void rt_cache_flush(void);
159static void rt_garbage_collect_1(void);
160
161/*
162 * Evaluate mask length.
163 */
164
165static __inline__ int rt_logmask(__u32 mask)
166{
167
if (!(mask = ntohl(mask)))
168
return 32;
169
return ffz(~mask);
170}
171
172/*
173 * Create mask from length.
174 */
175
176static __inline__ __u32 rt_mask(int logmask)
177{
178
if (logmask >= 32)
179
return 0;
180
return htonl(~((1<>logmask);
186}
187
188/*
189 * Free FIB node.
190 */
191
192static void fib_free_node(struct fib_node * f)
193{
194
struct fib_info * fi = f->fib_info;
195
if (!--fi->fib_refcnt)
196
{
197#if RT_CACHE_DEBUG >= 2
198
printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
199#endif
200
if (fi->fib_next)
201
fi->fib_next->fib_prev = fi->fib_prev;
202
if (fi->fib_prev)
203
fi->fib_prev->fib_next = fi->fib_next;
204
if (fi == fib_info_list)
DEF00009419
DEF00008570
205
fib_info_list = fi->fib_next;
206
}
207
kfree_s(f, sizeof(struct fib_node));
208}
209
210/*
211 * Find gateway route by address.
212 */
213
214static struct fib_node * fib_lookup_gateway(__u32 dst)
215{
216
struct fib_zone * fz;
217
struct fib_node * f;
218
219
for (fz = fib_zone_list; fz; fz = fz->fz_next)
220
{
221
if (fz->fz_hash_table)
222
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
223
else
224
f = fz->fz_list;
225
226
for ( ; f; f = f->fib_next)
227
{
228
if ((dst ^ f->fib_dst) & fz->fz_mask)
229
continue;
230
if (f->fib_info->fib_flags & RTF_GATEWAY)
231
return NULL;
232
return f;
233
}
234
}
235
return NULL;
236}
237
238/*
239 * Find local route by address.
240 * FIXME: I use "longest match" principle. If destination
241 *
has some non-local route, I'll not search shorter matches.
242 *
It's possible, I'm wrong, but I wanted to prevent following
243 *
situation:
244 *
route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
245 *
route add 193.233.7.0 netmask 255.255.255.0 eth1
246 *
(Two ethernets connected by serial line, one is small and other is large)
247 *
Host 193.233.7.129 is locally unreachable,
248 *
but old (<=1.3.37) code will send packets destined for it to eth1.
249 *
250 */
251
252static struct fib_node * fib_lookup_local(__u32 dst)
253{
254
struct fib_zone * fz;
255
struct fib_node * f;
DEF00009420
DEF00008571
256
257
for (fz = fib_zone_list; fz; fz = fz->fz_next)
258
{
259
int longest_match_found = 0;
260
261
if (fz->fz_hash_table)
262
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
263
else
264
f = fz->fz_list;
265
266
for ( ; f; f = f->fib_next)
267
{
268
if ((dst ^ f->fib_dst) & fz->fz_mask)
269
continue;
270
if (!(f->fib_info->fib_flags & RTF_GATEWAY))
271
return f;
272
longest_match_found = 1;
273
}
274
if (longest_match_found)
275
return NULL;
276
}
277
return NULL;
278}
279
280/*
281 * Main lookup routine.
282 *
IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
283 *
by user. It doesn't route non-CIDR broadcasts by default.
284 *
285 *
F.e.
286 *
ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
287 *
is valid, but if you really are not able (not allowed, do not want) to
288 *
use CIDR compliant broadcast 193.233.7.127, you should add host route:
289 *
route add -host 193.233.7.255 eth0
290 */
291
292static struct fib_node * fib_lookup(__u32 dst)
293{
294
struct fib_zone * fz;
295
struct fib_node * f;
296
297
for (fz = fib_zone_list; fz; fz = fz->fz_next)
298
{
299
if (fz->fz_hash_table)
300
f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
301
else
302
f = fz->fz_list;
303
304
for ( ; f; f = f->fib_next)
305
{
306
if ((dst ^ f->fib_dst) & fz->fz_mask)
DEF00009421
DEF00008572
307
continue;
308
return f;
309
}
310
}
311
return NULL;
312}
313
314static __inline__ struct device * get_gw_dev(__u32 gw)
315{
316
struct fib_node * f;
317
f = fib_lookup_gateway(gw);
318
if (f)
319
return f->fib_info->fib_dev;
320
return NULL;
321}
322
323/*
324 *
Check if a mask is acceptable.
325 */
326
327static inline int bad_mask(__u32 mask, __u32 addr)
328{
329
if (addr & (mask = ~mask))
330
return 1;
331
mask = ntohl(mask);
332
if (mask & (mask+1))
333
return 1;
334
return 0;
335}
336
337
338static int fib_del_list(struct fib_node **fp, __u32 dst,
339
struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
340{
341
struct fib_node *f;
342
int found=0;
343
344
while((f = *fp) != NULL)
345
{
346
struct fib_info * fi = f->fib_info;
347
348
/*
349
*
Make sure the destination and netmask match.
350
*
metric, gateway and device are also checked
351
*
if they were specified.
352
*/
353
if (f->fib_dst != dst ||
354
(gtw && fi->fib_gateway != gtw) ||
355
(metric >= 0 && f->fib_metric != metric) ||
356
(dev && fi->fib_dev != dev) )
357
{
DEF00009422
DEF00008573
358
fp = &f->fib_next;
359
continue;
360
}
361
cli();
362
*fp = f->fib_next;
363
if (fib_loopback == f)
364
fib_loopback = NULL;
365
sti();
366
ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
367
fib_free_node(f);
368
found++;
369
}
370
return found;
371}
372
373static __inline__ int fib_del_1(__u32 dst, __u32 mask,
374
struct device * dev, __u32 gtw, short flags, short metric)
375{
376
struct fib_node **fp;
377
struct fib_zone *fz;
378
int found=0;
379
380
if (!mask)
381
{
382
for (fz=fib_zone_list; fz; fz = fz->fz_next)
383
{
384
int tmp;
385
if (fz->fz_hash_table)
386
fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
387
else
388
fp = &fz->fz_list;
389
390
tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
391
fz->fz_nent -= tmp;
392
found += tmp;
393
}
394
}
395
else
396
{
397
if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
398
{
399
if (fz->fz_hash_table)
400
fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
401
else
402
fp = &fz->fz_list;
403
404
found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
405
fz->fz_nent -= found;
406
}
407
}
408
DEF00009423
DEF00008574
409
if (found)
410
{
411
rt_cache_flush();
412
return 0;
413
}
414
return -ESRCH;
415}
416
417
418static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
419
unsigned short flags, unsigned short mss,
420
unsigned long window, unsigned short irtt)
421{
422
struct fib_info * fi;
423
424
if (!(flags & RTF_MSS))
425
{
426
mss = dev->mtu;
427#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
428
/*
429
*
If MTU was not specified, use default.
430
*
If you want to increase MTU for some net (local subnet)
431
*
use "route add .... mss xxx".
432
*
433
*
The MTU isn't currently always used and computed as it
434
*
should be as far as I can tell. [Still verifying this is right]
435
*/
436
if ((flags & RTF_GATEWAY) && mss > 576)
437
mss = 576;
438#endif
439
}
440
if (!(flags & RTF_WINDOW))
441
window = 0;
442
if (!(flags & RTF_IRTT))
443
irtt = 0;
444
445
for (fi=fib_info_list; fi; fi = fi->fib_next)
446
{
447
if (fi->fib_gateway != gw ||
448
fi->fib_dev != dev ||
449
fi->fib_flags != flags ||
450
fi->fib_mtu != mss ||
451
fi->fib_window != window ||
452
fi->fib_irtt != irtt)
453
continue;
454
fi->fib_refcnt++;
455#if RT_CACHE_DEBUG >= 2
456
printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
457#endif
458
return fi;
459
}
DEF00009424
DEF00008575
460
fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
461
if (!fi)
462
return NULL;
463
memset(fi, 0, sizeof(struct fib_info));
464
fi->fib_flags = flags;
465
fi->fib_dev = dev;
466
fi->fib_gateway = gw;
467
fi->fib_mtu = mss;
468
fi->fib_window = window;
469
fi->fib_refcnt++;
470
fi->fib_next = fib_info_list;
471
fi->fib_prev = NULL;
472
fi->fib_irtt = irtt;
473
if (fib_info_list)
474
fib_info_list->fib_prev = fi;
475
fib_info_list = fi;
476#if RT_CACHE_DEBUG >= 2
477
printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
478#endif
479
return fi;
480}
481
482
483static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
484
__u32 gw, struct device *dev, unsigned short mss,
485
unsigned long window, unsigned short irtt, short metric)
486{
487
struct fib_node *f, *f1;
488
struct fib_node **fp;
489
struct fib_node **dup_fp = NULL;
490
struct fib_zone * fz;
491
struct fib_info * fi;
492
int logmask;
493
494
/*
495
* Allocate an entry and fill it in.
496
*/
497
498
f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
499
if (f == NULL)
500
return;
501
502
memset(f, 0, sizeof(struct fib_node));
503
f->fib_dst = dst;
504
f->fib_metric = metric;
505
f->fib_tos = 0;
506
507
if ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
508
{
509
kfree_s(f, sizeof(struct fib_node));
510
return;
DEF00009425
DEF00008576
511
}
512
f->fib_info = fi;
513
514
logmask = rt_logmask(mask);
515
fz = fib_zones[logmask];
516
517
518
if (!fz)
519
{
520
int i;
521
fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
522
if (!fz)
523
{
524
fib_free_node(f);
525
return;
526
}
527
memset(fz, 0, sizeof(struct fib_zone));
528
fz->fz_logmask = logmask;
529
fz->fz_mask = mask;
530
for (i=logmask-1; i>=0; i--)
531
if (fib_zones[i])
532
break;
533
cli();
534
if (i<0)
535
{
536
fz->fz_next = fib_zone_list;
537
fib_zone_list = fz;
538
}
539
else
540
{
541
fz->fz_next = fib_zones[i]->fz_next;
542
fib_zones[i]->fz_next = fz;
543
}
544
fib_zones[logmask] = fz;
545
sti();
546
}
547
548
/*
549
* If zone overgrows RTZ_HASHING_LIMIT, create hash table.
550
*/
551
552
if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
553
{
554
struct fib_node ** ht;
555#if RT_CACHE_DEBUG >= 2
556
printk("fib_add_1: hashing for zone %d started\n", logmask);
557#endif
558
ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
559
560
if (ht)
561
{
DEF00009426
DEF00008577
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
cli();
f1 = fz->fz_list;
while (f1)
{
struct fib_node * next;
unsigned hash = fz_hash_code(f1->fib_dst, logmask);
next = f1->fib_next;
f1->fib_next = ht[hash];
ht[hash] = f1;
f1 = next;
}
fz->fz_list = NULL;
fz->fz_hash_table = ht;
sti();
}
}
if (fz->fz_hash_table)
fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
else
fp = &fz->fz_list;
/*
* Scan list to find the first route with the same destination
*/
while ((f1 = *fp) != NULL)
{
if (f1->fib_dst == dst)
break;
fp = &f1->fib_next;
}
/*
* Find route with the same destination and less (or equal) metric.
*/
while ((f1 = *fp) != NULL && f1->fib_dst == dst)
{
if (f1->fib_metric >= metric)
break;
/*
*
Record route with the same destination and gateway,
*
but less metric. We'll delete it
*
after instantiation of new route.
*/
if (f1->fib_info->fib_gateway == gw &&
(gw || f1->fib_info->fib_dev == dev))
dup_fp = fp;
fp = &f1->fib_next;
}
DEF00009427
DEF00008578
613
/*
614
* Is it already present?
615
*/
616
617
if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
618
{
619
fib_free_node(f);
620
return;
621
}
622
623
/*
624
* Insert new entry to the list.
625
*/
626
627
cli();
628
f->fib_next = f1;
629
*fp = f;
630
if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
631
fib_loopback = f;
632
sti();
633
fz->fz_nent++;
634
ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
635
636
/*
637
* Delete route with the same destination and gateway.
638
* Note that we should have at most one such route.
639
*/
640
if (dup_fp)
641
fp = dup_fp;
642
else
643
fp = &f->fib_next;
644
645
while ((f1 = *fp) != NULL && f1->fib_dst == dst)
646
{
647
if (f1->fib_info->fib_gateway == gw &&
648
(gw || f1->fib_info->fib_dev == dev))
649
{
650
cli();
651
*fp = f1->fib_next;
652
if (fib_loopback == f1)
653
fib_loopback = NULL;
654
sti();
655
ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info>fib_dev->name);
656
fib_free_node(f1);
657
fz->fz_nent--;
658
break;
659
}
660
fp = &f1->fib_next;
661
}
662
rt_cache_flush();
DEF00009428
DEF00008579
663
return;
664}
665
666static int rt_flush_list(struct fib_node ** fp, struct device *dev)
667{
668
int found = 0;
669
struct fib_node *f;
670
671
while ((f = *fp) != NULL) {
672/*
673 *
"Magic" device route is allowed to point to loopback,
674 *
discard it too.
675 */
676
if (f->fib_info->fib_dev != dev &&
677
(f->fib_info->fib_dev != &loopback_dev || f->fib_dst != dev->pa_addr)) {
678
fp = &f->fib_next;
679
continue;
680
}
681
cli();
682
*fp = f->fib_next;
683
if (fib_loopback == f)
684
fib_loopback = NULL;
685
sti();
686
fib_free_node(f);
687
found++;
688
}
689
return found;
690}
691
692static __inline__ void fib_flush_1(struct device *dev)
693{
694
struct fib_zone *fz;
695
int found = 0;
696
697
for (fz = fib_zone_list; fz; fz = fz->fz_next)
698
{
699
if (fz->fz_hash_table)
700
{
701
int i;
702
int tmp = 0;
703
for (i=0; ifz_hash_table[i], dev);
705
fz->fz_nent -= tmp;
706
found += tmp;
707
}
708
else
709
{
710
int tmp;
711
tmp = rt_flush_list(&fz->fz_list, dev);
712
fz->fz_nent -= tmp;
713
found += tmp;
DEF00009429
DEF00008580
714
}
715
}
716
717
if (found)
718
rt_cache_flush();
719}
720
721
722/*
723 *
Called from the PROCfs module. This outputs /proc/net/route.
724 *
725 *
We preserve the old format but pad the buffers out. This means that
726 *
we can spin over the other entries as we read them. Remember the
727 *
gated BGP4 code could need to read 60,000+ routes on occasion (that's
728 *
about 7Mb of data). To do that ok we will need to also cache the
729 *
last route we got to (reads will generally be following on from
730 *
one another without gaps).
731 */
732
733int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
734{
735
struct fib_zone *fz;
736
struct fib_node *f;
737
int len=0;
738
off_t pos=0;
739
char temp[129];
740
int i;
741
742
pos = 128;
743
744
if (offset<128)
745
{
746
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
747
len = 128;
748
}
749
750
while (ip_rt_lock)
751
sleep_on(&rt_wait);
752
ip_rt_fast_lock();
753
754
for (fz=fib_zone_list; fz; fz = fz->fz_next)
755
{
756
int maxslot;
757
struct fib_node ** fp;
758
759
if (fz->fz_nent == 0)
760
continue;
761
762
if (pos + 128*fz->fz_nent <= offset)
763
{
DEF00009430
DEF00008581
764
pos += 128*fz->fz_nent;
765
len = 0;
766
continue;
767
}
768
769
if (fz->fz_hash_table)
770
{
771
maxslot = RTZ_HASH_DIVISOR;
772
fp
= fz->fz_hash_table;
773
}
774
else
775
{
776
maxslot = 1;
777
fp
= &fz->fz_list;
778
}
779
780
for (i=0; i < maxslot; i++, fp++)
781
{
782
783
for (f = *fp; f; f = f->fib_next)
784
{
785
struct fib_info * fi;
786
/*
787
*
Spin through entries until we are ready
788
*/
789
pos += 128;
790
791
if (pos <= offset)
792
{
793
len=0;
794
continue;
795
}
796
797
fi = f->fib_info;
798
sprintf(temp,
"%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
799
fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi>fib_gateway,
800
fi->fib_flags, 0, f->fib_use, f->fib_metric,
801
(unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi>fib_irtt);
802
sprintf(buffer+len,"%-127s\n",temp);
803
804
len += 128;
805
if (pos >= offset+length)
806
goto done;
807
}
808
}
809
}
810
811done:
DEF00009431
DEF00008582
812
ip_rt_unlock();
813
wake_up(&rt_wait);
814
815
*start = buffer+len-(pos-offset);
816
len = pos - offset;
817
if (len>length)
818
len = length;
819
return len;
820}
821
822int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
823{
824
int len=0;
825
off_t pos=0;
826
char temp[129];
827
struct rtable *r;
828
int i;
829
830
pos = 128;
831
832
if (offset<128)
833
{
834
sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway
\tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP");
835
len = 128;
836
}
837
838
839
while (ip_rt_lock)
840
sleep_on(&rt_wait);
841
ip_rt_fast_lock();
842
843
for (i = 0; irt_next)
846
{
847
/*
848
*
Spin through entries until we are ready
849
*/
850
pos += 128;
851
852
if (pos <= offset)
853
{
854
len = 0;
855
continue;
856
}
857
858
sprintf(temp,
"%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
859
r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
860
r->rt_flags, r->rt_refcnt, r->rt_use, 0,
DEF00009432
DEF00008583
861
(unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ?
r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
862
sprintf(buffer+len,"%-127s\n",temp);
863
len += 128;
864
if (pos >= offset+length)
865
goto done;
866
}
867
}
868
869done:
870
ip_rt_unlock();
871
wake_up(&rt_wait);
872
873
*start = buffer+len-(pos-offset);
874
len = pos-offset;
875
if (len>length)
876
len = length;
877
return len;
878}
879
880
881static void rt_free(struct rtable * rt)
882{
883
unsigned long flags;
884
885
save_flags(flags);
886
cli();
887
if (!rt->rt_refcnt)
888
{
889
struct hh_cache * hh = rt->rt_hh;
890
rt->rt_hh = NULL;
891
restore_flags(flags);
892
if (hh && atomic_dec_and_test(&hh->hh_refcnt))
893
kfree_s(hh, sizeof(struct hh_cache));
894
kfree_s(rt, sizeof(struct rt_table));
895
return;
896
}
897
rt->rt_next = rt_free_queue;
898
rt->rt_flags &= ~RTF_UP;
899
rt_free_queue = rt;
900
ip_rt_bh_mask |= RT_BH_FREE;
901#if RT_CACHE_DEBUG >= 2
902
printk("rt_free: %08x\n", rt->rt_dst);
903#endif
904
restore_flags(flags);
905}
906
907/*
908 * RT "bottom half" handlers. Called with masked interrupts.
909 */
910
DEF00009433
DEF00008584
911static __inline__ void rt_kick_free_queue(void)
912{
913
struct rtable *rt, **rtp;
914
915
rtp = &rt_free_queue;
916
917
while ((rt = *rtp) != NULL)
918
{
919
if (!rt->rt_refcnt)
920
{
921
struct hh_cache * hh = rt->rt_hh;
922#if RT_CACHE_DEBUG >= 2
923
__u32 daddr = rt->rt_dst;
924#endif
925
*rtp = rt->rt_next;
926
rt->rt_hh = NULL;
927
sti();
928
if (hh && atomic_dec_and_test(&hh->hh_refcnt))
929
kfree_s(hh, sizeof(struct hh_cache));
930
kfree_s(rt, sizeof(struct rt_table));
931#if RT_CACHE_DEBUG >= 2
932
printk("rt_kick_free_queue: %08x is free\n", daddr);
933#endif
934
cli();
935
continue;
936
}
937
rtp = &rt->rt_next;
938
}
939}
940
941void ip_rt_run_bh()
942{
943
unsigned long flags;
944
save_flags(flags);
945
cli();
946
if (ip_rt_bh_mask && !ip_rt_lock)
947
{
948
if (ip_rt_bh_mask & RT_BH_REDIRECT)
949
rt_kick_backlog();
950
951
if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
952
{
953
ip_rt_fast_lock();
954
ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
955
sti();
956
rt_garbage_collect_1();
957
cli();
958
ip_rt_fast_unlock();
959
}
960
961
if (ip_rt_bh_mask & RT_BH_FREE)
DEF00009434
DEF00008585
962
rt_kick_free_queue();
963
}
964
restore_flags(flags);
965}
966
967
968void ip_rt_check_expire()
969{
970
ip_rt_fast_lock();
971
if (ip_rt_lock == 1)
972
{
973
int i;
974
struct rtable *rth, **rthp;
975
unsigned long flags;
976
unsigned long now = jiffies;
977
978
save_flags(flags);
979
for (i=0; irt_next;
986
987
/*
988
* Cleanup aged off entries.
989
*/
990
991
cli();
992
if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
993
{
994
*rthp = rth_next;
995
sti();
996
rt_cache_size--;
997#if RT_CACHE_DEBUG >= 2
998
printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
999#endif
1000
rt_free(rth);
1001
continue;
1002
}
1003
sti();
1004
1005
if (!rth_next)
1006
break;
1007
1008
/*
1009
* LRU ordering.
1010
*/
1011
DEF00009435
DEF00008586
1012
if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOLD < rth_next>rt_lastuse ||
1013
(rth->rt_lastuse < rth_next->rt_lastuse &&
1014
rth->rt_use < rth_next->rt_use))
1015
{
1016#if RT_CACHE_DEBUG >= 2
1017
printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst,
rth_next->rt_dst);
1018#endif
1019
cli();
1020
*rthp = rth_next;
1021
rth->rt_next = rth_next->rt_next;
1022
rth_next->rt_next = rth;
1023
sti();
1024
rthp = &rth_next->rt_next;
1025
continue;
1026
}
1027
rthp = &rth->rt_next;
1028
}
1029
}
1030
restore_flags(flags);
1031
rt_kick_free_queue();
1032
}
1033
ip_rt_unlock();
1034}
1035
1036static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
1037{
1038
struct rtable *rt;
1039
unsigned long hash = ip_rt_hash_code(dst);
1040
1041
if (gw == dev->pa_addr)
1042
return;
1043
if (dev != get_gw_dev(gw))
1044
return;
1045
rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
1046
if (rt == NULL)
1047
return;
1048
memset(rt, 0, sizeof(struct rtable));
1049
rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY |
RTF_UP;
1050
rt->rt_dst = dst;
1051
rt->rt_dev = dev;
1052
rt->rt_gateway = gw;
1053
rt->rt_src = dev->pa_addr;
1054
rt->rt_mtu = dev->mtu;
1055#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
1056
if (dev->mtu > 576)
1057
rt->rt_mtu = 576;
1058#endif
1059
rt->rt_lastuse = jiffies;
DEF00009436
DEF00008587
1060
rt->rt_refcnt = 1;
1061
rt_cache_add(hash, rt);
1062
ip_rt_put(rt);
1063
return;
1064}
1065
1066static void rt_cache_flush(void)
1067{
1068
int i;
1069
struct rtable * rth, * next;
1070
1071
for (i=0; irt_next;
1088
rt_cache_size--;
1089
nr++;
1090
rth->rt_next = NULL;
1091
rt_free(rth);
1092
}
1093#if RT_CACHE_DEBUG >= 2
1094
if (nr > 0)
1095
printk("rt_cache_flush: %d@%02x\n", nr, i);
1096#endif
1097
}
1098#if RT_CACHE_DEBUG >= 1
1099
if (rt_cache_size)
1100
{
1101
printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
1102
rt_cache_size = 0;
1103
}
1104#endif
1105}
1106
1107static void rt_garbage_collect_1(void)
1108{
1109
int i;
1110
unsigned expire = RT_CACHE_TIMEOUT>>1;
DEF00009437
DEF00008588
1111
struct rtable * rth, **rthp;
1112
unsigned long now = jiffies;
1113
1114
for (;;)
1115
{
1116
for (i=0; irt_next)
1121
{
1122
if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
1123
continue;
1124
rt_cache_size--;
1125
cli();
1126
*rthp=rth->rt_next;
1127
rth->rt_next = NULL;
1128
sti();
1129
rt_free(rth);
1130
break;
1131
}
1132
}
1133
if (rt_cache_size < RT_CACHE_SIZE_MAX)
1134
return;
1135
expire >>= 1;
1136
}
1137}
1138
1139static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
1140{
1141
unsigned long flags;
1142
struct rt_req * tail;
1143
1144
save_flags(flags);
1145
cli();
1146
tail = *q;
1147
if (!tail)
1148
rtr->rtr_next = rtr;
1149
else
1150
{
1151
rtr->rtr_next = tail->rtr_next;
1152
tail->rtr_next = rtr;
1153
}
1154
*q = rtr;
1155
restore_flags(flags);
1156
return;
1157}
1158
1159/*
1160 * Caller should mask interrupts.
1161 */
DEF00009438
DEF00008589
1162
1163static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
1164{
1165
struct rt_req * rtr;
1166
1167
if (*q)
1168
{
1169
rtr = (*q)->rtr_next;
1170
(*q)->rtr_next = rtr->rtr_next;
1171
if (rtr->rtr_next == rtr)
1172
*q = NULL;
1173
rtr->rtr_next = NULL;
1174
return rtr;
1175
}
1176
return NULL;
1177}
1178
1179/*
1180 Called with masked interrupts
1181 */
1182
1183static void rt_kick_backlog()
1184{
1185
if (!ip_rt_lock)
1186
{
1187
struct rt_req * rtr;
1188
1189
ip_rt_fast_lock();
1190
1191
while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
1192
{
1193
sti();
1194
rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
1195
kfree_s(rtr, sizeof(struct rt_req));
1196
cli();
1197
}
1198
1199
ip_rt_bh_mask &= ~RT_BH_REDIRECT;
1200
1201
ip_rt_fast_unlock();
1202
}
1203}
1204
1205/*
1206 * rt_{del|add|flush} called only from USER process. Waiting is OK.
1207 */
1208
1209static int rt_del(__u32 dst, __u32 mask,
1210
struct device * dev, __u32 gtw, short rt_flags, short metric)
1211{
1212
int retval;
DEF00009439
DEF00008590
1213
1214
while (ip_rt_lock)
1215
sleep_on(&rt_wait);
1216
ip_rt_fast_lock();
1217
retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
1218
ip_rt_unlock();
1219
wake_up(&rt_wait);
1220
return retval;
1221}
1222
1223static void rt_add(short flags, __u32 dst, __u32 mask,
1224
__u32 gw, struct device *dev, unsigned short mss,
1225
unsigned long window, unsigned short irtt, short metric)
1226{
1227
while (ip_rt_lock)
1228
sleep_on(&rt_wait);
1229
ip_rt_fast_lock();
1230
fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
1231
ip_rt_unlock();
1232
wake_up(&rt_wait);
1233}
1234
1235void ip_rt_flush(struct device *dev)
1236{
1237
while (ip_rt_lock)
1238
sleep_on(&rt_wait);
1239
ip_rt_fast_lock();
1240
fib_flush_1(dev);
1241
ip_rt_unlock();
1242
wake_up(&rt_wait);
1243}
1244
1245/*
1246 Called by ICMP module.
1247 */
1248
1249void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
1250{
1251
struct rt_req * rtr;
1252
struct rtable * rt;
1253
1254
rt = ip_rt_route(dst, 0);
1255
if (!rt)
1256
return;
1257
1258
if (rt->rt_gateway != src ||
1259
rt->rt_dev != dev ||
1260
((gw^dev->pa_addr)&dev->pa_mask) ||
1261
ip_chk_addr(gw))
1262
{
1263
ip_rt_put(rt);
DEF00009440
DEF00008591
1264
return;
1265
}
1266
ip_rt_put(rt);
1267
1268
ip_rt_fast_lock();
1269
if (ip_rt_lock == 1)
1270
{
1271
rt_redirect_1(dst, gw, dev);
1272
ip_rt_unlock();
1273
return;
1274
}
1275
1276
rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
1277
if (rtr)
1278
{
1279
rtr->dst = dst;
1280
rtr->gw = gw;
1281
rtr->dev = dev;
1282
rt_req_enqueue(&rt_backlog, rtr);
1283
ip_rt_bh_mask |= RT_BH_REDIRECT;
1284
}
1285
ip_rt_unlock();
1286}
1287
1288
1289static __inline__ void rt_garbage_collect(void)
1290{
1291
if (ip_rt_lock == 1)
1292
{
1293
rt_garbage_collect_1();
1294
return;
1295
}
1296
ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1297}
1298
1299static void rt_cache_add(unsigned hash, struct rtable * rth)
1300{
1301
unsigned long flags;
1302
struct rtable **rthp;
1303
__u32
daddr = rth->rt_dst;
1304
unsigned long now = jiffies;
1305
1306#if RT_CACHE_DEBUG >= 2
1307
if (ip_rt_lock != 1)
1308
{
1309
printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
1310
return;
1311
}
1312#endif
1313
1314
save_flags(flags);
DEF00009441
DEF00008592
1315
1316
if (rth->rt_dev->header_cache_bind)
1317
{
1318
struct rtable * rtg = rth;
1319
1320
if (rth->rt_gateway != daddr)
1321
{
1322
ip_rt_fast_unlock();
1323
rtg = ip_rt_route(rth->rt_gateway, 0);
1324
ip_rt_fast_lock();
1325
}
1326
1327
if (rtg)
1328
{
1329
if (rtg == rth)
1330
rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg>rt_dst);
1331
else
1332
{
1333
if (rtg->rt_hh)
1334
atomic_inc(&rtg->rt_hh->hh_refcnt);
1335
rth->rt_hh = rtg->rt_hh;
1336
ip_rt_put(rtg);
1337
}
1338
}
1339
}
1340
1341
if (rt_cache_size >= RT_CACHE_SIZE_MAX)
1342
rt_garbage_collect();
1343
1344
cli();
1345
rth->rt_next = ip_rt_hash_table[hash];
1346#if RT_CACHE_DEBUG >= 2
1347
if (rth->rt_next)
1348
{
1349
struct rtable * trth;
1350
printk("rt_cache @%02x: %08x", hash, daddr);
1351
for (trth=rth->rt_next; trth; trth=trth->rt_next)
1352
printk(" . %08x", trth->rt_dst);
1353
printk("\n");
1354
}
1355#endif
1356
ip_rt_hash_table[hash] = rth;
1357
rthp = &rth->rt_next;
1358
sti();
1359
rt_cache_size++;
1360
1361
/*
1362
* Cleanup duplicate (and aged off) entries.
1363
*/
1364
DEF00009442
DEF00008593
1365
while ((rth = *rthp) != NULL)
1366
{
1367
1368
cli();
1369
if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
1370
|| rth->rt_dst == daddr)
1371
{
1372
*rthp = rth->rt_next;
1373
rt_cache_size--;
1374
sti();
1375#if RT_CACHE_DEBUG >= 2
1376
printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
1377#endif
1378
rt_free(rth);
1379
continue;
1380
}
1381
sti();
1382
rthp = &rth->rt_next;
1383
}
1384
restore_flags(flags);
1385}
1386
1387/*
1388 RT should be already locked.
1389
1390 We could improve this by keeping a chain of say 32 struct rtable's
1391 last freed for fast recycling.
1392
1393 */
1394
1395struct rtable * ip_rt_slow_route (__u32 daddr, int local)
1396{
1397
unsigned hash = ip_rt_hash_code(daddr)^local;
1398
struct rtable * rth;
1399
struct fib_node * f;
1400
struct fib_info * fi;
1401
__u32 saddr;
1402
1403#if RT_CACHE_DEBUG >= 2
1404
printk("rt_cache miss @%08x\n", daddr);
1405#endif
1406
1407
rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
1408
if (!rth)
1409
{
1410
ip_rt_unlock();
1411
return NULL;
1412
}
1413
1414
if (local)
1415
f = fib_lookup_local(daddr);
DEF00009443
DEF00008594
1416
else
1417
f = fib_lookup (daddr);
1418
1419
if (f)
1420
{
1421
fi = f->fib_info;
1422
f->fib_use++;
1423
}
1424
1425
if (!f || (fi->fib_flags & RTF_REJECT))
1426
{
1427#ifdef CONFIG_KERNELD
1428
char wanted_route[20];
1429#endif
1430#if RT_CACHE_DEBUG >= 2
1431
printk("rt_route failed @%08x\n", daddr);
1432#endif
1433
ip_rt_unlock();
1434
kfree_s(rth, sizeof(struct rtable));
1435#ifdef CONFIG_KERNELD
1436
daddr=ntohl(daddr);
1437
sprintf(wanted_route, "%d.%d.%d.%d",
1438
(int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
1439
(int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
1440
kerneld_route(wanted_route); /* Dynamic route request */
1441#endif
1442
return NULL;
1443
}
1444
1445
saddr = fi->fib_dev->pa_addr;
1446
1447
if (daddr == fi->fib_dev->pa_addr)
1448
{
1449
f->fib_use--;
1450
if ((f = fib_loopback) != NULL)
1451
{
1452
f->fib_use++;
1453
fi = f->fib_info;
1454
}
1455
}
1456
1457
if (!f)
1458
{
1459
ip_rt_unlock();
1460
kfree_s(rth, sizeof(struct rtable));
1461
return NULL;
1462
}
1463
1464
rth->rt_dst = daddr;
1465
rth->rt_src = saddr;
1466
rth->rt_lastuse = jiffies;
DEF00009444
DEF00008595
1467
rth->rt_refcnt = 1;
1468
rth->rt_use = 1;
1469
rth->rt_next = NULL;
1470
rth->rt_hh
= NULL;
1471
rth->rt_gateway = fi->fib_gateway;
1472
rth->rt_dev = fi->fib_dev;
1473
rth->rt_mtu = fi->fib_mtu;
1474
rth->rt_window = fi->fib_window;
1475
rth->rt_irtt = fi->fib_irtt;
1476
rth->rt_tos = f->fib_tos;
1477
rth->rt_flags = fi->fib_flags | RTF_HOST;
1478
if (local)
1479
rth->rt_flags |= RTF_LOCAL;
1480
1481
if (!(rth->rt_flags & RTF_GATEWAY))
1482
rth->rt_gateway = rth->rt_dst;
1483
/*
1484
* Multicast or limited broadcast is never gatewayed.
1485
*/
1486
if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
1487
rth->rt_gateway = rth->rt_dst;
1488
1489
if (ip_rt_lock == 1)
1490
rt_cache_add(hash, rth);
1491
else
1492
{
1493
rt_free(rth);
1494#if RT_CACHE_DEBUG >= 1
1495
printk(KERN_DEBUG "rt_cache: route to %08x was born dead\n", daddr);
1496#endif
1497
}
1498
1499
ip_rt_unlock();
1500
return rth;
1501}
1502
1503void ip_rt_put(struct rtable * rt)
1504{
1505
if (rt)
1506
atomic_dec(&rt->rt_refcnt);
1507}
1508
1509struct rtable * ip_rt_route(__u32 daddr, int local)
1510{
1511
struct rtable * rth;
1512
1513
ip_rt_fast_lock();
1514
1515
for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
1516
{
1517
if (rth->rt_dst == daddr)
DEF00009445
DEF00008596
1518
{
1519
rth->rt_lastuse = jiffies;
1520
atomic_inc(&rth->rt_use);
1521
atomic_inc(&rth->rt_refcnt);
1522
ip_rt_unlock();
1523
return rth;
1524
}
1525
}
1526
return ip_rt_slow_route (daddr, local);
1527}
1528
1529/*
1530 *
Process a route add request from the user, or from a kernel
1531 *
task.
1532 */
1533
1534int ip_rt_new(struct rtentry *r)
1535{
1536
int err;
1537
char * devname;
1538
struct device * dev = NULL;
1539
unsigned long flags;
1540
__u32 daddr, mask, gw;
1541
short metric;
1542
1543
/*
1544
* If a device is specified find it.
1545
*/
1546
1547
if ((devname = r->rt_dev) != NULL)
1548
{
1549
err = getname(devname, &devname);
1550
if (err)
1551
return err;
1552
dev = dev_get(devname);
1553
putname(devname);
1554
if (!dev)
1555
return -ENODEV;
1556
}
1557
1558
/*
1559
* If the device isn't INET, don't allow it
1560
*/
1561
1562
if (r->rt_dst.sa_family != AF_INET)
1563
return -EAFNOSUPPORT;
1564
1565
/*
1566
* Make local copies of the important bits
1567
* We decrement the metric by one for BSD compatibility.
1568
*/
DEF00009446
DEF00008597
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
flags = r->rt_flags;
daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
mask = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
gw = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
/*
*
*
*
*/
BSD emulation: Permits route add someroute gw one-of-my-addresses
to indicate which iface. Not as clean as the nice Linux dev technique
but people keep using it... (and gated likes it ;))
if (!dev && (flags & RTF_GATEWAY))
{
struct device *dev2;
for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next)
{
if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw)
{
flags &= ~RTF_GATEWAY;
dev = dev2;
break;
}
}
}
if (flags & RTF_HOST)
mask = 0xffffffff;
else if (mask && r->rt_genmask.sa_family != AF_INET)
return -EAFNOSUPPORT;
if (flags & RTF_GATEWAY)
{
if (r->rt_gateway.sa_family != AF_INET)
return -EAFNOSUPPORT;
/*
*
*
*/
Don't try to add a gateway we can't reach..
Tunnel devices are exempt from this rule.
if (!dev)
dev = get_gw_dev(gw);
else if (dev != get_gw_dev(gw) && dev->type != ARPHRD_TUNNEL)
return -EINVAL;
if (!dev)
return -ENETUNREACH;
}
else
{
DEF00009447
DEF00008598
1620
gw = 0;
1621
if (!dev)
1622
dev = ip_dev_bynet(daddr, mask);
1623
if (!dev)
1624
return -ENETUNREACH;
1625
if (!mask)
1626
{
1627
if (((daddr ^ dev->pa_addr) & dev->pa_mask) == 0)
1628
mask = dev->pa_mask;
1629
}
1630
}
1631
1632#ifndef CONFIG_IP_CLASSLESS
1633
if (!mask)
1634
mask = ip_get_mask(daddr);
1635#endif
1636
1637
if (bad_mask(mask, daddr))
1638
return -EINVAL;
1639
1640
/*
1641
* Add the route
1642
*/
1643
1644
rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
1645
return 0;
1646}
1647
1648
1649/*
1650 *
Remove a route, as requested by the user.
1651 */
1652
1653int ip_rt_kill(struct rtentry *r)
1654{
1655
struct sockaddr_in *trg;
1656
struct sockaddr_in *msk;
1657
struct sockaddr_in *gtw;
1658
char *devname;
1659
int err;
1660
struct device * dev = NULL;
1661
1662
trg = (struct sockaddr_in *) &r->rt_dst;
1663
msk = (struct sockaddr_in *) &r->rt_genmask;
1664
gtw = (struct sockaddr_in *) &r->rt_gateway;
1665
if ((devname = r->rt_dev) != NULL)
1666
{
1667
err = getname(devname, &devname);
1668
if (err)
1669
return err;
1670
dev = dev_get(devname);
DEF00009448
DEF00008599
1671
putname(devname);
1672
if (!dev)
1673
return -ENODEV;
1674
}
1675
/*
1676
* metric can become negative here if it wasn't filled in
1677
* but that's a fortunate accident; we really use that in rt_del.
1678
*/
1679
err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1680
(__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1681
return err;
1682}
1683
1684/*
1685 *
Handle IP routing ioctl calls. These are used to manipulate the routing tables
1686 */
1687
1688int ip_rt_ioctl(unsigned int cmd, void *arg)
1689{
1690
int err;
1691
struct rtentry rt;
1692
1693
switch(cmd)
1694
{
1695
case SIOCADDRT:
/* Add a route */
1696
case SIOCDELRT:
/* Delete a route */
1697
if (!suser())
1698
return -EPERM;
1699
err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1700
if (err)
1701
return err;
1702
memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1703
return (cmd == SIOCDELRT) ? ip_rt_kill(&rt) : ip_rt_new(&rt);
1704
}
1705
1706
return -EINVAL;
1707}
1708
/*
 *	Advisory hook from upper layers about a route's quality.
 *	Intentionally a no-op in this version.
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
	/* Thanks! */
	return;
}
1714
/*
 *	React to device up/down events by adding/removing the host route
 *	for the device's own address — deliberately compiled out here.
 */
void ip_rt_update(int event, struct device *dev)
{
/*
 *	This causes too much grief to do now.
 */
#ifdef COMING_IN_2_1
	if (event == NETDEV_UP)
		rt_add(RTF_HOST|RTF_UP, dev->pa_addr, ~0, 0, dev, 0, 0, 0, 0);
	else if (event == NETDEV_DOWN)
		rt_del(dev->pa_addr, ~0, dev, 0, RTF_HOST|RTF_UP, 0);
#endif
}
DEF00009450
DEF00008601
DEF00009451
DEF00009452
DEF00009453
DEF00009454
Highly Confidential - Plaintiff's Outside Counsel Only
SOF0062017
Highly Confidential - Plaintiff's Outside Counsel Only
SOF0062018
Highly Confidential - Plaintiff's Outside Counsel Only
SOF0062019
From:
Sent:
To:
Cc:
Subject:
Alexey Kuznetsov [kuznet@ms2.inr.ac.ru]
Friday, December 03, 2010 5:58 AM
Absher, Alton
'kuznet@parallels.com'
Re: Linux route.c question
Hello!
> If you are willing to help us, we are willing to pay your standard consulting rate for
your time. Please let me know if you would be willing to speak with us. If so, please
provide me with (1) your hourly rate, and (2) the best times to talk with us for about
thirty minutes.
I have already spent time analyzing the patent 5893120 after Nathan Day of SoftLayer
Technologies (who is probably one of your clients now) contacted me about year ago.
Is this the same case?
So, probably I can help. 1. For free. 2. By e-mail.
FYI I am afraid I am not a correct person to contact. My analysis showed that code written
by me does not actually collide with forementioned patent, my code uses quite different
techniques.
But current linux kernel actually contains logic which could be considered as infringing
the patent: it was committed in January 2008 by Eric Dumazet.
(commit 29e75252da20f3ab9e132c68c9aed156b87beae6). Even though Eric wrote this piece, the
idea was floating for ages, I thought it was either mine or David Miller's, we did not
implement this earlier only because it was not considered enough important.
But unfortunately I could not find any references describing the idea before 1999, when
the patent was issued. So, I must say the position can be difficult to defend.
I believe you should seek for an expert in loopholes of patent rules, the algorithm is
still not direct replica of one described in the patent and expert could find a place to
stand.
Alexey
1
CONFIDENTIAL
KTS0000242
From:
Sent:
To:
Subject:
Alexey Kuznetsov [kuznet@ms2.inr.ac.ru]
Friday, December 10, 2010 6:38 AM
Absher, Alton
Re: Linux route.c question
Hello!
> Thank you for returning my email. Yes, this is the same case. Can we have a very brief
> (10 minutes or less) telephone conversation first?
Well, OK.
But, first, I need some confirmation of your identity. I am not utterly paranoid, so that
e-mail from any person at redhat.com would be enough.
Alexey
1
CONFIDENTIAL
KTS0000243
From:
Sent:
To:
Subject:
Absher, Alton
Wednesday, December 08, 2010 5:58 PM
'Alexey Kuznetsov'
RE: Linux route.c question
Hello Alexey,
Thank you for returning my email. Yes, this is the same case. Can we have a very brief
(10 minutes or less) telephone conversation first? Let me know a convenient time for you.
I recognize that we are in different time zones, but I am available to talk at any time
that is convenient for you.
Regards,
Alton
-----Original Message----From: Alexey Kuznetsov [mailto:kuznet@ms2.inr.ac.ru]
Sent: Friday, December 03, 2010 5:58 AM
To: Absher, Alton
Cc: 'kuznet@parallels.com'
Subject: Re: Linux route.c question
Hello!
> If you are willing to help us, we are willing to pay your standard consulting rate for
your time. Please let me know if you would be willing to speak with us. If so, please
provide me with (1) your hourly rate, and (2) the best times to talk with us for about
thirty minutes.
I have already spent time analyzing the patent 5893120 after Nathan Day of SoftLayer
Technologies (who is probably one of your clients now) contacted me about year ago.
Is this the same case?
So, probably I can help. 1. For free. 2. By e-mail.
FYI I am afraid I am not a correct person to contact. My analysis showed that code written
by me does not actually collide with forementioned patent, my code uses quite different
techniques.
But current linux kernel actually contains logic which could be considered as infringing
the patent: it was committed in January 2008 by Eric Dumazet.
(commit 29e75252da20f3ab9e132c68c9aed156b87beae6). Even though Eric wrote this piece, the
idea was floating for ages, I thought it was either mine or David Miller's, we did not
implement this earlier only because it was not considered enough important.
But unfortunately I could not find any references describing the idea before 1999, when
the patent was issued. So, I must say the position can be difficult to defend.
I believe you should seek for an expert in loopholes of patent rules, the algorithm is
still not direct replica of one described in the patent and expert could find a place to
stand.
Alexey
1
CONFIDENTIAL
KTS0000244
Page 1 of 1
From:
Absher, Alton
Sent:
Thursday, December 02, 2010 2:26 PM
To:
'kuznet@parallels.com'
Subject: Linux route.c question
Dear Mr. Kuznetsov,
I am a patent attorney representing Red Hat and several of Red Hat’s customers who have been
sued for patent infringement based on code that you contributed to the Linux
kernel. Specifically, they are alleging that the code that manages the Linux routing cache
infringes a patent.
If you are willing to help us, we are willing to pay your standard consulting rate for your time.
Please let me know if you would be willing to speak with us. If so, please provide me with (1)
your hourly rate, and (2) the best times to talk with us for about thirty minutes.
Regards,
Alton Absher
Alton Absher
Kilpatrick Stockton LLP
1001 West Fourth Street | Winston-Salem, NC 27101-2400
office 336 607 7307 | cell 336 926 0211 | fax 336 734 2755
aabsher@kilpatrickstockton.com | My Profile
CONFIDENTIAL
KTS0000245
From:
Sent:
To:
Subject:
Alexey Kuznetsov [kuznet@ms2.inr.ac.ru]
Monday, December 13, 2010 4:45 AM
Absher, Alton
Re: Linux route.c question
Hello!
F.e. you may call today (Monday).
Phone: +7 (495) 7832977 ext. 70427
For me convenient time is 16:00 GMT (I assume you are in timezone GMT-5, so that this
should be 11:00 for you)
Otherwise, we can schedule call for Wednesday, the same time.
Alexey
1
CONFIDENTIAL
KTS0000246
From:
Sent:
To:
Subject:
Alexey Kuznetsov [kuznet@ms2.inr.ac.ru]
Monday, December 13, 2010 9:03 AM
Absher, Alton
Re: Linux route.c question
Hello!
> Today at 16:00 GMT works for me.
OK.
> In connection with the call, please see the attached code and change log. We will
> briefly discuss the rt_cache_add() function.
Thanks. I did not even look so far behind. :-) Is not this enough to invalidate the
patent?
Alexey
1
CONFIDENTIAL
KTS0000247
From:
Sent:
To:
Subject:
Absher, Alton
Monday, December 13, 2010 8:47 AM
'Alexey Kuznetsov'
RE: Linux route.c question
Attachments:
Linux 1.3.42 - route.c; Linux 1.3.42 - route.c - Nov. 17 1995 changelog.txt
Linux 1.3.42 route.c (40 KB)...
Linux 1.3.42 route.c - Nov. ...
Hello Alexey,
Today at 16:00 GMT works for me. In connection with the call, please see the attached
code and change log. We will briefly discuss the rt_cache_add() function.
Regards,
Alton
Alton Absher
Kilpatrick Stockton LLP
1001 West Fourth Street | Winston-Salem, NC 27101-2400 office 336 607 7307 | cell 336 926
0211 | fax 336 734 2755 aabsher@kilpatrickstockton.com | www.kilpatrickstockton.com
-----Original Message----From: Alexey Kuznetsov [mailto:kuznet@ms2.inr.ac.ru]
Sent: Monday, December 13, 2010 4:45 AM
To: Absher, Alton
Subject: Re: Linux route.c question
Hello!
F.e. you may call today (Monday).
Phone: +7 (495) 7832977 ext. 70427
For me convenient time is 16:00 GMT (I assume you are in timezone GMT-5, so that this
should be 11:00 for you)
Otherwise, we can schedule call for Wednesday, the same time.
Alexey
1
CONFIDENTIAL
KTS0000173
Page 1 of 1
From:
Absher, Alton
Sent:
Tuesday, December 14, 2010 5:38 PM
To:
Alexey Kuznetsov
Subject:
route.c declaration
Attachments: Declaration-of-Alexey-Kuznetsov.pdf; Exhibit_A.pdf; Exhibit_B.pdf; Exhibit_C.pdf; Exhibit_D.pdf;
Exhibit_E.pdf; Exhibit_F.pdf
Alexey,
It was nice speaking with you on Monday. As we discussed, I have drafted a declaration for you
to review and sign. Please review the statements to confirm that you have personal knowledge
that they are true. If you have questions, please let me know so that we can set up a call to
discuss. If you have personal knowledge that the statements in the declaration are true, please
sign it, and email me a signed copy.
I also need for you to mail (snail mail) me the original after you sign. If you have access to
FedEx, I can give you a number to charge the shipping to (so that you don't have to spend any of
your money to ship it). If FedEx is not convenient for you, let me know and we can make
another arrangement.
Thank you again for your help.
Regards,
Alton
Alton Absher
Kilpatrick Stockton LLP
1001 West Fourth Street | Winston-Salem, NC 27101-2400
office 336 607 7307 | cell 336 926 0211 | fax 336 734 2755
aabsher@kilpatrickstockton.com | My Profile
CONFIDENTIAL
KTS0000001
From:
Sent:
To:
Subject:
Alexey Kuznetsov [kuznet@ms2.inr.ac.ru]
Wednesday, December 15, 2010 8:53 AM
Absher, Alton
Re: route.c declaration
Attachments:
Document (1).pdf; Document (2).pdf
Document (1).pdf Document (2).pdf
(657 KB)
(641 KB)
On Tue, Dec 14, 2010 at 05:38:03PM -0500, Absher, Alton wrote:
> Alexey,
>
> It was nice speaking with you on Monday. As we discussed, I have drafted a declaration
for you to review and sign. Please review the statements to confirm that you have
personal knowledge that they are true. If you have questions, please let me know so that
we can set up a call to discuss. If you have personal knowledge that the statements in
the declaration are true, please sign it, and email me a signed copy.
Everything is correct. Scans of two pages of signed document are enclosed.
> I also need for you to mail (snail mail) me the original after you sign. If you have
access to FedEx, I can give you a number to charge the shipping to (so that you don't have
to spend any of your money to ship it). If FedEx is not convenient for you, let me know
and we can make another arrangement.
Seems, fedex is OK.
Alexey
1
CONFIDENTIAL
KTS0000237
From:
Sent:
To:
Subject:
Absher, Alton
Wednesday, December 15, 2010 9:01 AM
'Alexey Kuznetsov'
RE: route.c declaration
Thanks Alexey. Our FedEx number is 027406777.
the address below:
Please send the signed original to me at
Alton Absher
Kilpatrick Stockton LLP
1001 West Fourth Street
Winston-Salem, NC 27101-2400
United States of America
-----Original Message----From: Alexey Kuznetsov [mailto:kuznet@ms2.inr.ac.ru]
Sent: Wednesday, December 15, 2010 8:53 AM
To: Absher, Alton
Subject: Re: route.c declaration
On Tue, Dec 14, 2010 at 05:38:03PM -0500, Absher, Alton wrote:
> Alexey,
>
> It was nice speaking with you on Monday. As we discussed, I have drafted a declaration
for you to review and sign. Please review the statements to confirm that you have
personal knowledge that they are true. If you have questions, please let me know so that
we can set up a call to discuss. If you have personal knowledge that the statements in
the declaration are true, please sign it, and email me a signed copy.
Everything is correct. Scans of two pages of signed document are enclosed.
> I also need for you to mail (snail mail) me the original after you sign. If you have
access to FedEx, I can give you a number to charge the shipping to (so that you don't have
to spend any of your money to ship it). If FedEx is not convenient for you, let me know
and we can make another arrangement.
Seems, fedex is OK.
Alexey
1
CONFIDENTIAL
KTS0000240
Disclaimer: Justia Dockets & Filings provides public litigation records from the federal appellate and district courts. These filings and docket sheets should not be considered findings of fact or liability, nor do they necessarily reflect the view of Justia.
Why Is My Information Online?