|
|
@ -1,25 +1,94 @@ |
|
|
|
""" |
|
|
|
Intake a map of IP prefixes -> AS numbers and output instructions that will |
|
|
|
allow a decoder to match an IP address to an ASN by following a sequence |
|
|
|
of instructions. |
|
|
|
|
|
|
|
The instructions describe a prefix tree that can be navigated using the bits of |
|
|
|
an IP address (i.e. 0 for left child, 1 for right child, leaf nodes |
|
|
|
corresponding to a given ASN). The types of instructions are denoted by the |
|
|
|
*Type() functions defined below. Once an IP address specifies a bit for which |
|
|
|
there is no path in the tree (i.e. the part of its address more specific than |
|
|
|
any known network prefix), the tree returns a "default" ASN value that has been |
|
|
|
set based on the last valid location in the tree. |
|
|
|
|
|
|
|
See `testmap.py:Interpret` for an illustration of how the decoding process |
|
|
|
works. |
|
|
|
|
|
|
|
Before the prefix tree is encoded into instructions using bits, it is compacted |
|
|
|
(e.g. duplicate subtrees removed) and annotated with which default ASN values |
|
|
|
should be set for particular regions of the tree. |
|
|
|
""" |
|
|
|
import sys |
|
|
|
import re |
|
|
|
import ipaddress |
|
|
|
from collections import namedtuple |
|
|
|
|
|
|
|
|
|
|
|
def Parse(entries: list): |
|
|
|
""" |
|
|
|
Read in a file of the format |
|
|
|
|
|
|
|
def Parse(entries): |
|
|
|
1.0.0.0/24 AS13335 # ipv4.dump:4856343 |
|
|
|
1.0.4.0/22 AS56203 # ipv4.dump:2759291 |
|
|
|
... |
|
|
|
|
|
|
|
Ignoring comments following '#'. Creates an Entry object for each line. |
|
|
|
Maps IPv4 networks into IPv6 space. |
|
|
|
|
|
|
|
Args: |
|
|
|
entries: modified in place with the new Entrys. |
|
|
|
""" |
|
|
|
for line in sys.stdin: |
|
|
|
line = line.split('#')[0].lstrip(' ').rstrip(' \r\n') |
|
|
|
prefix, asn = line.split(' ') |
|
|
|
assert(len(asn) > 2 and asn[:2] == "AS") |
|
|
|
network = ipaddress.ip_network(prefix) |
|
|
|
|
|
|
|
prefix_len = network.prefixlen |
|
|
|
net_addr = int.from_bytes(network.network_address.packed, 'big') |
|
|
|
|
|
|
|
# Map an IPv4 prefix into IPv6 space. |
|
|
|
if isinstance(network, ipaddress.IPv4Network): |
|
|
|
entries.append((network.prefixlen + 96, int.from_bytes(network.network_address.packed, 'big') + 0xffff00000000, int(asn[2:]))) |
|
|
|
elif isinstance(network, ipaddress.IPv6Network): |
|
|
|
entries.append((network.prefixlen, int.from_bytes(network.network_address.packed, 'big'), int(asn[2:]))) |
|
|
|
prefix_len += 96 |
|
|
|
net_addr += 0xffff00000000 |
|
|
|
|
|
|
|
entries.append(Entry(prefix_len, net_addr, int(asn[2:]))) |
|
|
|
|
|
|
|
# Add a list of (prefixlen, addrbits, asn) entries to a tree |
|
|
|
def UpdateTree(gtree, addrlen, entries): |
|
|
|
|
|
|
|
Entry = namedtuple('Entry', ( |
|
|
|
# The length of the network prefix in bits. E.g. '26' for 255.255.0.0/26. |
|
|
|
'prefix_len', |
|
|
|
|
|
|
|
# An int containing the bits of the network address. |
|
|
|
'net_addr', |
|
|
|
|
|
|
|
# An int for the autonomous system (AS) number. |
|
|
|
'asn', |
|
|
|
)) |
|
|
|
|
|
|
|
|
|
|
|
def UpdateTree(gtree, addrlen: int, entries: [Entry]): |
|
|
|
""" |
|
|
|
Returns a prefix tree such that following a path down through the |
|
|
|
tree based on the bits of a network prefix (in order of most significant |
|
|
|
bit) leads to an ASN. |
|
|
|
|
|
|
|
Args: |
|
|
|
gtree: tree structure to encode the mappings into. Modified in-place. |
|
|
|
addrlen: The maximum number of bits in a network address. |
|
|
|
This is 128 for IPv6 (16 bytes). |
|
|
|
entries: The network prefix -> ASN mappings to encode. |
|
|
|
""" |
|
|
|
for prefix, val, asn in sorted(entries): |
|
|
|
tree = gtree |
|
|
|
default = None |
|
|
|
|
|
|
|
# Iterate through each bit in the network prefix, starting with the |
|
|
|
# most significant bit. |
|
|
|
for i in range(prefix): |
|
|
|
bit = (val >> (addrlen - 1 - i)) & 1 |
|
|
|
|
|
|
|
# If we have passed the end of the network prefix, all entries |
|
|
|
# under subsequent bits will be associated with the same ASN. |
|
|
|
needs_inner = i < prefix - 1 |
|
|
|
if tree[bit] is None: |
|
|
|
if needs_inner: |
|
|
@ -44,9 +113,20 @@ def UpdateTree(gtree, addrlen, entries): |
|
|
|
tree = tree[bit] |
|
|
|
return gtree |
|
|
|
|
|
|
|
# Remove redundancy from a tree. |
|
|
|
# If approx is True, unassigned ranges may get reassigned to arbitrary ASNs. |
|
|
|
def CompactTree(tree, approx=True): |
|
|
|
|
|
|
|
def CompactTree(tree, approx=True) -> (list, set): |
|
|
|
""" |
|
|
|
Remove redundancy from a tree. |
|
|
|
|
|
|
|
E.g. if all nodes in a subtree point to the same ASN, compact the subtree |
|
|
|
into a single int. |
|
|
|
|
|
|
|
Returns: |
|
|
|
(the compacted tree, a set of all ASNs in the tree) |
|
|
|
|
|
|
|
Args: |
|
|
|
approx: if True, unassigned ranges may get reassigned to arbitrary ASNs. |
|
|
|
""" |
|
|
|
num = 0 |
|
|
|
if tree is None: |
|
|
|
return (tree, set()) |
|
|
@ -63,60 +143,119 @@ def CompactTree(tree, approx=True): |
|
|
|
return tree[0], set([tree[0]]) |
|
|
|
return (tree, allas) |
|
|
|
|
|
|
|
# Get the (key, value) with maximum value from a dict. |
|
|
|
def DictMax(d): |
|
|
|
mk = None |
|
|
|
mv = None |
|
|
|
for k, v in d.items(): |
|
|
|
if mv is None or v > mv: |
|
|
|
mk, mv = k, v |
|
|
|
return mk, mv |
|
|
|
|
|
|
|
# Annotate internal nodes in the tree with the most common leafs below it. |
|
|
|
# The binary serialization later uses this. |
|
|
|
def PropTree(tree, approx=True): |
|
|
|
|
|
|
|
def PropTree(tree, approx=True) -> (list, Counter, bool): |
|
|
|
""" |
|
|
|
Annotate internal nodes in the tree with the most common leafs below it. |
|
|
|
The binary serialization later uses this. |
|
|
|
|
|
|
|
This changes the shape of the `tree` datastructure from |
|
|
|
`[left_child, right_child]` to `[lc, rc, max_ASN_in_tree]`. |
|
|
|
|
|
|
|
Returns: |
|
|
|
(tree, Counter of ASNs in tree, whether or not tree is empty) |
|
|
|
""" |
|
|
|
if tree is None: |
|
|
|
return (tree, {}, True) |
|
|
|
return (tree, Counter(), True) |
|
|
|
if isinstance(tree, int): |
|
|
|
return (tree, {tree: 1}, False) |
|
|
|
return (tree, Counter({tree: 1}), False) |
|
|
|
tree[0], leftcnt, leftnone = PropTree(tree[0], approx) |
|
|
|
tree[1], rightcnt, rightnone = PropTree(tree[1], approx) |
|
|
|
allcnt = {k: leftcnt.get(k, 0) + rightcnt.get(k, 0) for k in set(leftcnt) | set(rightcnt)} |
|
|
|
allcnt = leftcnt + rightcnt |
|
|
|
allnone = leftnone | rightnone |
|
|
|
maxasn, maxcount = DictMax(allcnt) |
|
|
|
maxasn, maxcount = allcnt.most_common(1)[0] |
|
|
|
if maxcount is not None and maxcount >= 2 and (approx or not allnone): |
|
|
|
return ([tree[0], tree[1], maxasn], {maxasn: 1}, allnone) |
|
|
|
return ([tree[0], tree[1], maxasn], Counter({maxasn: 1}), allnone) |
|
|
|
return (tree, allcnt, allnone) |
|
|
|
|
|
|
|
def EncodeBits(val, minval, bit_sizes): |
|
|
|
|
|
|
|
def EncodeBits(val, minval, bit_sizes) -> [int]: |
|
|
|
""" |
|
|
|
Perform a variable-length encoding of a value to bits, least significant |
|
|
|
bit first. |
|
|
|
|
|
|
|
For each `bit_sizes` passed, attempt to encode the value with that number |
|
|
|
of bits + 1. Normalize the encoded value by `minval` to potentially save |
|
|
|
bits - the value will be corrected during decoding. |
|
|
|
|
|
|
|
Returns: |
|
|
|
a list of bits representing the value to encode. |
|
|
|
""" |
|
|
|
val -= minval |
|
|
|
ret = [] |
|
|
|
for pos in range(len(bit_sizes)): |
|
|
|
bit_size = bit_sizes[pos] |
|
|
|
|
|
|
|
# If the value will not fit in `bit_size` bits, absorb the largest |
|
|
|
# value for this bitsize and continue to the next smallest size. |
|
|
|
if val >= (1 << bit_size): |
|
|
|
val -= (1 << bit_size) |
|
|
|
ret += [1] |
|
|
|
else: |
|
|
|
# If we aren't encoding the largest possible value per the largest |
|
|
|
# bitsize... |
|
|
|
if (pos + 1 < len(bit_sizes)): |
|
|
|
ret += [0] |
|
|
|
|
|
|
|
# Use remaining bits to encode the rest of val. |
|
|
|
for b in range(bit_size): |
|
|
|
ret += [(val >> (bit_size - 1 - b)) & 1] |
|
|
|
return ret |
|
|
|
|
|
|
|
# Couldn't fit val into any of the bit_sizes |
|
|
|
assert(False) |
|
|
|
|
|
|
|
def EncodeType(v): |
|
|
|
def MatchType() -> [int]: |
|
|
|
""" |
|
|
|
The match instruction descends into the tree based on a bit path. If at any |
|
|
|
point the match fails to hit a valid path through the tree, it will fail |
|
|
|
and return the current default ASN (which changes as we move through the |
|
|
|
tree). |
|
|
|
""" |
|
|
|
return EncodeType(2) |
|
|
|
|
|
|
|
def JumpType() -> [int]: |
|
|
|
""" |
|
|
|
The jump instruction allows us to quickly seek to one side of the tree |
|
|
|
or the other. By encoding the length of the left child, we can skip over |
|
|
|
it to the right child if need be. |
|
|
|
""" |
|
|
|
return EncodeType(1) |
|
|
|
|
|
|
|
def LeafType() -> [int]: |
|
|
|
"""The leaf instruction encodes an ASN at the end of a bit path.""" |
|
|
|
return EncodeType(0) |
|
|
|
|
|
|
|
def SetNewDefaultType() -> [int]: |
|
|
|
""" |
|
|
|
This instruction establishes a new default ASN to return should we fail |
|
|
|
while traversing this path. |
|
|
|
""" |
|
|
|
return EncodeType(3) |
|
|
|
|
|
|
|
def EncodeType(v) -> [int]: |
|
|
|
return EncodeBits(v, 0, [0, 0, 1]) |
|
|
|
|
|
|
|
def EncodeASN(v): |
|
|
|
def EncodeASN(v) -> [int]: |
|
|
|
# It's reasonable to ask why "15" (indicating 16 bits) is the minimum size |
|
|
|
# we might try to pack an ASN into, given there are many ASNs below 2**16. |
|
|
|
# |
|
|
|
# The reason that we start at 15 here is because we want the first bitsize |
|
|
|
# we specify to contain ~50% of the values we are trying to encode - this |
|
|
|
# is because each separate bitsize we try will add a digit to our encoded |
|
|
|
# values, so we simultaneously want to minimize the number of bitsizes we |
|
|
|
# allow while also minimizing the bit length of the encoded data, which |
|
|
|
# is a trade-off. |
|
|
|
return EncodeBits(v, 1, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]) |
|
|
|
|
|
|
|
def EncodeMatch(v): |
|
|
|
def EncodeMatch(v) -> [int]: |
|
|
|
return EncodeBits(v, 2, [1, 2, 3, 4, 5, 6, 7, 8]) |
|
|
|
|
|
|
|
def EncodeJump(v): |
|
|
|
def EncodeJump(v) -> [int]: |
|
|
|
return EncodeBits(v, 17, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]) |
|
|
|
|
|
|
|
def EncodeBytes(bits): |
|
|
|
def EncodeBytes(bits) -> [int]: |
|
|
|
"""Encode a sequence of bits as a sequence of bytes.""" |
|
|
|
val = 0 |
|
|
|
nbits = 0 |
|
|
|
bytes = [] |
|
|
@ -135,6 +274,9 @@ def TreeSer(tree, default): |
|
|
|
match = 1 |
|
|
|
assert(tree is not None) |
|
|
|
assert(not (isinstance(tree, int) and tree == default)) |
|
|
|
|
|
|
|
# If one side of the tree is empty (i.e. represents a path without |
|
|
|
# choices), encode a match instruction up to 8 bits. |
|
|
|
while isinstance(tree, list) and match <= 0xFF: |
|
|
|
if tree[0] is None or tree[0] == default: |
|
|
|
match = (match << 1) + 1 |
|
|
@ -145,21 +287,30 @@ def TreeSer(tree, default): |
|
|
|
else: |
|
|
|
break |
|
|
|
if match >= 2: |
|
|
|
return EncodeType(2) + EncodeMatch(match) + TreeSer(tree, default) |
|
|
|
return MatchType() + EncodeMatch(match) + TreeSer(tree, default) |
|
|
|
|
|
|
|
# Leaf node: return the ASN. |
|
|
|
if isinstance(tree, int): |
|
|
|
return EncodeType(0) + EncodeASN(tree) |
|
|
|
return LeafType() + EncodeASN(tree) |
|
|
|
|
|
|
|
# Return the tree along with a new "default" ASN value should we fail to |
|
|
|
# match while along this path. |
|
|
|
if len(tree) > 2 and tree[2] != default: |
|
|
|
return EncodeType(3) + EncodeASN(tree[2]) + TreeSer(tree, tree[2]) |
|
|
|
return SetNewDefaultType() + EncodeASN(tree[2]) + TreeSer(tree, tree[2]) |
|
|
|
|
|
|
|
left = TreeSer(tree[0], default) |
|
|
|
right = TreeSer(tree[1], default) |
|
|
|
return EncodeType(1) + EncodeJump(len(left)) + left + right |
|
|
|
|
|
|
|
# Start the program by specifying a possible jump to either child of the |
|
|
|
# first node. |
|
|
|
return JumpType() + EncodeJump(len(left)) + left + right |
|
|
|
|
|
|
|
def BuildTree(entries, approx=True): |
|
|
|
tree = [None, None] |
|
|
|
tree = UpdateTree(tree, 128, entries) |
|
|
|
return tree |
|
|
|
|
|
|
|
entries = [] |
|
|
|
entries: [Entry] = [] |
|
|
|
print("[INFO] Loading", file=sys.stderr) |
|
|
|
Parse(entries) |
|
|
|
print("[INFO] Read %i prefixes" % len(entries), file=sys.stderr) |
|
|
|