forked from hush/asmap
Gleb Naumenko
5 years ago
6 changed files with 144 additions and 0 deletions

README.md
@@ -0,0 +1,25 @@

This set of scripts downloads, parses, and aggregates BGP announcement dumps from open repositories, to be used in asmap construction.

### Pre-reqs

``./setup.sh``

### Use

0. ``./prepare.sh`` deletes old data.
1. ``./download_dumps.py`` downloads the latest RIPE dumps (file names are tagged with the current date) to the `dumps` folder.
2. ``./quagga_parse.sh`` reads dumps from the `dumps` folder and writes the human-readable interpretation to the `paths` folder.
3. ``./quagga_aggregate.py`` goes through the interpreted dumps in the ``paths`` folder, aggregates the paths, and assigns every IP prefix to the first element of the common suffix of its AS paths.

The resulting ``prefix_asns.out`` can be fed to ``../buildmap.py``.
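Each line of ``prefix_asns.out`` pairs an IP prefix with the ASN it was assigned to, in the form ``<prefix> AS<asn>``, e.g. ``1.2.3.0/24 AS12345`` (illustrative values).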

### Rationale

Consider the following scenario:

    1.2.3.4: A -> B -> C -> X
    1.2.3.4: A -> F -> C -> X

In this case, {C, X} is the common suffix, and we will map 1.2.3.4 to C, because C represents the single piece of infrastructure required to reach that IP address.

Note that diversifying by C would implicitly diversify by X too.
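
For illustration, here is a minimal standalone sketch of the suffix rule on the scenario above (``common_suffix`` is a hypothetical helper, not part of these scripts; the actual implementation lives in ``quagga_aggregate.py``):

```python
def common_suffix(paths):
    # Walk the paths from the end and keep hops while all paths agree.
    suffix = []
    for hops in zip(*[reversed(p) for p in paths]):
        if len(set(hops)) != 1:
            break
        suffix.insert(0, hops[0])
    return suffix

paths = [["A", "B", "C", "X"],
         ["A", "F", "C", "X"]]
suffix = common_suffix(paths)  # ['C', 'X']
print(suffix[0])               # 'C': the AS that 1.2.3.4 is mapped to
```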

download_dumps.py
@@ -0,0 +1,22 @@

#!/usr/bin/env python3

import urllib.request
import datetime

# RIPE RIS route collectors rrc01 through rrc23.
providers = range(1, 24)
# The date only tags the output file names; the download is always the
# latest full-table snapshot (latest-bview.gz) from each collector.
date = datetime.date.today()

dumps_dir = "dumps/"

for provider in providers:
    provider = "{:02d}".format(provider)
    link = "http://data.ris.ripe.net/rrc{0}/latest-bview.gz".format(provider)
    dump_name = "dump_{0}_{1}.gz".format(provider, date)
    print(link)
    try:
        dump = urllib.request.urlopen(link)
    except Exception:
        print('Failed to download: ' + link)
        continue
    with open(dumps_dir + dump_name, 'wb+') as output:
        output.write(dump.read())

prepare.sh
@@ -0,0 +1,5 @@

#!/bin/bash

# Remove data from previous runs; -f keeps this from failing when
# the files do not exist yet.
rm -f dumps/*
rm -f paths/*
rm -f prefix_asns.out

quagga_aggregate.py
@@ -0,0 +1,72 @@

#!/usr/bin/env python3

import os
import re

PARSED_DUMPS_DIR = 'paths/'
RESULT_OUTPUT = 'prefix_asns.out'
# Matches the first octet of an IPv4 prefix: everything before the
# first '.', '|' or ':'.
first_octet = re.compile(r"^[^.|:]*")


# Remove duplicate ASNs in a row (caused by AS-path prepending):
# [1, 1, 2, 3, 3, 3] -> [1, 2, 3]
def dedup(asn_path):
    i = len(asn_path) - 1  # start at the last element so trailing duplicates are removed too
    while i > 0:
        if asn_path[i] == asn_path[i - 1]:
            asn_path = asn_path[0:i] + asn_path[i+1:]
        i -= 1
    return asn_path


def find_common_suffixes(prefix_asn_paths, common_asn_suffix):
    # For every prefix, shrink a candidate suffix until it is common to
    # all observed AS paths, then record it.
    for prefix, asn_lists in prefix_asn_paths.items():
        asn_lists = [dedup(asn_list.split(' ')) for asn_list in asn_lists]  # preprocess
        asn_lists = [asn_list for asn_list in asn_lists if asn_list != [] and asn_list != ['']]  # empty paths very rarely appear in dumps
        if len(asn_lists) == 0:
            continue
        asn_lists.sort(key=len)
        cur_asn_suffix = asn_lists[0]  # represents the common sub-path (from the end) of ASNs to a prefix
        for asn_list in asn_lists[1:]:
            if cur_asn_suffix == asn_list:
                continue
            if cur_asn_suffix[-1] != asn_list[-1]:  # different origin AS: multi-homed
                break
            cur_asn_suffix_len = len(cur_asn_suffix)
            for i in range(1, cur_asn_suffix_len):  # position from the end
                if cur_asn_suffix[len(cur_asn_suffix) - i - 1] != asn_list[len(asn_list) - i - 1]:
                    cur_asn_suffix = cur_asn_suffix[len(cur_asn_suffix) - i:]  # keep only the agreeing tail
                    break
        common_asn_suffix[prefix] = cur_asn_suffix


def process_files():
    res = dict()
    files = os.listdir(PARSED_DUMPS_DIR)
    step = 40
    # Process the IPv4 space in chunks of `step` first octets so that memory
    # is not filled; relies on each parsed dump being sorted by prefix.
    for i in range(1, 256, step):
        print("Working on chunk: ", i, flush=True)
        announcements = dict()
        for file_name in files:
            print('Reading file: ', file_name, flush=True)
            with open(PARSED_DUMPS_DIR + file_name, "r") as file:
                for line in file:
                    announcement_data = re.sub(r'{[^}]+}', ' ', line.strip())  # removes {} AS sets in the AS path
                    announcement_data = announcement_data.split('|')
                    prefix = announcement_data[0]
                    first_oc = re.search(first_octet, prefix).group(0)
                    if first_oc == '' or int(first_oc) >= i + step:  # passed the current chunk
                        break
                    if int(first_oc) < i:  # current chunk is ahead
                        continue
                    asns = announcement_data[1]
                    announcements.setdefault(prefix, set()).add(asns)
        find_common_suffixes(announcements, res)
    return res


def dump_result(prefix_unique_asn_suffixes):
    # One line per prefix: "<prefix> AS<first ASN of the common suffix>".
    with open(RESULT_OUTPUT, 'w+') as file:
        for prefix, unique_asn_suffix in prefix_unique_asn_suffixes.items():
            file.write("%s AS%s\n" % (prefix, unique_asn_suffix[0]))


res = process_files()
dump_result(res)

quagga_parse.sh
@@ -0,0 +1,8 @@

#!/bin/bash

# bgpdump -m prints pipe-separated records; fields 6 and 7 are the prefix
# and the AS path, so each output line looks like "1.2.3.0/24|1234 5678"
# (illustrative values).
for mrt in dumps/*; do
    OUT=$(basename "$mrt")
    /bin/echo -n "processing $OUT... "
    /usr/local/bin/bgpdump -vm "$mrt" | cut -d '|' -f '6,7' > "paths/$OUT"
    echo "done"
done

setup.sh
@@ -0,0 +1,12 @@

#!/bin/bash

# Create the working directories used by the other scripts.
mkdir -p dumps
mkdir -p paths

# Build and install libbgpdump (provides the bgpdump binary used by
# quagga_parse.sh); `make install` may require root privileges.
wget http://ris.ripe.net/source/bgpdump/libbgpdump-1.6.0.tgz
tar zxvf libbgpdump-1.6.0.tgz
rm libbgpdump-1.6.0.tgz
cd libbgpdump-1.6.0
./bootstrap.sh
make install
cd ..