
Add scripts to download and process BGP dumps

master
Gleb Naumenko committed 5 years ago
commit cb24d25f13
remote_dumps/README.md           | 25 +
remote_dumps/download_dumps.py   | 22 +
remote_dumps/prepare.sh          |  5 +
remote_dumps/quagga_aggregate.py | 72 +
remote_dumps/quagga_parse.sh     |  8 +
remote_dumps/setup.sh            | 12 +
6 files changed, 144 insertions(+)

remote_dumps/README.md
@@ -0,0 +1,25 @@
This set of scripts downloads, parses, and aggregates BGP announcement dumps from open repositories, for use in asmap construction.
### Pre-reqs
``./setup.sh`` creates the working directories and builds/installs ``bgpdump``.
### Use
0. ``./prepare.sh`` deletes old data.
1. ``./download_dumps.py`` downloads the latest RIPE RIS dump from each route collector into the `dumps` folder (the current date is used only to name the files).
2. ``./quagga_parse.sh`` reads the dumps from the `dumps` folder and writes a human-readable `prefix|as-path` interpretation to the `paths` folder.
3. ``./quagga_aggregate.py`` goes through the interpreted dumps in the ``paths`` folder, aggregates the AS paths, and assigns every IP prefix to the first element of the common suffix of its AS paths.
The resulting ``prefix_asns.out`` (format shown below) can be fed to ``../buildmap.py``.
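
Each output line maps one prefix to a single ASN. An illustrative sample of ``prefix_asns.out`` (the prefixes and ASNs here are made up, not real data):

```
1.2.3.0/24 AS65001
5.6.7.0/23 AS65002
```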
### Rationale
Consider the following scenario:

    1.2.3.4: A -> B -> C -> X
    1.2.3.4: A -> F -> C -> X

In this case, {C, X} is the common suffix, and we will map 1.2.3.4 to C, because C represents the single infrastructure required to reach that IP address.
Note that diversifying by C would implicitly diversify by X too.
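
A minimal sketch of the suffix computation on the example above (``common_suffix`` is a hypothetical helper for illustration; the actual ``quagga_aggregate.py`` streams the dumps in chunks instead):

```python
def common_suffix(paths):
    # Walk all paths backwards in lockstep; keep elements while every path agrees.
    suffix = []
    for column in zip(*(reversed(p) for p in paths)):
        if len(set(column)) != 1:
            break
        suffix.append(column[0])
    return list(reversed(suffix))

paths = [["A", "B", "C", "X"],
         ["A", "F", "C", "X"]]
assert common_suffix(paths) == ["C", "X"]
# 1.2.3.4 is then mapped to the first element of the suffix: C.
```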

remote_dumps/download_dumps.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
import datetime
import urllib.request

providers = range(1, 24)  # RIPE RIS route collectors rrc01..rrc23
date = datetime.date.today()  # used only to name the downloaded files
dumps_dir = "dumps/"

for provider in providers:
    provider = "{:02d}".format(provider)
    link = "http://data.ris.ripe.net/rrc{0}/latest-bview.gz".format(provider)
    dump_name = "dump_{0}_{1}.gz".format(provider, date)
    print(link)
    try:
        dump = urllib.request.urlopen(link)
    except Exception:
        print('Failed to download: ' + link)
        continue
    with open(dumps_dir + dump_name, 'wb+') as output:
        output.write(dump.read())

remote_dumps/prepare.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# Remove artifacts from previous runs; -f so missing files are not an error.
rm -f dumps/*
rm -f paths/*
rm -f prefix_asns.out

remote_dumps/quagga_aggregate.py
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
import os
import re

PARSED_DUMPS_DIR = 'paths/'
RESULT_OUTPUT = 'prefix_asns.out'

# Matches everything before the first '.' or ':' of a prefix, i.e. its first octet.
first_octet = re.compile(r"^[^.|:]*")

# Remove consecutive duplicate ASNs in a row (e.g. from AS-path prepending)
# [1, 1, 2, 3, 3, 3] -> [1, 2, 3]
def dedup(asn_path):
    i = len(asn_path) - 1  # start at the last pair, so trailing duplicates are caught too
    while i > 0:
        if asn_path[i] == asn_path[i - 1]:
            asn_path = asn_path[0:i] + asn_path[i+1:]
        i -= 1
    return asn_path

def find_common_suffixes(prefix_asn_paths, common_asn_suffix):
    for prefix, asn_lists in prefix_asn_paths.items():
        asn_lists = [dedup(asn_list.split()) for asn_list in asn_lists]  # preprocess
        asn_lists = [asn_list for asn_list in asn_lists if asn_list]  # empty paths appear very rarely in dumps
        if len(asn_lists) == 0:
            continue
        asn_lists.sort(key=len)
        cur_asn_suffix = asn_lists[0]  # represents the common sub-path (from the end) of ASNs to a prefix
        for asn_list in asn_lists[1:]:
            if cur_asn_suffix == asn_list:
                continue
            if cur_asn_suffix[-1] != asn_list[-1]:  # multi-homed
                break
            for i in range(1, len(cur_asn_suffix)):  # position from the end
                if cur_asn_suffix[len(cur_asn_suffix) - i - 1] != asn_list[len(asn_list) - i - 1]:
                    cur_asn_suffix = cur_asn_suffix[len(cur_asn_suffix) - i:]
                    break
        common_asn_suffix[prefix] = cur_asn_suffix

def process_files():
    res = dict()
    files = os.listdir(PARSED_DUMPS_DIR)
    step = 40
    for i in range(1, 256, step):  # process IP-range chunks so that memory is not filled
        print("Working on chunk: ", i, flush=True)
        announcements = dict()
        for file_name in files:
            print('Reading file: ', file_name, flush=True)
            with open(PARSED_DUMPS_DIR + file_name, "r") as file:
                for line in file:
                    announcement_data = re.sub(r'{[^}]+}', ' ', line.strip())  # removes {} AS-sets in the AS path
                    announcement_data = announcement_data.split('|')
                    prefix = announcement_data[0]
                    first_oc = re.search(first_octet, prefix).group(0)
                    if first_oc == '' or int(first_oc) >= i + step:  # dumps are sorted by prefix, so we passed the current chunk
                        break
                    if int(first_oc) < i:  # current chunk is ahead
                        continue
                    asns = announcement_data[1]
                    announcements.setdefault(prefix, set()).add(asns)
        find_common_suffixes(announcements, res)
    return res

def dump_result(prefix_unique_asn_suffixes):
    with open(RESULT_OUTPUT, 'w+') as file:
        for prefix, unique_asn_suffix in prefix_unique_asn_suffixes.items():
            file.write("%s AS%s\n" % (prefix, unique_asn_suffix[0]))

res = process_files()
dump_result(res)

remote_dumps/quagga_parse.sh
@@ -0,0 +1,8 @@
#!/bin/bash
for mrt in `ls dumps`; do
    /bin/echo -n "processing $mrt... "
    # Keep only the prefix and AS-path fields (6 and 7) of bgpdump's one-line format.
    /usr/local/bin/bgpdump -vm "dumps/$mrt" | cut -d '|' -f '6,7' > "paths/$mrt"
done

remote_dumps/setup.sh
@@ -0,0 +1,12 @@
#!/bin/bash
mkdir -p dumps paths
wget http://ris.ripe.net/source/bgpdump/libbgpdump-1.6.0.tgz
tar zxvf libbgpdump-1.6.0.tgz
rm libbgpdump-1.6.0.tgz
cd libbgpdump-1.6.0
./bootstrap.sh
make install # installs bgpdump to /usr/local/bin; may require root
cd ..