
Adds exception for tshark.

master
Lukas Prause, 2 years ago
Commit c9b7d8fc0d
1 changed file with 356 additions and 0 deletions:
  1. format_throughput_pcap_to_csv.py (+356, -0)

format_throughput_pcap_to_csv.py

@@ -0,0 +1,356 @@
#!/usr/bin/env python3
import datetime
import multiprocessing
import os
import re
import subprocess

from argparse import ArgumentParser
from io import StringIO
from itertools import islice
from math import ceil
from time import sleep, time

import pandas as pd

# Reference tshark invocation this script automates:
# tshark -r ./tcp-cap-test/test__bandwidth_reverse_tcp_bbr_1.pcap -Y "tcp.stream eq 1" -T fields -e frame.time_relative -e ip.len -e ip.hdr_len -e tcp.hdr_len -e tcp.analysis.ack_rtt -e tcp.analysis.bytes_in_flight -e tcp.analysis.retransmission -e tcp.analysis.duplicate_ack -e ip.dst -e ip.src -e tcp.options.mss_val -E header=y -E separator=, -E quote=d


def format_tcp_trace_to_csv(pcap_number, packets_to_keep, is_reverse=False):
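    """Parse a kernel tcp_probe trace .txt file into a pandas DataFrame.

    Keeps only lines from the measurement flow within the last
    `packets_to_keep` entries (skipping the iperf handshake) and returns the
    columns time_tcp_probe, snd_cwnd, snd_wnd and srtt, or None if the file
    is missing or faulty.
    """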
    txt_name = "{}{}{}.txt".format(args.folder, args.tcp_trace, pcap_number)
    try:
        txt_file = open(txt_name, "r")
    except IOError as e:
        print("\rCannot open file {}\n {} {}".format(txt_name, e.errno, e.strerror))
        return None
    content = txt_file.read()
    txt_file.close()

    csv_string = "time_tcp_probe,snd_cwnd,snd_wnd,srtt\n"
    uptime = None
    counter = 0
    lines = content.split("\n")
    start_time = None
    for line in lines:
        counter += 1
        if uptime is None:
            # the first line of the trace carries the system uptime
            uptime = float(line.split(" ")[0])
        else:
            if is_reverse:
                line_filter = "src=[::ffff:{}]:{}".format(args.server, args.port)
            else:
                line_filter = "dest={}:{}".format(args.server, args.port)

            # ignore tcp packets from the iperf SYN handshake
            # (packets_to_keep = length of tcp.stream eq 1)
            if line_filter in line and counter >= (len(lines) - packets_to_keep):
                match = re.match(
                    r".* (\d+\.\d+): tcp_probe:.*snd_cwnd=(\d+).*snd_wnd=(\d+).*srtt=(\d+)",
                    line,
                )
                if match:
                    if start_time is None:
                        start_time = float(match.group(1)) - uptime
                    # avoid shadowing time() imported from the time module
                    rel_time = float(match.group(1)) - (uptime + start_time)
                    snd_cwnd = match.group(2)
                    snd_wnd = match.group(3)
                    srtt = match.group(4)
                    csv_string += "{},{},{},{}\n".format(
                        rel_time, snd_cwnd, snd_wnd, srtt
                    )

    csv_string_io = StringIO(csv_string)

    trace_df = pd.read_csv(csv_string_io)
    if len(trace_df) <= 1:
        print("\rFaulty tcp trace file for pcap no: {}".format(pcap_number))
        return None
    return trace_df


def format_pcaps_to_csv(pcaps, dummy):
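    """Convert each pcap in `pcaps` into a CSV file in the same folder.

    Runs tshark per pcap, keeps only the last TCP stream, derives direction
    and congestion control from the filename, optionally merges tcp_probe
    trace data, and bumps the shared counter `n` per processed file. Relies
    on fork-based multiprocessing: `args`, `regex` and `n` are globals
    inherited from the main process.
    """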
    global n
    for pcap in pcaps:
        if pcap.endswith(".pcap") and pcap.startswith(args.prefix):
            match = re.match(regex, pcap)
            if match:
                # metadata from pcap filename
                direction = "upload"
                if "_reverse_" in pcap:
                    direction = "download"
                congestion_control = match.group(2)
                pcap_number = match.group(3)

                # analyse traffic from pcap (receiver side)
                tshark_command = [
                    "tshark",
                    "-r",
                    "{}{}".format(args.folder, pcap),
                    # remove this for mobile measurements
                    # "-Y",
                    # "tcp.stream eq 1",
                    "-T",
                    "fields",
                    "-e",
                    "frame.time_relative",
                    "-e",
                    "ip.len",
                    "-e",
                    "ip.hdr_len",
                    "-e",
                    "tcp.hdr_len",
                    "-e",
                    "tcp.analysis.ack_rtt",
                    "-e",
                    "tcp.analysis.bytes_in_flight",
                    "-e",
                    "tcp.analysis.retransmission",
                    "-e",
                    "tcp.analysis.duplicate_ack",
                    "-e",
                    "ip.src",
                    "-e",
                    "ip.dst",
                    "-e",
                    "tcp.options.mss_val",
                    "-e",
                    "tcp.window_size",
                    "-e",
                    "frame.time_epoch",
                    "-e",
                    "tcp.stream",  # has to be the last field in the line!
                    "-E",
                    "header=y",
                    "-E",
                    "separator=,",
                    "-E",
                    "quote=d",
                ]
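
                # Run tshark and catch failures so that one bad capture does
                # not abort the whole batch; the script treats exit code 2 as
                # an unreadable pcap.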

                tshark_out = None
                try:
                    tshark_out = subprocess.check_output(tshark_command).decode(
                        "utf-8"
                    )
                except subprocess.CalledProcessError as tsharkexec:
                    if tsharkexec.returncode == 2:
                        print("\rtshark could not open pcap: {}".format(pcap))
                    else:
                        print(
                            "\rtshark exited with code: {}".format(
                                tsharkexec.returncode
                            )
                        )
                        print(tsharkexec.output)
                    continue

                # convert the string into a StringIO so pandas can read it
                # like a file
                csv_string_io = StringIO(tshark_out)

                # tshark leaves these fields empty unless the flag applies
                conv_bool = lambda x: x != ""

                pcap_df = pd.read_csv(
                    csv_string_io,
                    converters={
                        "tcp.analysis.retransmission": conv_bool,
                        "tcp.analysis.duplicate_ack": conv_bool,
                    },
                )

                # keep only the last TCP stream in the capture
                # (the measurement flow)
                last_tcp_stream_in_pcap = pcap_df["tcp.stream"].max()
                pcap_df = pcap_df.loc[pcap_df["tcp.stream"] == last_tcp_stream_in_pcap]

                pcap_df["payload_size"] = pcap_df["ip.len"] - (
                    pcap_df["ip.hdr_len"] + pcap_df["tcp.hdr_len"]
                )
                pcap_df["direction"] = direction
                pcap_df["congestion_control"] = congestion_control
                pcap_df["pcap_number"] = pcap_number
                pcap_df["datetime"] = pd.to_datetime(
                    pcap_df["frame.time_epoch"].apply(
                        lambda x: datetime.datetime.fromtimestamp(x)
                    )
                )

                pcap_df = pcap_df.drop(
                    columns=["tcp.stream", "ip.len", "ip.hdr_len", "tcp.hdr_len"]
                )

                pcap_df.rename(
                    columns={
                        "frame.time_relative": "arrival_time",
                        "ip.src": "src_ip",
                        "ip.dst": "dst_ip",
                        "tcp.options.mss_val": "mss",
                        "tcp.analysis.ack_rtt": "ack_rtt",
                        "tcp.analysis.bytes_in_flight": "bytes_in_flight",
                        "tcp.window_size": "receive_window_size",
                        "tcp.analysis.retransmission": "is_retransmission",
                        "tcp.analysis.duplicate_ack": "is_dup_ack",
                        "frame.time_epoch": "time_epoch",
                    },
                    inplace=True,
                )

                pcap_df = pcap_df.sort_values("arrival_time")
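                # Optionally join kernel tcp_probe samples (snd_cwnd, snd_wnd,
                # srtt) onto the pcap rows via a nearest-timestamp merge;
                # disabled by default through the merge_srtt flag.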
                try:
                    # join tcp_trace data with pcap data
                    merge_srtt = False
                    if merge_srtt:
                        tcp_trace_df = format_tcp_trace_to_csv(
                            pcap_number,
                            len(pcap_df),
                            is_reverse="_reverse_" in pcap,
                        )

                        if tcp_trace_df is None:
                            print(
                                "\rNo tcp trace file for pcap no {} found".format(
                                    pcap_number
                                )
                            )
                            break
                        # match each incoming packet to the closest tcp_probe
                        # sample within 10 ms
                        merged_df = pd.merge_asof(
                            pcap_df.loc[pcap_df["src_ip"] != args.server],
                            tcp_trace_df,
                            left_on="arrival_time",
                            right_on="time_tcp_probe",
                            tolerance=0.01,
                        )
                        merged_df = pd.concat(
                            [merged_df, pcap_df.loc[pcap_df["src_ip"] == args.server]]
                        )
                        merged_df = merged_df.sort_values("arrival_time")
                        merged_df.to_csv(
                            "{}{}".format(args.folder, pcap).replace(".pcap", ".csv")
                        )
                    else:
                        pcap_df.to_csv(
                            "{}{}".format(args.folder, pcap).replace(".pcap", ".csv")
                        )
                except Exception:
                    print("\rCould not merge data for pcap no: {}".format(pcap))
                    pcap_df.to_csv(
                        "{}{}".format(args.folder, pcap).replace(".pcap", ".csv")
                    )

                n.value += 1  # shared progress counter

            else:
                print("File does not match regex: {}".format(pcap))
        else:
            print("File is not of type PCAP: {}".format(pcap))


def chunk(it, size):
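    """Split an iterable into consecutive tuples of at most `size` items."""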
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())
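

# e.g. list(chunk(range(5), 2)) == [(0, 1), (2, 3), (4,)]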


if __name__ == "__main__":

    parser = ArgumentParser()
    parser.add_argument("-f", "--folder", required=True, help="Folder with pcaps.")
    parser.add_argument(
        "-p",
        "--prefix",
        required=True,
        help="Filename prefix, e.g. 2021-03-17_bandwidth_tcp_bbr_",
    )
    parser.add_argument(
        "-t",
        "--tcp_trace",
        required=True,
        help="Format of tcp trace txt files, e.g. 2021_03_30_bandwidth_reverse_tcp_tcp_trace_ for "
        "2021_03_30_bandwidth_reverse_tcp_tcp_trace_1.txt",
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "--port",
        default=5201,
        type=int,
        help="iPerf3 port used for measurements.",
    )
    parser.add_argument(
        "--server",
        default="130.75.73.69",
        type=str,
        help="iPerf3 server IP used for measurements.",
    )

    args = parser.parse_args()
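
    # Example invocation (folder and prefixes are placeholders):
    #   python3 format_throughput_pcap_to_csv.py -f ./pcaps/ \
    #       -p 2021-03-17_bandwidth_tcp_bbr_ \
    #       -t 2021_03_30_bandwidth_reverse_tcp_tcp_trace_ -c 4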

    manager = multiprocessing.Manager()

    # regex for protocol, congestion-control algorithm and pcap number
    regex = r".*_bandwidth_(?:reverse_)?(.+)_(.+)_(\d+)\.pcap"
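    # e.g. "2021-03-17_bandwidth_reverse_tcp_bbr_1.pcap"
    #   -> protocol "tcp", congestion control "bbr", pcap number "1"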

    csv_header = "n,start_time,end_time,payload_size,protocol,algorithm,direction,packages_received,syns_in_pcap\n"
    n = manager.Value("i", 0)  # shared counter of processed pcaps
    filenames = os.listdir(args.folder)
    number_of_files = len(filenames)

    pcap_list = []
    jobs = []

    st = time()

    for filename in filenames:
        if filename.endswith(".pcap") and filename.startswith(args.prefix):
            if re.match(regex, filename):
                pcap_list.append(filename)
    pcap_list.sort()

    print("Found {} pcap files in {} files.".format(len(pcap_list), len(filenames)))
    if len(pcap_list) == 0:
        print("Abort, no pcaps found with prefix: {}".format(args.prefix))
        print("{}{}".format(args.folder, args.prefix))
        exit(1)

    # split the work into roughly one equal-sized part per core
    parts = chunk(pcap_list, ceil(len(pcap_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))


    for p in parts:
        process = multiprocessing.Process(target=format_pcaps_to_csv, args=(p, "dummy"))
        jobs.append(process)

    for j in jobs:
        j.start()

    print("Started all jobs.")
    # ensure all of the processes have finished
    finished_job_counter = 0
    working = ["|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcaps. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_list),
                round((n.value / len(pcap_list)) * 100, 2),
            ),
            end="",
        )
        finished_job_counter = 0
        for j in jobs:
            if not j.is_alive():
                finished_job_counter += 1
        w = (w + 1) % len(working)
    print("")

    et = time()
    # get the execution time
    elapsed_time = et - st
    print("Execution time:", elapsed_time, "seconds")
