Adds exception for tshark.
This commit is contained in:
356
format_throughput_pcap_to_csv.py
Executable file
356
format_throughput_pcap_to_csv.py
Executable file
@@ -0,0 +1,356 @@
|
||||
#!/usr/bin/env python3
|
||||
import datetime
|
||||
from io import StringIO
|
||||
from math import ceil
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from time import sleep, time
|
||||
|
||||
# tshark -r ./tcp-cap-test/test__bandwidth_reverse_tcp_bbr_1.pcap -Y "tcp.stream eq 1" -T fields -e frame.time_relative -e ip.len -e ip.hdr_len -e tcp.hdr_len -e tcp.analysis.ack_rtt -e tcp.analysis.bytes_in_flight -e tcp.analysis.retransmission -e tcp.analysis.duplicate_ack -e ip.dst -e ip.src -e tcp.options.mss_val -E header=y -E separator=, -E quote=d
|
||||
from util import chunk_list
|
||||
|
||||
|
||||
def format_tcp_trace_to_csv(pcap_number, packets_to_keep, is_reverse=False):
    """Parse a tcp_probe trace text file into a pandas DataFrame.

    Reads ``<args.folder><args.tcp_trace><pcap_number>.txt`` (the first line
    must carry the system uptime as its first whitespace-separated token),
    extracts the snd_cwnd/snd_wnd/srtt samples for the measured connection
    and returns them with timestamps normalised to the start of the trace.

    Args:
        pcap_number: number suffix of the trace file to read.
        packets_to_keep: only the last ``packets_to_keep`` lines of the trace
            are considered, so earlier packets (e.g. the iperf control
            connection) are skipped.
        is_reverse: True for download (reverse) measurements; selects the
            src/dest line filter matching the measured direction.

    Returns:
        DataFrame with columns time_tcp_probe, snd_cwnd, snd_wnd, srtt, or
        None if the file cannot be read or yields at most one sample.
    """
    txt_name = "{}{}{}.txt".format(args.folder, args.tcp_trace, pcap_number)
    try:
        # `with` guarantees the handle is closed even if read() raises.
        with open(txt_name, "r") as txt_file:
            content = txt_file.read()
    except IOError as e:
        print("\rCan not open file {}\n {} {}".format(txt_name, e.errno, e.strerror))
        return None

    # Collect rows in a list and join once: O(n) instead of quadratic `+=`.
    csv_rows = ["time_tcp_probe,snd_cwnd,snd_wnd,srtt\n"]
    uptime = None
    counter = 0
    lines = content.split("\n")
    start_time = None
    # Compiled once instead of re-matching the pattern string per line.
    probe_re = re.compile(
        r".* (\d+\.\d+): tcp_probe:.*snd_cwnd=(\d+).*snd_wnd=(\d+).*srtt=(\d+)"
    )
    for line in lines:
        counter += 1
        if uptime is None:
            # First line carries the system uptime used to normalise timestamps.
            uptime = float(line.split(" ")[0])
        else:
            if is_reverse:
                line_filter = "src=[::ffff:{}]:{}".format(args.server, args.port)
            else:
                line_filter = "dest={}:{}".format(args.server, args.port)

            # ignore tcp packets from iperf syn (packets to keep = len of tcp.stream.eq 1)
            if line_filter in line and counter >= (len(lines) - packets_to_keep):
                match = probe_re.match(line)
                if match:
                    if start_time is None:
                        start_time = float(match.group(1)) - uptime
                    # Renamed from `time` so the imported time() is not shadowed.
                    probe_time = float(match.group(1)) - (uptime + start_time)
                    snd_cwnd = match.group(2)
                    snd_wnd = match.group(3)
                    srtt = match.group(4)
                    csv_rows.append(
                        "{},{},{},{}\n".format(probe_time, snd_cwnd, snd_wnd, srtt)
                    )

    csv_string_io = StringIO("".join(csv_rows))

    trace_df = pd.read_csv(csv_string_io)
    if len(trace_df) <= 1:
        print("\rFaulty tcp trace file for pcap no: {}".format(pcap_number))
        return None
    return trace_df
|
||||
|
||||
|
||||
def format_pcaps_to_csv(pcaps, dummy):
    """Convert a batch of pcap files into per-pcap CSV files via tshark.

    For every matching pcap in *pcaps* the packet fields are extracted with
    tshark, filtered to the last TCP stream in the capture, enriched with
    metadata parsed from the filename, optionally merged with the tcp_probe
    trace, and written next to the pcap as ``<name>.csv``.

    Args:
        pcaps: iterable of pcap filenames (relative to ``args.folder``).
        dummy: unused; kept so the multiprocessing call signature matches.

    Side effects:
        Writes one CSV per processed pcap and increments the shared counter
        ``n`` (a multiprocessing.Manager Value).
    """
    global n
    for pcap in pcaps:
        if pcap.endswith(".pcap") and pcap.startswith(args.prefix):
            match = re.match(regex, pcap)
            if match:
                # metadata from pcap filename
                direction = "upload"
                if "_reverse_" in pcap:
                    direction = "download"
                congestion_control = match.group(2)
                pcap_number = match.group(3)

                # analyse traffic from pcap (receiver side)
                tshark_command = [
                    "tshark",
                    "-r",
                    "{}{}".format(args.folder, pcap),
                    # remove this for mobile measurements
                    # "-Y",
                    # "tcp.stream eq 1",
                    "-T",
                    "fields",
                    "-e",
                    "frame.time_relative",
                    "-e",
                    "ip.len",
                    "-e",
                    "ip.hdr_len",
                    "-e",
                    "tcp.hdr_len",
                    "-e",
                    "tcp.analysis.ack_rtt",
                    "-e",
                    "tcp.analysis.bytes_in_flight",
                    "-e",
                    "tcp.analysis.retransmission",
                    "-e",
                    "tcp.analysis.duplicate_ack",
                    "-e",
                    "ip.src",
                    "-e",
                    "ip.dst",
                    "-e",
                    "tcp.options.mss_val",
                    "-e",
                    "tcp.window_size",
                    "-e",
                    "frame.time_epoch",
                    "-e",
                    "tcp.stream",  # have to be the last value in line!
                    "-E",
                    "header=y",
                    "-E",
                    "separator=,",
                    "-E",
                    "quote=d",
                ]

                tshark_out = None
                try:
                    tshark_out = subprocess.check_output(tshark_command).decode("utf-8")
                except subprocess.CalledProcessError as tsharkexec:
                    # NOTE(review): exit code 2 is treated as "unreadable capture"
                    if tsharkexec.returncode == 2:
                        print("\rtshark could not open pcap: {}".format(pcap))
                    else:
                        print("\rtshark exited with code: {}".format(tsharkexec.returncode))
                        print(tsharkexec.output)
                    continue

                # Convert String into StringIO
                csv_string_io = StringIO(tshark_out)

                # tshark emits an empty field when an analysis flag is absent,
                # so "non-empty" means the flag is set (def instead of a named lambda).
                def conv_bool(x):
                    return True if x != "" else False

                pcap_df = pd.read_csv(
                    csv_string_io,
                    converters={
                        "tcp.analysis.retransmission": conv_bool,
                        "tcp.analysis.duplicate_ack": conv_bool,
                    },
                )

                # keep only the measurement stream (highest stream index)
                last_tcp_stream_in_pcap = pcap_df["tcp.stream"].max()
                pcap_df = pcap_df.loc[pcap_df["tcp.stream"] == last_tcp_stream_in_pcap]

                pcap_df["payload_size"] = pcap_df["ip.len"] - (
                    pcap_df["ip.hdr_len"] + pcap_df["tcp.hdr_len"]
                )
                pcap_df["direction"] = direction
                pcap_df["congestion_control"] = congestion_control
                pcap_df["pcap_number"] = pcap_number
                pcap_df["datetime"] = pd.to_datetime(
                    pcap_df["frame.time_epoch"].apply(
                        lambda x: datetime.datetime.fromtimestamp(x)
                    )
                )

                pcap_df = pcap_df.drop(
                    columns=["tcp.stream", "ip.len", "ip.hdr_len", "tcp.hdr_len"]
                )

                pcap_df.rename(
                    columns={
                        "frame.time_relative": "arrival_time",
                        "ip.src": "src_ip",
                        "ip.dst": "dst_ip",
                        "tcp.options.mss_val": "mss",
                        "tcp.analysis.ack_rtt": "ack_rtt",
                        "tcp.analysis.bytes_in_flight": "bytes_in_flight",
                        "tcp.window_size": "receive_window_size",
                        "tcp.analysis.retransmission": "is_retranmission",
                        "tcp.analysis.duplicate_ack": "is_dup_ack",
                        "frame.time_epoch": "time_epoch",
                    },
                    inplace=True,
                )

                pcap_df = pcap_df.sort_values("arrival_time")
                # Output path, computed once instead of at each write site.
                csv_path = "{}{}".format(args.folder, pcap).replace(".pcap", ".csv")
                try:
                    # join tcp_trace data with pcap data
                    merge_srtt = False
                    if merge_srtt:
                        tcp_trace_df = format_tcp_trace_to_csv(
                            pcap_number,
                            len(pcap_df),
                            is_reverse=True if "_reverse_" in pcap else False,
                        )

                        if tcp_trace_df is None:
                            print(
                                "\rNo tcp trace file for pcap no {} found".format(
                                    pcap_number
                                )
                            )
                            # was `break`, which silently dropped every
                            # remaining pcap of this worker; skip only this one
                            continue
                        merged_df = pd.merge_asof(
                            pcap_df.loc[pcap_df["src_ip"] != args.server],
                            tcp_trace_df,
                            left_on="arrival_time",
                            right_on="time_tcp_probe",
                            tolerance=0.01,
                        )
                        merged_df = pd.concat(
                            [merged_df, pcap_df.loc[pcap_df["src_ip"] == args.server]]
                        )
                        merged_df = merged_df.sort_values("arrival_time")
                        merged_df.to_csv(csv_path)
                    else:
                        pcap_df.to_csv(csv_path)
                except Exception:
                    # was a bare `except:`, which also swallowed SystemExit and
                    # KeyboardInterrupt; keep the best-effort fallback write
                    print("\rCould not merge data for pcap no: {}".format(pcap))
                    pcap_df.to_csv(csv_path)

                n.value += 1

            else:
                print("File does not match regex: {}".format(pcap))
        else:
            print("File is not from type PCAP: {}".format(pcap))
|
||||
|
||||
|
||||
from itertools import islice
|
||||
|
||||
|
||||
def chunk(it, size):
    """Return an iterator over successive tuples of up to *size* items.

    The final tuple may be shorter than *size* when the input length is not
    a multiple of *size*.
    """
    stream = iter(it)

    def take():
        # islice consumes the next `size` items from the shared iterator.
        return tuple(islice(stream, size))

    # Two-argument iter() stops once the sentinel (empty tuple) is produced.
    return iter(take, ())
|
||||
|
||||
|
||||
if __name__ == "__main__":

    parser = ArgumentParser()
    parser.add_argument("-f", "--folder", required=True, help="Folder with pcaps.")
    parser.add_argument(
        "-p",
        "--prefix",
        required=True,
        help="Filename prefix e.g. 2021-03-17_bandwidth_tcp_bbr_",
    )
    parser.add_argument(
        "-t",
        "--tcp_trace",
        required=True,
        help="Format of tcp trace txt files e.g: 2021_03_30_bandwidth_reverse_tcp_tcp_trace_ for "
        "2021_03_30_bandwidth_reverse_tcp_tcp_trace_1.txt",
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "--port",
        default=5201,
        type=int,
        help="iPerf3 port used for measurements",
    )
    parser.add_argument(
        "--server",
        default="130.75.73.69",
        type=str,
        help="iPerf3 server ip used for measurements",
    )

    args = parser.parse_args()

    manager = multiprocessing.Manager()

    # regex capturing protocol, congestion-control algorithm and run number
    regex = r".*_bandwidth_[reverse_]*(.+)_(.+)_(\d+)\.pcap"

    # counter of processed pcaps, shared between the worker processes
    n = manager.Value("i", 0)
    filenames = os.listdir(args.folder)

    pcap_list = []
    jobs = []

    st = time()

    for filename in filenames:
        if filename.endswith(".pcap") and filename.startswith(args.prefix):
            if re.match(regex, filename):
                pcap_list.append(filename)
    pcap_list.sort()

    print("Found {} pcap files in {} files.".format(len(pcap_list), len(filenames)))
    if len(pcap_list) == 0:
        print("Abort no pcaps found with prefix: {}".format(args.prefix))
        print("{}{}".format(args.folder, args.prefix))
        # raise instead of the site-module injected exit(); same exit code 1
        raise SystemExit(1)

    # split the work into one contiguous slice per core
    parts = chunk(pcap_list, ceil(len(pcap_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))

    for p in parts:
        process = multiprocessing.Process(target=format_pcaps_to_csv, args=(p, "dummy"))
        jobs.append(process)

    for j in jobs:
        j.start()

    print("Started all jobs.")
    # Ensure all of the processes have finished
    finished_job_counter = 0
    working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcaps. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_list),
                round((n.value / len(pcap_list)) * 100, 2),
            ),
            end="",
            # without a flush the "\r" status line may never appear when
            # stdout is block-buffered (e.g. piped output)
            flush=True,
        )
        finished_job_counter = sum(1 for j in jobs if not j.is_alive())
        # advance the spinner, wrapping around the frame list
        w = (w + 1) % len(working)
    print("")

    # reap the finished worker processes
    for j in jobs:
        j.join()

    et = time()
    # get the execution time
    elapsed_time = et - st
    print("Execution time:", elapsed_time, "seconds")
|
||||
Reference in New Issue
Block a user