#!/usr/bin/env python3 |
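"""Compare two TCP transmissions by plotting sRTT and goodput CDFs.

Each transmission is described by a folder of pcap-derived CSV files and a
serial-log CSV. The pcap CSVs are loaded in parallel, merged with the serial
data on time, and the two resulting distributions are plotted against each
other. Example invocation (script name and paths are illustrative):

    ./compare_cdf.py --serial1 s1.csv --serial2 s2.csv \
        --folder1 pcap1/ --folder2 pcap2/ --save plots/ -c 4
"""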
import multiprocessing
import os
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import matplotlib.pyplot as plt
import pandas as pd


def csv_to_dataframe(csv_list, folder):
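    """Worker: load one chunk of pcap CSV files from *folder* into a single
    DataFrame and publish it on the shared ``frame_list``.

    Relies on fork-based multiprocessing: the shared counter ``n`` and the
    shared ``frame_list`` are inherited as globals from the parent process.
    """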
    global n
    global frame_list

    transmission_df = None

    for csv in csv_list:
        tmp_df = pd.read_csv(
            os.path.join(folder, csv),
            # column names kept as spelled in the source CSV headers
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # shift timestamps back by one hour (clock/timezone correction)
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        tmp_df = tmp_df.set_index("datetime")
        tmp_df.index = pd.to_datetime(tmp_df.index)
        if transmission_df is None:
            transmission_df = tmp_df
        else:
            transmission_df = pd.concat([transmission_df, tmp_df])

        n.value += 1

    frame_list.append(transmission_df)


def chunk(it, size): |
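    """Yield the items of *it* in tuples of at most *size* elements."""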
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def plot_cdf(dataframe, column_name): |
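    """Compute the empirical CDF of *column_name* and draw it on the current axes."""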
    stats_df = (
        dataframe.groupby(column_name)[column_name]
        .agg("count")
        .pipe(pd.DataFrame)
        .rename(columns={column_name: "frequency"})
    )

    # PDF
    stats_df["PDF"] = stats_df["frequency"] / stats_df["frequency"].sum()

    # CDF
    stats_df["CDF"] = stats_df["PDF"].cumsum()
    stats_df = stats_df.reset_index()

    # plot on the current axes so consecutive calls overlay their curves
    stats_df.plot(x=column_name, y=["CDF"], grid=True, ax=plt.gca())


if __name__ == "__main__": |
    parser = ArgumentParser()
    parser.add_argument("--serial1", required=True, help="Serial CSV file of the first transmission.")
    parser.add_argument("--serial2", required=True, help="Serial CSV file of the second transmission.")
    parser.add_argument("--folder1", required=True, help="PCAP CSV folder of the first transmission.")
    parser.add_argument("--folder2", required=True, help="PCAP CSV folder of the second transmission.")
    parser.add_argument("--save", default=None, help="Location to save the PDF plots.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=2,
        type=int,
        help="Time interval (seconds) for the goodput windows.",
    )

    args = parser.parse_args()

    transmission_df_list = list()
    # handle the two transmissions one after the other, pairing each pcap
    # folder with its serial log
    for f, serial_csv in zip([args.folder1, args.folder2], [args.serial1, args.serial2]):
        manager = multiprocessing.Manager()
        n = manager.Value("i", 0)  # shared counter of processed files (progress display only)
        frame_list = manager.list()  # shared list collecting the worker results
        jobs = []

        # collect all TCP pcap CSV files in this folder
        pcap_csv_list = list()
        for filename in os.listdir(f):
            if filename.endswith(".csv") and "tcp" in filename:
                pcap_csv_list.append(filename)

        # split the file list into one chunk per worker
        parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
        print("Start processing with {} jobs.".format(args.cores))
        for p in parts:
            process = multiprocessing.Process(target=csv_to_dataframe, args=(p, f))
            jobs.append(process)

        for j in jobs:
            j.start()

        print("Started all jobs.")
        # Ensure all the processes have finished
        finished_job_counter = 0
        working = ["|", "/", "-", "\\"]
        w = 0
        while len(jobs) != finished_job_counter:
            sleep(1)
            print(
                "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                    working[w],
                    working[w],
                    working[w],
                    len(jobs),
                    finished_job_counter,
                    n.value,
                    len(pcap_csv_list),
                    round((n.value / len(pcap_csv_list)) * 100, 2),
                ),
                end="",
            )
            # re-count the workers that have exited
            finished_job_counter = 0
            for j in jobs:
                if not j.is_alive():
                    finished_job_counter += 1
            # advance the spinner
            w = (w + 1) % len(working)
        print("\r\nSorting table...")

        transmission_df = pd.concat(frame_list)
        frame_list = None
        transmission_df = transmission_df.sort_index()

        print("Calculate goodput...")

        # srtt is recorded in microseconds; convert to seconds
        transmission_df["srtt"] = transmission_df["srtt"] / 10**6

        # per-interval goodput: sum payload bytes in fixed time bins
        # ("level" groups on the index, "key" would group on a column),
        # then convert bytes per interval to Mbit/s
        transmission_df["goodput"] = (
            transmission_df["payload_size"]
            .groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval)))
            .transform("sum")
        )
        transmission_df["goodput"] = transmission_df["goodput"].apply(
            lambda x: ((x * 8) / args.interval) / 10**6
        )

        # the same metric over a rolling time window
        transmission_df["goodput_rolling"] = (
            transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
        )
        transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
            lambda x: ((x * 8) / args.interval) / 10**6
        )

        # set meta values
        cc_algo = transmission_df["congestion_control"].iloc[0].upper()
        transmission_direction = transmission_df["direction"].iloc[0]

        # read the serial CSV and align its clock the same way
        serial_df = pd.read_csv(serial_csv)
        serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
        serial_df = serial_df.set_index("datetime")
        serial_df.index = pd.to_datetime(serial_df.index)
        serial_df = serial_df.sort_index()

        # attach the closest serial sample (within 1 s) to each packet row
        transmission_df = pd.merge_asof(
            transmission_df,
            serial_df,
            tolerance=pd.Timedelta("1s"),
            right_index=True,
            left_index=True,
        )

        transmission_df_list.append(dict(
            df=transmission_df,
            cc_algo=cc_algo,
            transmission_direction=transmission_direction,
        ))

    # Plot sRTT CDF
    plot_cdf(transmission_df_list[0]["df"], "srtt")
    plot_cdf(transmission_df_list[1]["df"], "srtt")
    plt.xscale("log")
    plt.xlabel("sRTT [s]")
    plt.ylabel("CDF")
    plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
    plt.title("{}".format(transmission_df_list[0]["transmission_direction"]))
    plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "srtt"))

    plt.clf()

    # Plot goodput CDF
    plot_cdf(transmission_df_list[0]["df"], "goodput")
    plot_cdf(transmission_df_list[1]["df"], "goodput")
    plt.xlabel("goodput [Mbit/s]")
    plt.ylabel("CDF")
    plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
    plt.title("{}".format(transmission_df_list[0]["transmission_direction"]))
    plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "goodput"))