Adds script for cdf plots.
This commit is contained in:
207
cdf_compare.py
Executable file
207
cdf_compare.py
Executable file
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
import multiprocessing
|
||||
import os
|
||||
import pickle
|
||||
from argparse import ArgumentParser
|
||||
from math import ceil
|
||||
from time import sleep
|
||||
|
||||
import matplotlib
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits import axisartist
|
||||
from mpl_toolkits.axes_grid1 import host_subplot
|
||||
|
||||
|
||||
|
||||
def csv_to_dataframe(csv_list, dummy):
|
||||
|
||||
global n
|
||||
global frame_list
|
||||
|
||||
transmission_df = None
|
||||
|
||||
for csv in csv_list:
|
||||
tmp_df = pd.read_csv(
|
||||
"{}{}".format(args.pcap_csv_folder, csv),
|
||||
dtype=dict(is_retranmission=bool, is_dup_ack=bool),
|
||||
)
|
||||
tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
|
||||
tmp_df = tmp_df.set_index("datetime")
|
||||
tmp_df.index = pd.to_datetime(tmp_df.index)
|
||||
if transmission_df is None:
|
||||
transmission_df = tmp_df
|
||||
else:
|
||||
transmission_df = pd.concat([transmission_df, tmp_df])
|
||||
|
||||
n.value += 1
|
||||
|
||||
frame_list.append(transmission_df)
|
||||
|
||||
|
||||
from itertools import islice
|
||||
|
||||
def chunk(it, size):
|
||||
it = iter(it)
|
||||
return iter(lambda: tuple(islice(it, size)), ())
|
||||
|
||||
|
||||
def plot_cdf(dataframe, column_name):
|
||||
stats_df = dataframe \
|
||||
.groupby(column_name) \
|
||||
[column_name] \
|
||||
.agg("count") \
|
||||
.pipe(pd.DataFrame) \
|
||||
.rename(columns={column_name: "frequency"})
|
||||
|
||||
# PDF
|
||||
stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])
|
||||
|
||||
# CDF
|
||||
stats_df["CDF"] = stats_df["PDF"].cumsum()
|
||||
stats_df = stats_df.reset_index()
|
||||
|
||||
stats_df.plot(x=column_name, y=["CDF"], grid=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--serial1", required=True, help="Serial csv file1.")
|
||||
parser.add_argument("--serial2", required=True, help="Serial csv file2.")
|
||||
parser.add_argument("--folder1", required=True, help="PCAP csv folder1.")
|
||||
parser.add_argument("--folder2", required=True, help="PCAP csv folder2.")
|
||||
parser.add_argument("--save", default=None, help="Location to save pdf file.")
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--cores",
|
||||
default=1,
|
||||
type=int,
|
||||
help="Number of cores for multiprocessing.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--interval",
|
||||
default=2,
|
||||
type=int,
|
||||
help="Time interval for rolling window.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
transmission_df_list = list()
|
||||
for f in [args.folder1, args.folder2]:
|
||||
manager = multiprocessing.Manager()
|
||||
n = manager.Value("i", 0)
|
||||
frame_list = manager.list()
|
||||
jobs = []
|
||||
|
||||
# load all pcap csv into one dataframe
|
||||
pcap_csv_list = list()
|
||||
for filename in os.listdir(args.pcap_csv_folder):
|
||||
if filename.endswith(".csv") and "tcp" in filename:
|
||||
pcap_csv_list.append(filename)
|
||||
|
||||
parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
|
||||
print("Start processing with {} jobs.".format(args.cores))
|
||||
for p in parts:
|
||||
process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
|
||||
jobs.append(process)
|
||||
|
||||
for j in jobs:
|
||||
j.start()
|
||||
|
||||
print("Started all jobs.")
|
||||
# Ensure all the processes have finished
|
||||
finished_job_counter = 0
|
||||
working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
|
||||
w = 0
|
||||
while len(jobs) != finished_job_counter:
|
||||
sleep(1)
|
||||
print(
|
||||
"\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
|
||||
working[w],
|
||||
working[w],
|
||||
working[w],
|
||||
len(jobs),
|
||||
finished_job_counter,
|
||||
n.value,
|
||||
len(pcap_csv_list),
|
||||
round((n.value / len(pcap_csv_list)) * 100, 2),
|
||||
),
|
||||
end="",
|
||||
)
|
||||
finished_job_counter = 0
|
||||
for j in jobs:
|
||||
if not j.is_alive():
|
||||
finished_job_counter += 1
|
||||
if (w + 1) % len(working) == 0:
|
||||
w = 0
|
||||
else:
|
||||
w += 1
|
||||
print("\r\nSorting table...")
|
||||
|
||||
transmission_df = pd.concat(frame_list)
|
||||
frame_list = None
|
||||
transmission_df = transmission_df.sort_index()
|
||||
|
||||
print("Calculate goodput...")
|
||||
|
||||
transmission_df["srtt"] = transmission_df["srtt"].apply(lambda x: x / 10 ** 6)
|
||||
|
||||
# key for columns and level for index
|
||||
transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
|
||||
transmission_df["goodput"] = transmission_df["goodput"].apply(
|
||||
lambda x: ((x * 8) / args.interval) / 10**6
|
||||
)
|
||||
|
||||
transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
|
||||
transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
|
||||
lambda x: ((x * 8) / args.interval) / 10 ** 6
|
||||
)
|
||||
|
||||
# set meta values
|
||||
cc_algo = transmission_df["congestion_control"].iloc[0]
|
||||
cc_algo = cc_algo.upper()
|
||||
transmission_direction = transmission_df["direction"].iloc[0]
|
||||
|
||||
# read serial csv
|
||||
serial_df = pd.read_csv(args.serial_file)
|
||||
serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
|
||||
serial_df = serial_df.set_index("datetime")
|
||||
serial_df.index = pd.to_datetime(serial_df.index)
|
||||
serial_df.sort_index()
|
||||
|
||||
transmission_df = pd.merge_asof(
|
||||
transmission_df,
|
||||
serial_df,
|
||||
tolerance=pd.Timedelta("1s"),
|
||||
right_index=True,
|
||||
left_index=True,
|
||||
)
|
||||
|
||||
transmission_df_list.append(dict(
|
||||
df=transmission_df,
|
||||
cc_algo=cc_algo,
|
||||
transmission_direction=transmission_direction
|
||||
))
|
||||
|
||||
# Plot sRTT CDF
|
||||
plot_cdf(transmission_df_list[0]["df"], "srtt")
|
||||
plot_cdf(transmission_df_list[1]["df"], "srtt")
|
||||
plt.xscale("log")
|
||||
plt.xlabel("sRTT [s]")
|
||||
plt.ylabel("CDF")
|
||||
plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
|
||||
plt.title("{}".format(transmission_direction))
|
||||
plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "srtt"))
|
||||
|
||||
plt.clf()
|
||||
|
||||
# Plot goodput CDF
|
||||
plot_cdf(transmission_df_list[0]["df"], "goodput")
|
||||
plot_cdf(transmission_df_list[1]["df"], "goodput")
|
||||
plt.xlabel("goodput [mbps]")
|
||||
plt.ylabel("CDF")
|
||||
plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
|
||||
plt.title("{}".format(transmission_direction))
|
||||
plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "goodput"))
|
||||
Reference in New Issue
Block a user