
Adds script for cdf plots.

master
Lukas Prause, 2 years ago
parent commit 8004c74acf
1 file changed, 207 additions and 0 deletions
cdf_compare.py  +207  -0

@@ -0,0 +1,207 @@
#!/usr/bin/env python3
import multiprocessing
import os
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import matplotlib.pyplot as plt
import pandas as pd


def csv_to_dataframe(csv_list, folder):
    """Load a chunk of pcap csv files from *folder* into a single dataframe.

    Runs in a worker process; n and frame_list are manager proxies created in
    the main block and inherited via fork.
    """
    global n
    global frame_list

    transmission_df = None

    for csv in csv_list:
        tmp_df = pd.read_csv(
            "{}{}".format(folder, csv),
            # "is_retranmission" is spelled as in the csv column header
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # shift timestamps by one hour (apparently a timezone correction)
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        tmp_df = tmp_df.set_index("datetime")
        tmp_df.index = pd.to_datetime(tmp_df.index)
        if transmission_df is None:
            transmission_df = tmp_df
        else:
            transmission_df = pd.concat([transmission_df, tmp_df])

        # shared counter for the progress display in the main process
        n.value += 1

    frame_list.append(transmission_df)


def chunk(it, size):
    """Split an iterable into tuples of at most *size* elements."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())
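# For example, chunk(range(5), 2) yields (0, 1), (2, 3), (4,); the main block
# below uses it to split the pcap csv file list into one slice per worker.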


def plot_cdf(dataframe, column_name):
    """Compute the empirical CDF of *column_name* and draw it on the current axes."""
    # count how often each distinct value occurs
    stats_df = (
        dataframe.groupby(column_name)[column_name]
        .agg("count")
        .pipe(pd.DataFrame)
        .rename(columns={column_name: "frequency"})
    )

    # PDF: relative frequency of each value
    stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])

    # CDF: cumulative sum of the PDF
    stats_df["CDF"] = stats_df["PDF"].cumsum()
    stats_df = stats_df.reset_index()

    # draw on the current axes so consecutive calls end up in one figure
    stats_df.plot(x=column_name, y=["CDF"], grid=True, ax=plt.gca())

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--serial1", required=True, help="Serial csv file 1.")
    parser.add_argument("--serial2", required=True, help="Serial csv file 2.")
    parser.add_argument("--folder1", required=True, help="PCAP csv folder 1.")
    parser.add_argument("--folder2", required=True, help="PCAP csv folder 2.")
    parser.add_argument(
        "--save", default="", help="Path prefix for the saved pdf files."
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=2,
        type=int,
        help="Time interval in seconds for the goodput buckets and rolling window.",
    )

    args = parser.parse_args()
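
    # Example invocation (hypothetical file names):
    #   ./cdf_compare.py --serial1 run1_serial.csv --serial2 run2_serial.csv \
    #       --folder1 run1_pcap_csv/ --folder2 run2_pcap_csv/ --save plots/ -c 4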

    transmission_df_list = list()
    for pcap_folder, serial_file in [
        (args.folder1, args.serial1),
        (args.folder2, args.serial2),
    ]:
        manager = multiprocessing.Manager()
        n = manager.Value("i", 0)
        frame_list = manager.list()
        jobs = []

        # load all tcp pcap csv files of this run into one dataframe
        pcap_csv_list = list()
        for filename in os.listdir(pcap_folder):
            if filename.endswith(".csv") and "tcp" in filename:
                pcap_csv_list.append(filename)

        # one chunk of files per core, one worker process per chunk
        parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
        print("Start processing with {} jobs.".format(args.cores))
        for p in parts:
            process = multiprocessing.Process(
                target=csv_to_dataframe, args=(p, pcap_folder)
            )
            jobs.append(process)

        for j in jobs:
            j.start()
print("Started all jobs.")
# Ensure all the processes have finished
finished_job_counter = 0
working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
w = 0
while len(jobs) != finished_job_counter:
sleep(1)
print(
"\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
working[w],
working[w],
working[w],
len(jobs),
finished_job_counter,
n.value,
len(pcap_csv_list),
round((n.value / len(pcap_csv_list)) * 100, 2),
),
end="",
)
finished_job_counter = 0
for j in jobs:
if not j.is_alive():
finished_job_counter += 1
if (w + 1) % len(working) == 0:
w = 0
else:
w += 1
print("\r\nSorting table...")

        transmission_df = pd.concat(frame_list)
        frame_list = None
        transmission_df = transmission_df.sort_index()

        print("Calculate goodput...")

        # convert srtt to seconds (the csv apparently stores microseconds)
        transmission_df["srtt"] = transmission_df["srtt"].apply(lambda x: x / 10 ** 6)

        # sum the payload per fixed time bucket ("key" for columns, "level" for index)
        transmission_df["goodput"] = (
            transmission_df["payload_size"]
            .groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval)))
            .transform("sum")
        )
        # bytes per interval -> Mbit/s
        transmission_df["goodput"] = transmission_df["goodput"].apply(
            lambda x: ((x * 8) / args.interval) / 10 ** 6
        )

        # same conversion over a rolling window instead of fixed buckets
        transmission_df["goodput_rolling"] = (
            transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
        )
        transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
            lambda x: ((x * 8) / args.interval) / 10 ** 6
        )
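        # Example with the default interval of 2 s (hypothetical numbers):
        # 500000 payload bytes in one bucket -> (500000 * 8) / 2 / 10**6 = 2.0 Mbit/s.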

        # set meta values
        cc_algo = transmission_df["congestion_control"].iloc[0]
        cc_algo = cc_algo.upper()
        transmission_direction = transmission_df["direction"].iloc[0]

        # read serial csv
        serial_df = pd.read_csv(serial_file)
        serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(
            hours=1
        )
        serial_df = serial_df.set_index("datetime")
        serial_df.index = pd.to_datetime(serial_df.index)
        serial_df = serial_df.sort_index()

        # attach the closest earlier serial sample (at most 1 s away) to each row
        transmission_df = pd.merge_asof(
            transmission_df,
            serial_df,
            tolerance=pd.Timedelta("1s"),
            right_index=True,
            left_index=True,
        )
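        # Illustrative (hypothetical) behaviour: a packet row stamped 12:00:00.4
        # gets the latest serial sample at or before it, e.g. 12:00:00.0; rows
        # with no serial sample within 1 s get NaN for the serial columns.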

        transmission_df_list.append(
            dict(
                df=transmission_df,
                cc_algo=cc_algo,
                transmission_direction=transmission_direction,
            )
        )

    # Plot sRTT CDF
    plot_cdf(transmission_df_list[0]["df"], "srtt")
    plot_cdf(transmission_df_list[1]["df"], "srtt")
    plt.xscale("log")
    plt.xlabel("sRTT [s]")
    plt.ylabel("CDF")
    plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
    # assume both runs share the same transmission direction; take it from the first
    plt.title("{}".format(transmission_df_list[0]["transmission_direction"]))
    plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "srtt"))

    plt.clf()

    # Plot goodput CDF
    plot_cdf(transmission_df_list[0]["df"], "goodput")
    plot_cdf(transmission_df_list[1]["df"], "goodput")
    plt.xlabel("goodput [Mbit/s]")
    plt.ylabel("CDF")
    plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
    plt.title("{}".format(transmission_df_list[0]["transmission_direction"]))
    plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "goodput"))
