Adds differnt plots fpr goodput, cwnd, rtt und cqi.

This commit is contained in:
Lukas Prause
2023-01-20 14:59:04 +01:00
parent 16b9929dcb
commit e1b0cfa32a

View File

@@ -1,66 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import math
import multiprocessing import multiprocessing
import os import os
from argparse import ArgumentParser from argparse import ArgumentParser
from math import ceil
from time import sleep
import matplotlib import matplotlib
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits import axisartist
from mpl_toolkits.axes_grid1 import host_subplot
def csv_to_dataframe(csv_list, dummy):
global n
global frame_list
transmission_df = None
for csv in csv_list:
tmp_df = pd.read_csv(
"{}{}".format(args.pcap_csv_folder, csv),
dtype=dict(is_retranmission=bool, is_dup_ack=bool),
)
tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
tmp_df = tmp_df.set_index("datetime")
tmp_df.index = pd.to_datetime(tmp_df.index)
if transmission_df is None:
transmission_df = tmp_df
else:
transmission_df = pd.concat([transmission_df, tmp_df])
n.value += 1
frame_list.append(transmission_df)
from itertools import islice
def chunk(it, size):
it = iter(it)
return iter(lambda: tuple(islice(it, size)), ())
def plot_cdf(dataframe, column_name):
stats_df = dataframe \
.groupby(column_name) \
[column_name] \
.agg("count") \
.pipe(pd.DataFrame) \
.rename(columns={column_name: "frequency"})
# PDF
stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])
# CDF
stats_df["CDF"] = stats_df["PDF"].cumsum()
stats_df = stats_df.reset_index()
stats_df.plot(x=column_name, y=["CDF"], grid=True)
if __name__ == "__main__": if __name__ == "__main__":
@@ -68,13 +14,6 @@ if __name__ == "__main__":
parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.") parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.") parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
parser.add_argument("--save", default=None, help="Location to save pdf file.") parser.add_argument("--save", default=None, help="Location to save pdf file.")
parser.add_argument(
"-c",
"--cores",
default=1,
type=int,
help="Number of cores for multiprocessing.",
)
parser.add_argument( parser.add_argument(
"-i", "-i",
"--interval", "--interval",
@@ -95,179 +34,132 @@ if __name__ == "__main__":
if filename.endswith(".csv") and "tcp" in filename: if filename.endswith(".csv") and "tcp" in filename:
pcap_csv_list.append(filename) pcap_csv_list.append(filename)
parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores)) counter = 1
print("Start processing with {} jobs.".format(args.cores)) if len(pcap_csv_list) == 0:
for p in parts: print("No CSV files found.")
process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
jobs.append(process)
for j in jobs: pcap_csv_list.sort(key=lambda x: int(x.split("_")[-1].replace(".csv", "")))
j.start()
print("Started all jobs.") for csv in pcap_csv_list:
# Ensure all the processes have finished
finished_job_counter = 0 print("\rProcessing {} out of {} CSVs.\t({}%)\t".format(counter, len(pcap_csv_list), math.floor(counter/len(pcap_csv_list))))
working = ["|", "/", "-", "\\", "|", "/", "-", "\\"] transmission_df = pd.read_csv(
w = 0 "{}{}".format(args.pcap_csv_folder, csv),
while len(jobs) != finished_job_counter: dtype=dict(is_retranmission=bool, is_dup_ack=bool),
sleep(1)
print(
"\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
working[w],
working[w],
working[w],
len(jobs),
finished_job_counter,
n.value,
len(pcap_csv_list),
round((n.value / len(pcap_csv_list)) * 100, 2),
),
end="",
) )
finished_job_counter = 0 transmission_df["datetime"] = pd.to_datetime(transmission_df["datetime"]) - pd.Timedelta(hours=1)
for j in jobs: transmission_df = transmission_df.set_index("datetime")
if not j.is_alive(): transmission_df.index = pd.to_datetime(transmission_df.index)
finished_job_counter += 1 transmission_df = transmission_df.sort_index()
if (w + 1) % len(working) == 0:
w = 0 #print("Calculate goodput...")
#print(transmission_df)
# key for columns and level for index
transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
transmission_df["goodput"] = transmission_df["goodput"].apply(
lambda x: ((x * 8) / args.interval) / 10**6
)
transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
lambda x: ((x * 8) / args.interval) / 10 ** 6
)
# set meta values and remove all not needed columns
cc_algo = transmission_df["congestion_control"].iloc[0]
cc_algo = cc_algo.upper()
transmission_direction = transmission_df["direction"].iloc[0]
#transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling", "snd_cwnd"])
# read serial csv
serial_df = pd.read_csv(args.serial_file)
serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
serial_df = serial_df.set_index("datetime")
serial_df.index = pd.to_datetime(serial_df.index)
transmission_df = pd.merge_asof(
transmission_df,
serial_df,
tolerance=pd.Timedelta("1s"),
right_index=True,
left_index=True,
)
# transmission timeline
scaley = 1.5
scalex = 1.0
fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex])
plt.title("{} with {}".format(transmission_direction, cc_algo))
fig.subplots_adjust(right=0.75)
twin1 = ax.twinx()
twin2 = ax.twinx()
twin3 = ax.twinx()
# Offset the right spine of twin2. The ticks and label have already been
# placed on the right by twinx above.
twin2.spines.right.set_position(("axes", 1.1))
twin3.spines.right.set_position(("axes", 1.2))
# create list fo color indices
transmission_df["index"] = transmission_df.index
color_dict = dict()
color_list = list()
i = 0
for cell_id in transmission_df["cellID"]:
if cell_id not in color_dict:
color_dict[cell_id] = i
i += 1
color_list.append(color_dict[cell_id])
transmission_df["cell_color"] = color_list
color_dict = None
color_list = None
cmap = matplotlib.cm.get_cmap("Set3")
for c in transmission_df["cell_color"].unique():
bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c]
ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=cmap.colors[c])
p4, = twin3.plot(transmission_df["snd_cwnd"].dropna(), color="lime", linestyle="dashed", label="cwnd")
p3, = twin2.plot(transmission_df["ack_rtt"].dropna(), color="red", linestyle="dashdot", label="ACK RTT")
p1, = ax.plot(transmission_df["goodput_rolling"], color="blue", linestyle="solid", label="goodput")
p2, = twin1.plot(transmission_df["downlink_cqi"].dropna(), color="magenta", linestyle="dotted", label="CQI")
ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max())
ax.set_ylim(0, 500)
twin1.set_ylim(0, 15)
twin2.set_ylim(0, transmission_df["ack_rtt"].max())
twin3.set_ylim(0, transmission_df["snd_cwnd"].max() + 10)
ax.set_xlabel("arrival time")
ax.set_ylabel("Goodput [mbps]")
twin1.set_ylabel("CQI")
twin2.set_ylabel("ACK RTT [s]")
twin3.set_ylabel("cwnd")
ax.yaxis.label.set_color(p1.get_color())
twin1.yaxis.label.set_color(p2.get_color())
twin2.yaxis.label.set_color(p3.get_color())
twin3.yaxis.label.set_color(p4.get_color())
tkw = dict(size=4, width=1.5)
ax.tick_params(axis='y', colors=p1.get_color(), **tkw)
twin1.tick_params(axis='y', colors=p2.get_color(), **tkw)
twin2.tick_params(axis='y', colors=p3.get_color(), **tkw)
twin3.tick_params(axis='y', colors=p4.get_color(), **tkw)
ax.tick_params(axis='x', **tkw)
#ax.legend(handles=[p1, p2, p3])
if args.save:
plt.savefig("{}{}_plot.pdf".format(args.save, csv.replace(".csv", "")))
else: else:
w += 1 plt.show()
print("\r\nSorting table...") counter += 1
transmission_df = pd.concat(frame_list) plt.clf()
frame_list = None
transmission_df = transmission_df.sort_index()
print("Calculate goodput...")
#print(transmission_df)
# key for columns and level for index
transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
transmission_df["goodput"] = transmission_df["goodput"].apply(
lambda x: ((x * 8) / args.interval) / 10**6
)
transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
lambda x: ((x * 8) / args.interval) / 10 ** 6
)
# set meta values and remove all not needed columns
cc_algo = transmission_df["congestion_control"].iloc[0]
cc_algo = cc_algo.upper()
transmission_direction = transmission_df["direction"].iloc[0]
transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling"])
# read serial csv
serial_df = pd.read_csv(args.serial_file)
serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
serial_df = serial_df.set_index("datetime")
serial_df.index = pd.to_datetime(serial_df.index)
transmission_df = pd.merge_asof(
transmission_df,
serial_df,
tolerance=pd.Timedelta("1s"),
right_index=True,
left_index=True,
)
# transmission timeline
scaley = 1.5
scalex = 1.0
fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex])
plt.title("{} with {}".format(transmission_direction, cc_algo))
fig.subplots_adjust(right=0.75)
twin1 = ax.twinx()
twin2 = ax.twinx()
# Offset the right spine of twin2. The ticks and label have already been
# placed on the right by twinx above.
twin2.spines.right.set_position(("axes", 1.2))
# create list fo color indices
transmission_df["index"] = transmission_df.index
color_dict = dict()
color_list = list()
i = 0
for cell_id in transmission_df["cellID"]:
if cell_id not in color_dict:
color_dict[cell_id] = i
i += 1
color_list.append(color_dict[cell_id])
transmission_df["cell_color"] = color_list
color_dict = None
color_list = None
cmap = matplotlib.cm.get_cmap("Set3")
for c in transmission_df["cell_color"].unique():
bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c]
ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=cmap.colors[c])
p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput")
p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
p3, = twin2.plot(transmission_df["ack_rtt"], "-.", color="red", label="ACK RTT")
ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max())
ax.set_ylim(0, 500)
twin1.set_ylim(0, 15)
twin2.set_ylim(0, 1)
ax.set_xlabel("Time")
ax.set_ylabel("Goodput")
twin1.set_ylabel("CQI")
twin2.set_ylabel("ACK RTT")
ax.yaxis.label.set_color(p1.get_color())
twin1.yaxis.label.set_color(p2.get_color())
twin2.yaxis.label.set_color(p3.get_color())
tkw = dict(size=4, width=1.5)
ax.tick_params(axis='y', colors=p1.get_color(), **tkw)
twin1.tick_params(axis='y', colors=p2.get_color(), **tkw)
twin2.tick_params(axis='y', colors=p3.get_color(), **tkw)
ax.tick_params(axis='x', **tkw)
#ax.legend(handles=[p1, p2, p3])
if args.save:
plt.savefig("{}timeline_plot.pdf".format(args.save))
else:
plt.show()
#goodput cdf
plt.clf()
print("Calculate and polt goodput CDF...")
plot_cdf(transmission_df, "goodput")
plt.xlabel("goodput [mbps]")
plt.ylabel("CDF")
plt.legend([cc_algo])
plt.title("{} with {}".format(transmission_direction, cc_algo))
if args.save:
plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput"))
else:
plt.show()
# rtt cdf
plt.clf()
print(transmission_df["ack_rtt"])
print("Calculate and polt rtt CDF...")
plot_cdf(transmission_df, "ack_rtt")
plt.xlabel("ACK RTT [s]")
plt.ylabel("CDF")
plt.xscale("log")
plt.legend([cc_algo])
plt.title("{} with {}".format(transmission_direction, cc_algo))
if args.save:
plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "ack_rtt"))
else:
plt.show()