From 16b9929dcb7ee21fa5f12636372b8e8783e369dc Mon Sep 17 00:00:00 2001 From: Langspielplatte Date: Fri, 20 Jan 2023 10:40:58 +0100 Subject: [PATCH] Refactor and adds ne script (WIP) for plotting single transmissions --- plot_single_transmission_timeline.py | 273 +++++++++++++++++++++++++++ plot_transmission_timeline.py | 6 - 2 files changed, 273 insertions(+), 6 deletions(-) create mode 100755 plot_single_transmission_timeline.py diff --git a/plot_single_transmission_timeline.py b/plot_single_transmission_timeline.py new file mode 100755 index 0000000..1634089 --- /dev/null +++ b/plot_single_transmission_timeline.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +import multiprocessing +import os +from argparse import ArgumentParser +from math import ceil +from time import sleep + +import matplotlib +import pandas as pd +import matplotlib.pyplot as plt +from mpl_toolkits import axisartist +from mpl_toolkits.axes_grid1 import host_subplot + + + +def csv_to_dataframe(csv_list, dummy): + + global n + global frame_list + + transmission_df = None + + for csv in csv_list: + tmp_df = pd.read_csv( + "{}{}".format(args.pcap_csv_folder, csv), + dtype=dict(is_retranmission=bool, is_dup_ack=bool), + ) + tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1) + tmp_df = tmp_df.set_index("datetime") + tmp_df.index = pd.to_datetime(tmp_df.index) + if transmission_df is None: + transmission_df = tmp_df + else: + transmission_df = pd.concat([transmission_df, tmp_df]) + + n.value += 1 + + frame_list.append(transmission_df) + + +from itertools import islice + +def chunk(it, size): + it = iter(it) + return iter(lambda: tuple(islice(it, size)), ()) + + +def plot_cdf(dataframe, column_name): + stats_df = dataframe \ + .groupby(column_name) \ + [column_name] \ + .agg("count") \ + .pipe(pd.DataFrame) \ + .rename(columns={column_name: "frequency"}) + + # PDF + stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"]) + + # CDF + stats_df["CDF"] = stats_df["PDF"].cumsum() + stats_df = stats_df.reset_index() + + stats_df.plot(x=column_name, y=["CDF"], grid=True) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.") + parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.") + parser.add_argument("--save", default=None, help="Location to save pdf file.") + parser.add_argument( + "-c", + "--cores", + default=1, + type=int, + help="Number of cores for multiprocessing.", + ) + parser.add_argument( + "-i", + "--interval", + default=10, + type=int, + help="Time interval for rolling window.", + ) + + args = parser.parse_args() + manager = multiprocessing.Manager() + n = manager.Value("i", 0) + frame_list = manager.list() + jobs = [] + + # load all pcap csv into one dataframe + pcap_csv_list = list() + for filename in os.listdir(args.pcap_csv_folder): + if filename.endswith(".csv") and "tcp" in filename: + pcap_csv_list.append(filename) + + parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores)) + print("Start processing with {} jobs.".format(args.cores)) + for p in parts: + process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy")) + jobs.append(process) + + for j in jobs: + j.start() + + print("Started all jobs.") + # Ensure all the processes have finished + finished_job_counter = 0 + working = ["|", "/", "-", "\\", "|", "/", "-", "\\"] + w = 0 + while len(jobs) != finished_job_counter: + sleep(1) + print( + "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format( + working[w], + working[w], + working[w], + len(jobs), + finished_job_counter, + n.value, + len(pcap_csv_list), + round((n.value / len(pcap_csv_list)) * 100, 2), + ), + end="", + ) + finished_job_counter = 0 + for j in jobs: + if not j.is_alive(): + finished_job_counter += 1 + if (w + 1) % len(working) == 0: + w = 0 + else: + w += 1 + print("\r\nSorting table...") + + transmission_df = pd.concat(frame_list) + frame_list = None + transmission_df = transmission_df.sort_index() + + print("Calculate goodput...") + + #print(transmission_df) + + # key for columns and level for index + transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum") + transmission_df["goodput"] = transmission_df["goodput"].apply( + lambda x: ((x * 8) / args.interval) / 10**6 + ) + + transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum() + transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply( + lambda x: ((x * 8) / args.interval) / 10 ** 6 + ) + + # set meta values and remove all not needed columns + cc_algo = transmission_df["congestion_control"].iloc[0] + cc_algo = cc_algo.upper() + transmission_direction = transmission_df["direction"].iloc[0] + + transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling"]) + + # read serial csv + serial_df = pd.read_csv(args.serial_file) + serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1) + serial_df = serial_df.set_index("datetime") + serial_df.index = pd.to_datetime(serial_df.index) + + transmission_df = pd.merge_asof( + transmission_df, + serial_df, + tolerance=pd.Timedelta("1s"), + right_index=True, + left_index=True, + ) + + # transmission timeline + + scaley = 1.5 + scalex = 1.0 + fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex]) + plt.title("{} with {}".format(transmission_direction, cc_algo)) + fig.subplots_adjust(right=0.75) + + twin1 = ax.twinx() + twin2 = ax.twinx() + # Offset the right spine of twin2. The ticks and label have already been + # placed on the right by twinx above. + twin2.spines.right.set_position(("axes", 1.2)) + + + # create list fo color indices + transmission_df["index"] = transmission_df.index + color_dict = dict() + color_list = list() + i = 0 + for cell_id in transmission_df["cellID"]: + if cell_id not in color_dict: + color_dict[cell_id] = i + i += 1 + color_list.append(color_dict[cell_id]) + + transmission_df["cell_color"] = color_list + color_dict = None + color_list = None + + cmap = matplotlib.cm.get_cmap("Set3") + for c in transmission_df["cell_color"].unique(): + bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c] + ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=cmap.colors[c]) + + p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput") + p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI") + p3, = twin2.plot(transmission_df["ack_rtt"], "-.", color="red", label="ACK RTT") + + ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max()) + ax.set_ylim(0, 500) + twin1.set_ylim(0, 15) + twin2.set_ylim(0, 1) + + ax.set_xlabel("Time") + ax.set_ylabel("Goodput") + twin1.set_ylabel("CQI") + twin2.set_ylabel("ACK RTT") + + ax.yaxis.label.set_color(p1.get_color()) + twin1.yaxis.label.set_color(p2.get_color()) + twin2.yaxis.label.set_color(p3.get_color()) + + tkw = dict(size=4, width=1.5) + ax.tick_params(axis='y', colors=p1.get_color(), **tkw) + twin1.tick_params(axis='y', colors=p2.get_color(), **tkw) + twin2.tick_params(axis='y', colors=p3.get_color(), **tkw) + ax.tick_params(axis='x', **tkw) + + #ax.legend(handles=[p1, p2, p3]) + + if args.save: + plt.savefig("{}timeline_plot.pdf".format(args.save)) + else: + plt.show() + + #goodput cdf + plt.clf() + + print("Calculate and polt goodput CDF...") + plot_cdf(transmission_df, "goodput") + plt.xlabel("goodput [mbps]") + plt.ylabel("CDF") + plt.legend([cc_algo]) + plt.title("{} with {}".format(transmission_direction, cc_algo)) + + if args.save: + plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput")) + else: + plt.show() + + # rtt cdf + plt.clf() + + print(transmission_df["ack_rtt"]) + print("Calculate and polt rtt CDF...") + plot_cdf(transmission_df, "ack_rtt") + plt.xlabel("ACK RTT [s]") + plt.ylabel("CDF") + plt.xscale("log") + plt.legend([cc_algo]) + plt.title("{} with {}".format(transmission_direction, cc_algo)) + + if args.save: + plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "ack_rtt")) + else: + plt.show() \ No newline at end of file diff --git a/plot_transmission_timeline.py b/plot_transmission_timeline.py index 8bb7a99..1634089 100755 --- a/plot_transmission_timeline.py +++ b/plot_transmission_timeline.py @@ -65,15 +65,9 @@ def plot_cdf(dataframe, column_name): if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.") parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.") parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.") parser.add_argument("--save", default=None, help="Location to save pdf file.") - parser.add_argument( - "--show_providerinfo", - default=False, - help="Show providerinfo for map tiles an zoom levels.", - ) parser.add_argument( "-c", "--cores",