diff --git a/plot_single_transmission_timeline.py b/plot_single_transmission_timeline.py index 1634089..b968217 100755 --- a/plot_single_transmission_timeline.py +++ b/plot_single_transmission_timeline.py @@ -1,66 +1,12 @@ #!/usr/bin/env python3 +import math import multiprocessing import os from argparse import ArgumentParser -from math import ceil -from time import sleep import matplotlib import pandas as pd import matplotlib.pyplot as plt -from mpl_toolkits import axisartist -from mpl_toolkits.axes_grid1 import host_subplot - - - -def csv_to_dataframe(csv_list, dummy): - - global n - global frame_list - - transmission_df = None - - for csv in csv_list: - tmp_df = pd.read_csv( - "{}{}".format(args.pcap_csv_folder, csv), - dtype=dict(is_retranmission=bool, is_dup_ack=bool), - ) - tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1) - tmp_df = tmp_df.set_index("datetime") - tmp_df.index = pd.to_datetime(tmp_df.index) - if transmission_df is None: - transmission_df = tmp_df - else: - transmission_df = pd.concat([transmission_df, tmp_df]) - - n.value += 1 - - frame_list.append(transmission_df) - - -from itertools import islice - -def chunk(it, size): - it = iter(it) - return iter(lambda: tuple(islice(it, size)), ()) - - -def plot_cdf(dataframe, column_name): - stats_df = dataframe \ - .groupby(column_name) \ - [column_name] \ - .agg("count") \ - .pipe(pd.DataFrame) \ - .rename(columns={column_name: "frequency"}) - - # PDF - stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"]) - - # CDF - stats_df["CDF"] = stats_df["PDF"].cumsum() - stats_df = stats_df.reset_index() - - stats_df.plot(x=column_name, y=["CDF"], grid=True) if __name__ == "__main__": @@ -68,13 +14,6 @@ if __name__ == "__main__": parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.") parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.") parser.add_argument("--save", default=None, help="Location to save pdf file.") - parser.add_argument( - "-c", - "--cores", - default=1, - type=int, - help="Number of cores for multiprocessing.", - ) parser.add_argument( "-i", "--interval", @@ -95,179 +34,132 @@ if __name__ == "__main__": if filename.endswith(".csv") and "tcp" in filename: pcap_csv_list.append(filename) - parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores)) - print("Start processing with {} jobs.".format(args.cores)) - for p in parts: - process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy")) - jobs.append(process) + counter = 1 + if len(pcap_csv_list) == 0: + print("No CSV files found.") - for j in jobs: - j.start() + pcap_csv_list.sort(key=lambda x: int(x.split("_")[-1].replace(".csv", ""))) - print("Started all jobs.") - # Ensure all the processes have finished - finished_job_counter = 0 - working = ["|", "/", "-", "\\", "|", "/", "-", "\\"] - w = 0 - while len(jobs) != finished_job_counter: - sleep(1) - print( - "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format( - working[w], - working[w], - working[w], - len(jobs), - finished_job_counter, - n.value, - len(pcap_csv_list), - round((n.value / len(pcap_csv_list)) * 100, 2), - ), - end="", + for csv in pcap_csv_list: + + print("\rProcessing {} out of {} CSVs.\t({}%)\t".format(counter, len(pcap_csv_list), math.floor(counter/len(pcap_csv_list)))) + transmission_df = pd.read_csv( + "{}{}".format(args.pcap_csv_folder, csv), + dtype=dict(is_retranmission=bool, is_dup_ack=bool), ) - finished_job_counter = 0 - for j in jobs: - if not j.is_alive(): - finished_job_counter += 1 - if (w + 1) % len(working) == 0: - w = 0 + transmission_df["datetime"] = pd.to_datetime(transmission_df["datetime"]) - pd.Timedelta(hours=1) + transmission_df = transmission_df.set_index("datetime") + transmission_df.index = pd.to_datetime(transmission_df.index) + transmission_df = transmission_df.sort_index() + + #print("Calculate goodput...") + + #print(transmission_df) + + # key for columns and level for index + transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum") + transmission_df["goodput"] = transmission_df["goodput"].apply( + lambda x: ((x * 8) / args.interval) / 10**6 + ) + + transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum() + transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply( + lambda x: ((x * 8) / args.interval) / 10 ** 6 + ) + + # set meta values and remove all not needed columns + cc_algo = transmission_df["congestion_control"].iloc[0] + cc_algo = cc_algo.upper() + transmission_direction = transmission_df["direction"].iloc[0] + + #transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling", "snd_cwnd"]) + + # read serial csv + serial_df = pd.read_csv(args.serial_file) + serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1) + serial_df = serial_df.set_index("datetime") + serial_df.index = pd.to_datetime(serial_df.index) + + transmission_df = pd.merge_asof( + transmission_df, + serial_df, + tolerance=pd.Timedelta("1s"), + right_index=True, + left_index=True, + ) + + # transmission timeline + + scaley = 1.5 + scalex = 1.0 + fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex]) + plt.title("{} with {}".format(transmission_direction, cc_algo)) + fig.subplots_adjust(right=0.75) + + twin1 = ax.twinx() + twin2 = ax.twinx() + twin3 = ax.twinx() + # Offset the right spine of twin2. The ticks and label have already been + # placed on the right by twinx above. + twin2.spines.right.set_position(("axes", 1.1)) + twin3.spines.right.set_position(("axes", 1.2)) + + + # create list fo color indices + transmission_df["index"] = transmission_df.index + color_dict = dict() + color_list = list() + i = 0 + for cell_id in transmission_df["cellID"]: + if cell_id not in color_dict: + color_dict[cell_id] = i + i += 1 + color_list.append(color_dict[cell_id]) + + transmission_df["cell_color"] = color_list + color_dict = None + color_list = None + + cmap = matplotlib.cm.get_cmap("Set3") + for c in transmission_df["cell_color"].unique(): + bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c] + ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=cmap.colors[c]) + + p4, = twin3.plot(transmission_df["snd_cwnd"].dropna(), color="lime", linestyle="dashed", label="cwnd") + p3, = twin2.plot(transmission_df["ack_rtt"].dropna(), color="red", linestyle="dashdot", label="ACK RTT") + p1, = ax.plot(transmission_df["goodput_rolling"], color="blue", linestyle="solid", label="goodput") + p2, = twin1.plot(transmission_df["downlink_cqi"].dropna(), color="magenta", linestyle="dotted", label="CQI") + + ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max()) + ax.set_ylim(0, 500) + twin1.set_ylim(0, 15) + twin2.set_ylim(0, transmission_df["ack_rtt"].max()) + twin3.set_ylim(0, transmission_df["snd_cwnd"].max() + 10) + + ax.set_xlabel("arrival time") + ax.set_ylabel("Goodput [mbps]") + twin1.set_ylabel("CQI") + twin2.set_ylabel("ACK RTT [s]") + twin3.set_ylabel("cwnd") + + ax.yaxis.label.set_color(p1.get_color()) + twin1.yaxis.label.set_color(p2.get_color()) + twin2.yaxis.label.set_color(p3.get_color()) + twin3.yaxis.label.set_color(p4.get_color()) + + tkw = dict(size=4, width=1.5) + ax.tick_params(axis='y', colors=p1.get_color(), **tkw) + twin1.tick_params(axis='y', colors=p2.get_color(), **tkw) + twin2.tick_params(axis='y', colors=p3.get_color(), **tkw) + twin3.tick_params(axis='y', colors=p4.get_color(), **tkw) + ax.tick_params(axis='x', **tkw) + + #ax.legend(handles=[p1, p2, p3]) + + if args.save: + plt.savefig("{}{}_plot.pdf".format(args.save, csv.replace(".csv", ""))) else: - w += 1 - print("\r\nSorting table...") + plt.show() + counter += 1 - transmission_df = pd.concat(frame_list) - frame_list = None - transmission_df = transmission_df.sort_index() - - print("Calculate goodput...") - - #print(transmission_df) - - # key for columns and level for index - transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum") - transmission_df["goodput"] = transmission_df["goodput"].apply( - lambda x: ((x * 8) / args.interval) / 10**6 - ) - - transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum() - transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply( - lambda x: ((x * 8) / args.interval) / 10 ** 6 - ) - - # set meta values and remove all not needed columns - cc_algo = transmission_df["congestion_control"].iloc[0] - cc_algo = cc_algo.upper() - transmission_direction = transmission_df["direction"].iloc[0] - - transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling"]) - - # read serial csv - serial_df = pd.read_csv(args.serial_file) - serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1) - serial_df = serial_df.set_index("datetime") - serial_df.index = pd.to_datetime(serial_df.index) - - transmission_df = pd.merge_asof( - transmission_df, - serial_df, - tolerance=pd.Timedelta("1s"), - right_index=True, - left_index=True, - ) - - # transmission timeline - - scaley = 1.5 - scalex = 1.0 - fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex]) - plt.title("{} with {}".format(transmission_direction, cc_algo)) - fig.subplots_adjust(right=0.75) - - twin1 = ax.twinx() - twin2 = ax.twinx() - # Offset the right spine of twin2. The ticks and label have already been - # placed on the right by twinx above. - twin2.spines.right.set_position(("axes", 1.2)) - - - # create list fo color indices - transmission_df["index"] = transmission_df.index - color_dict = dict() - color_list = list() - i = 0 - for cell_id in transmission_df["cellID"]: - if cell_id not in color_dict: - color_dict[cell_id] = i - i += 1 - color_list.append(color_dict[cell_id]) - - transmission_df["cell_color"] = color_list - color_dict = None - color_list = None - - cmap = matplotlib.cm.get_cmap("Set3") - for c in transmission_df["cell_color"].unique(): - bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c] - ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=cmap.colors[c]) - - p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput") - p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI") - p3, = twin2.plot(transmission_df["ack_rtt"], "-.", color="red", label="ACK RTT") - - ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max()) - ax.set_ylim(0, 500) - twin1.set_ylim(0, 15) - twin2.set_ylim(0, 1) - - ax.set_xlabel("Time") - ax.set_ylabel("Goodput") - twin1.set_ylabel("CQI") - twin2.set_ylabel("ACK RTT") - - ax.yaxis.label.set_color(p1.get_color()) - twin1.yaxis.label.set_color(p2.get_color()) - twin2.yaxis.label.set_color(p3.get_color()) - - tkw = dict(size=4, width=1.5) - ax.tick_params(axis='y', colors=p1.get_color(), **tkw) - twin1.tick_params(axis='y', colors=p2.get_color(), **tkw) - twin2.tick_params(axis='y', colors=p3.get_color(), **tkw) - ax.tick_params(axis='x', **tkw) - - #ax.legend(handles=[p1, p2, p3]) - - if args.save: - plt.savefig("{}timeline_plot.pdf".format(args.save)) - else: - plt.show() - - #goodput cdf - plt.clf() - - print("Calculate and polt goodput CDF...") - plot_cdf(transmission_df, "goodput") - plt.xlabel("goodput [mbps]") - plt.ylabel("CDF") - plt.legend([cc_algo]) - plt.title("{} with {}".format(transmission_direction, cc_algo)) - - if args.save: - plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput")) - else: - plt.show() - - # rtt cdf - plt.clf() - - print(transmission_df["ack_rtt"]) - print("Calculate and polt rtt CDF...") - plot_cdf(transmission_df, "ack_rtt") - plt.xlabel("ACK RTT [s]") - plt.ylabel("CDF") - plt.xscale("log") - plt.legend([cc_algo]) - plt.title("{} with {}".format(transmission_direction, cc_algo)) - - if args.save: - plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "ack_rtt")) - else: - plt.show() \ No newline at end of file + plt.clf() \ No newline at end of file