#!/usr/bin/env python3
"""Plot a goodput / CQI / ACK-RTT timeline and CDFs from capture CSV files.

The script loads every ``*tcp*.csv`` file of a folder in parallel worker
processes, derives goodput over a configurable interval, joins the result
with a serial log (``cellID``, ``downlink_cqi``) and renders a timeline
plus goodput and ACK-RTT CDF plots.
"""
import multiprocessing
import os
import pickle
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from mpl_toolkits import axisartist
from mpl_toolkits.axes_grid1 import host_subplot


def csv_to_dataframe(csv_list, dummy, folder=None, counter=None, frames=None):
    """Load a batch of pcap CSV files and publish them as one dataframe.

    Args:
        csv_list: File names (relative to ``folder``) to load.
        dummy: Unused; kept so existing callers keep working.
        folder: Directory containing the CSV files. Falls back to the
            module-level ``args.pcap_csv_folder`` when omitted.
        counter: Shared object whose ``value`` is incremented once per
            loaded file. Falls back to the module-level ``n``.
        frames: Shared list that receives the combined dataframe. Falls
            back to the module-level ``frame_list``.

    The fallbacks only work when the worker inherits the parent's globals
    (fork start method). With spawn/forkserver the module is re-imported
    without running the ``__main__`` section, so the script passes all
    three values explicitly.
    """
    if folder is None:
        folder = args.pcap_csv_folder
    if counter is None:
        counter = n
    if frames is None:
        frames = frame_list

    loaded = []
    for csv_name in csv_list:
        # NOTE(review): "is_retranmission" looks like a typo of
        # "is_retransmission"; left untouched because the CSV header is not
        # visible from here -- confirm against the exporter.
        tmp_df = pd.read_csv(
            os.path.join(folder, csv_name),
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # NOTE(review): the fixed one hour shift presumably aligns the
        # capture clock with the serial log -- confirm.
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        loaded.append(tmp_df.set_index("datetime"))
        # Progress counter only: the proxy increment is a separate read and
        # write, so concurrent workers may occasionally lose an update.
        counter.value += 1

    # Concatenate once instead of once per file (avoids quadratic copying).
    if loaded:
        frames.append(pd.concat(loaded))


def chunk(it, size):
    """Return an iterator over consecutive tuples of at most ``size`` items.

    The last tuple may be shorter; iteration stops at the first empty tuple.
    """
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def plot_cdf(dataframe, column_name):
    """Plot the empirical CDF of ``dataframe[column_name]``.

    Counts the occurrences of every distinct value, normalises the counts to
    a PDF, accumulates them to a CDF and plots the CDF over the values.
    """
    stats_df = (
        dataframe.groupby(column_name)[column_name]
        .agg("count")
        .to_frame("frequency")
    )

    # PDF
    stats_df["PDF"] = stats_df["frequency"] / stats_df["frequency"].sum()

    # CDF
    stats_df["CDF"] = stats_df["PDF"].cumsum()
    stats_df = stats_df.reset_index()

    stats_df.plot(x=column_name, y=["CDF"], grid=True)


def _bytes_to_mbps(byte_sums, interval):
    """Convert per-interval byte sums into megabits per second."""
    return byte_sums * 8 / interval / 10 ** 6


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument("--export", default=None, help="Export figure as an pickle file.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )

    args = parser.parse_args()

    if args.cores < 1:
        parser.error("--cores must be at least 1.")

    # load all pcap csv into one dataframe
    pcap_csv_list = [
        filename
        for filename in os.listdir(args.pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]
    if not pcap_csv_list:
        parser.error(
            "No pcap csv files found in {}".format(args.pcap_csv_folder)
        )

    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()

    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))
    # Shared objects are handed over explicitly so the workers do not depend
    # on inheriting this module's globals.
    jobs = [
        multiprocessing.Process(
            target=csv_to_dataframe,
            args=(p, "dummy", args.pcap_csv_folder, n, frame_list),
        )
        for p in parts
    ]

    for j in jobs:
        j.start()

    print("Started all jobs.")

    # Ensure all the processes have finished
    finished_job_counter = 0
    working = ["|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_csv_list),
                round((n.value / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
        )
        finished_job_counter = sum(1 for j in jobs if not j.is_alive())
        w = (w + 1) % len(working)

    failed_jobs = [j for j in jobs if j.exitcode != 0]
    if failed_jobs:
        print(
            "\r\nWarning: {} of {} jobs failed; results may be incomplete.".format(
                len(failed_jobs), len(jobs)
            )
        )

    print("\r\nSorting table...")
    loaded_frames = list(frame_list)
    frame_list = None
    if not loaded_frames:
        raise SystemExit("No pcap csv data could be loaded.")
    transmission_df = pd.concat(loaded_frames).sort_index()
    loaded_frames = None

    print("Calculate goodput...")

    window = "{}s".format(args.interval)
    payload = transmission_df["payload_size"]

    # key for columns and level for index
    transmission_df["goodput"] = _bytes_to_mbps(
        payload.groupby(pd.Grouper(level="datetime", freq=window)).transform("sum"),
        args.interval,
    )
    transmission_df["goodput_rolling"] = _bytes_to_mbps(
        payload.rolling(window).sum(),
        args.interval,
    )

    # set meta values and remove all not needed columns
    cc_algo = transmission_df["congestion_control"].iloc[0].upper()
    transmission_direction = transmission_df["direction"].iloc[0]

    transmission_df = transmission_df.filter(
        ["goodput", "datetime", "ack_rtt", "goodput_rolling"]
    )

    # read serial csv
    serial_df = pd.read_csv(args.serial_file)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
    # merge_asof requires both sides to be sorted on the join key, so the
    # sorted frame must actually be kept.
    serial_df = serial_df.set_index("datetime").sort_index()

    transmission_df = pd.merge_asof(
        transmission_df,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # transmission timeline
    width_scale = 1.5
    height_scale = 1.0
    fig, ax = plt.subplots(figsize=[6.4 * width_scale, 4.8 * height_scale])
    plt.title("{} with {}".format(transmission_direction, cc_algo))
    fig.subplots_adjust(right=0.75)

    twin1 = ax.twinx()
    twin2 = ax.twinx()
    # Offset the right spine of twin2. The ticks and label have already been
    # placed on the right by twinx above.
    twin2.spines.right.set_position(("axes", 1.2))

    # create list of color indices, numbered in order of first appearance
    transmission_df["index"] = transmission_df.index
    color_dict = dict()
    transmission_df["cell_color"] = [
        color_dict.setdefault(cell_id, len(color_dict))
        for cell_id in transmission_df["cellID"]
    ]
    color_dict = None

    # pyplot.get_cmap works across Matplotlib releases; cm.get_cmap was
    # removed in Matplotlib 3.9.
    cmap = plt.get_cmap("Set3")
    palette = cmap.colors

    # Time range covered by every cell, computed once for all cells.
    cell_bounds = transmission_df.groupby("cell_color")["index"].agg(["min", "max"])
    for c in transmission_df["cell_color"].unique():
        ax.axvspan(
            cell_bounds.loc[c, "min"],
            cell_bounds.loc[c, "max"],
            alpha=0.3,
            color=palette[c % len(palette)],
        )

    p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput")
    p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
    p3, = twin2.plot(transmission_df["ack_rtt"], "-.", color="red", label="ACK RTT")

    ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max())
    ax.set_ylim(0, 500)
    twin1.set_ylim(0, 15)
    twin2.set_ylim(0, 1)

    ax.set_xlabel("Time")
    ax.set_ylabel("Goodput")
    twin1.set_ylabel("CQI")
    twin2.set_ylabel("ACK RTT")

    ax.yaxis.label.set_color(p1.get_color())
    twin1.yaxis.label.set_color(p2.get_color())
    twin2.yaxis.label.set_color(p3.get_color())

    tkw = dict(size=4, width=1.5)
    ax.tick_params(axis='y', colors=p1.get_color(), **tkw)
    twin1.tick_params(axis='y', colors=p2.get_color(), **tkw)
    twin2.tick_params(axis='y', colors=p3.get_color(), **tkw)
    ax.tick_params(axis='x', **tkw)

    if args.save:
        plt.savefig("{}timeline_plot.pdf".format(args.save))
    if args.export:
        # Context manager so the pickle file is flushed and closed.
        with open("{}timeline_plot.pkl".format(args.export), "wb") as export_file:
            pickle.dump(fig, export_file)

    # goodput cdf
    # NOTE(review): the timeline figure is cleared here without ever being
    # shown, so without --save/--export it is effectively discarded --
    # confirm this is intended.
    plt.clf()

    print("Calculate and polt goodput CDF...")
    plot_cdf(transmission_df, "goodput")
    plt.xlabel("goodput [mbps]")
    plt.ylabel("CDF")
    plt.legend([cc_algo])
    plt.title("{} with {}".format(transmission_direction, cc_algo))

    if args.save:
        plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput"))
    else:
        plt.show()

    # rtt cdf
    plt.clf()

    print(transmission_df["ack_rtt"])

    print("Calculate and polt rtt CDF...")
    plot_cdf(transmission_df, "ack_rtt")
    plt.xlabel("ACK RTT [s]")
    plt.ylabel("CDF")
    plt.xscale("log")
    plt.legend([cc_algo])
    plt.title("{} with {}".format(transmission_direction, cc_algo))

    if args.save:
        plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "ack_rtt"))
    else:
        plt.show()