#!/usr/bin/env python3
"""Plot goodput and ACK RTT of a captured TCP transmission.

The script loads every ``*.csv`` file whose name contains ``tcp`` from the
pcap csv folder (in parallel worker processes), computes the goodput per
fixed interval and per rolling window, joins the result with the serial csv
file and finally draws

* a goodput timeline whose background is shaded per ``cellID`` and
* the CDFs of the ``goodput`` and ``ack_rtt`` columns.

Each figure is shown interactively, or written as a pdf when ``--save`` is
given (the value is used as a plain prefix of the output file names).
"""
import multiprocessing
import os
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import axisartist
from mpl_toolkits.axes_grid1 import host_subplot

# Every timestamp read from a csv file is shifted back by this amount.
# NOTE(review): presumably compensates a one hour clock/timezone difference
# between capture and analysis -- confirm.
CLOCK_OFFSET = pd.Timedelta(hours=1)


def _read_timestamped_csv(path, **read_csv_kwargs):
    """Read *path* and return its rows indexed by the shifted ``datetime`` column.

    Extra keyword arguments are forwarded to ``pandas.read_csv``.
    """
    frame = pd.read_csv(path, **read_csv_kwargs)
    frame["datetime"] = pd.to_datetime(frame["datetime"]) - CLOCK_OFFSET
    return frame.set_index("datetime")


def _bytes_to_mbps(byte_sums, interval):
    """Convert byte totals collected over *interval* seconds to Mbit/s."""
    return byte_sums * 8 / interval / 10 ** 6


def _save_or_show(save_prefix, filename):
    """Write the current figure to ``save_prefix + filename`` or show it.

    The figure is shown interactively when *save_prefix* is empty or None.
    """
    if save_prefix:
        plt.savefig("{}{}".format(save_prefix, filename))
    else:
        plt.show()


def csv_to_dataframe(csv_list, dummy, pcap_csv_folder=None, counter=None, results=None):
    """Load a batch of pcap csv files and publish them as one dataframe.

    Intended to run as the target of a worker process.

    Args:
        csv_list: File names (relative to the pcap csv folder) to load.
        dummy: Unused; kept so that existing callers keep working.
        pcap_csv_folder: Folder containing the files. Defaults to the
            module-level ``args.pcap_csv_folder``.
        counter: Object with an integer ``value`` attribute that is
            incremented once per loaded file. Defaults to the module-level
            ``n``.
        results: List-like object the combined dataframe is appended to.
            Defaults to the module-level ``frame_list``.

    The module-level fallbacks are only defined when the file runs as a
    script, and they are not available inside workers started with the
    "spawn" start method; pass the three objects explicitly in that case.
    """
    if pcap_csv_folder is None:
        pcap_csv_folder = args.pcap_csv_folder
    if counter is None:
        counter = n
    if results is None:
        results = frame_list

    frames = []
    for csv_name in csv_list:
        frames.append(
            _read_timestamped_csv(
                os.path.join(pcap_csv_folder, csv_name),
                # NOTE(review): "is_retranmission" looks like a misspelling;
                # confirm it matches the csv header before renaming it.
                dtype=dict(is_retranmission=bool, is_dup_ack=bool),
            )
        )
        # Progress indicator only: read-modify-write on the shared value is
        # not atomic across workers.
        counter.value += 1

    # Concatenate once instead of once per file to avoid repeated copying.
    if frames:
        results.append(pd.concat(frames))


def chunk(it, size):
    """Return an iterator over consecutive tuples of at most *size* items.

    The last tuple holds the remaining items and may be shorter. Nothing is
    produced for an empty iterable or for ``size == 0``.
    """
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def plot_cdf(dataframe, column_name):
    """Plot the empirical CDF of ``dataframe[column_name]``.

    The distinct values of the column are counted, the counts are normalised
    to a PDF and accumulated to the CDF, which is then drawn with
    ``DataFrame.plot`` (grid enabled, the value on the x axis).
    """
    stats_df = (
        dataframe.groupby(column_name)[column_name]
        .agg("count")
        .pipe(pd.DataFrame)
        .rename(columns={column_name: "frequency"})
    )
    stats_df["PDF"] = stats_df["frequency"] / stats_df["frequency"].sum()
    stats_df["CDF"] = stats_df["PDF"].cumsum()
    stats_df = stats_df.reset_index()
    stats_df.plot(x=column_name, y=["CDF"], grid=True)


if __name__ == "__main__":
    parser = ArgumentParser()
    # --gps_file is required but not read anywhere in this script.
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument(
        "--show_providerinfo",
        default=False,
        help="Show providerinfo for map tiles an zoom levels.",
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )

    args = parser.parse_args()
    # Both values are used as divisors below.
    if args.cores < 1:
        parser.error("--cores must be at least 1.")
    if args.interval < 1:
        parser.error("--interval must be at least 1.")

    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()

    # load all pcap csv into one dataframe
    pcap_csv_list = [
        filename
        for filename in os.listdir(args.pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]
    if not pcap_csv_list:
        parser.error("No tcp csv files found in {}".format(args.pcap_csv_folder))

    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    # The shared objects are handed over explicitly so that the workers do
    # not depend on inheriting this module's globals.
    jobs = [
        multiprocessing.Process(
            target=csv_to_dataframe,
            args=(part, "dummy", args.pcap_csv_folder, n, frame_list),
        )
        for part in parts
    ]

    # Rounding up the chunk size can yield fewer jobs than requested cores.
    print("Start processing with {} jobs.".format(len(jobs)))
    for j in jobs:
        j.start()
    print("Started all jobs.")

    # Ensure all the processes have finished
    working = ["|", "/", "-", "\\"]
    w = 0
    while True:
        finished_job_counter = sum(not j.is_alive() for j in jobs)
        processed = n.value
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                processed,
                len(pcap_csv_list),
                round((processed / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
            # The line has no newline, so it must be flushed to become visible.
            flush=True,
        )
        if finished_job_counter == len(jobs):
            break
        w = (w + 1) % len(working)
        sleep(1)

    for j in jobs:
        j.join()
    failed_jobs = [j.name for j in jobs if j.exitcode != 0]
    if failed_jobs:
        # A crashed worker never appended its frame; do not plot partial data.
        raise SystemExit("\r\nWorker(s) failed: {}".format(", ".join(failed_jobs)))

    print("\r\nSorting table...")
    transmission_df = pd.concat(list(frame_list))
    frame_list = None
    transmission_df = transmission_df.sort_index()

    print("Calculate goodput...")
    window = "{}s".format(args.interval)
    payload = transmission_df["payload_size"]
    # Payload bytes summed per fixed interval, broadcast back to every row.
    transmission_df["goodput"] = _bytes_to_mbps(
        payload.groupby(pd.Grouper(level="datetime", freq=window)).transform("sum"),
        args.interval,
    )
    # Payload bytes summed over the time window ending at every row.
    transmission_df["goodput_rolling"] = _bytes_to_mbps(
        payload.rolling(window).sum(), args.interval
    )

    # set meta values and remove all not needed columns
    cc_algo = transmission_df["congestion_control"].iloc[0].upper()
    transmission_direction = transmission_df["direction"].iloc[0]
    transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling"])

    # read serial csv; merge_asof needs both sides sorted by their key
    serial_df = _read_timestamped_csv(args.serial_file).sort_index()

    # Attach to every packet the latest serial row that is at most 1 s older.
    transmission_df = pd.merge_asof(
        transmission_df,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # transmission timeline
    width_scale = 1.5
    height_scale = 1.0
    plt.figure(figsize=[6.4 * width_scale, 4.8 * height_scale])
    host = host_subplot(111, axes_class=axisartist.Axes)
    # Set the title on the host axes; calling plt.title() before the host
    # exists would create a second, empty axes underneath it.
    host.set_title("{} with {}".format(transmission_direction, cc_algo))

    # Number the cells in order of first appearance; rows without a cell id
    # get -1 and are not shaded.
    transmission_df["index"] = transmission_df.index
    cell_codes, _ = pd.factorize(transmission_df["cellID"])
    transmission_df["cell_color"] = cell_codes

    palette = plt.get_cmap("Set3").colors
    known_cells = transmission_df[transmission_df["cell_color"] >= 0]
    spans = known_cells.groupby("cell_color")["index"].agg(["min", "max"])
    for code, first_seen, last_seen in zip(spans.index, spans["min"], spans["max"]):
        # Wrap around when there are more cells than colours in the palette.
        host.axvspan(first_seen, last_seen, alpha=0.3, color=palette[code % len(palette)])

    plt.subplots_adjust()

    host.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput")
    host.set_xlabel("datetime")
    host.set_ylabel("goodput [Mbps]")
    host.set_ylim([0, 500])

    # Currently disabled: additional y axes for CQI and ACK RTT.
    # par11 = host.twinx()
    # par12 = host.twinx()
    # par13 = host.twinx()
    # axes offset
    # par12.axis["right"] = par12.new_fixed_axis(loc="right", offset=(60, 0))
    # par13.axis["right"] = par13.new_fixed_axis(loc="right", offset=(120, 0))
    # par11.axis["right"].toggle(all=True)
    # par12.axis["right"].toggle(all=True)
    # par13.axis["right"].toggle(all=True)
    # par11.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
    # par11.set_ylabel("CQI")
    # par11.set_ylim([0, 15])
    # par12.plot(transmission_df["ack_rtt"], "-.", color="red", label="ACK RTT")
    # par12.set_ylabel("ACK RTT [s]")
    # par12.set_ylim([0, 1])

    _save_or_show(args.save, "timeline_plot.pdf")

    # goodput cdf and rtt cdf: (column, name used in the log line, x label)
    cdf_plots = (
        ("goodput", "goodput", "goodput [mbps]"),
        ("ack_rtt", "rtt", "ACK RTT [s]"),
    )
    for column, log_name, x_label in cdf_plots:
        plt.clf()
        print("Calculate and polt {} CDF...".format(log_name))
        plot_cdf(transmission_df, column)
        plt.xlabel(x_label)
        plt.ylabel("CDF")
        plt.legend([cc_algo])
        plt.title("{} with {}".format(transmission_direction, cc_algo))
        _save_or_show(args.save, "{}_cdf_plot.pdf".format(column))