#!/usr/bin/env python3
import multiprocessing
import os
import pickle
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd


def csv_to_dataframe(csv_list):
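    # n, frame_list and args are globals inherited from the parent process;
    # this relies on fork-based multiprocessing (it breaks under "spawn")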
    global n
    global frame_list

    frames = []

    for csv in csv_list:
        tmp_df = pd.read_csv(
            os.path.join(args.pcap_csv_folder, csv),
            # dtype keys follow the csv header (including the "is_retranmission" spelling)
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # shift the timestamps by one hour (presumably a fixed clock offset)
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        tmp_df = tmp_df.set_index("datetime")
        frames.append(tmp_df)

        # processed-file counter, only used for the progress display
        n.value += 1

    frame_list.append(pd.concat(frames))


def chunk(it, size):
    """Split an iterable into tuples of at most `size` elements."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())

def plot_cdf(dataframe, column_name):
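    """Plot the empirical CDF of column_name: count how often each value
    occurs, normalize the counts to a PDF, then take the cumulative sum."""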
    stats_df = (
        dataframe.groupby(column_name)[column_name]
        .agg("count")
        .pipe(pd.DataFrame)
        .rename(columns={column_name: "frequency"})
    )

    # PDF
    stats_df["PDF"] = stats_df["frequency"] / stats_df["frequency"].sum()

    # CDF
    stats_df["CDF"] = stats_df["PDF"].cumsum()
    stats_df = stats_df.reset_index()

    stats_df.plot(x=column_name, y=["CDF"], grid=True)

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Path prefix for the saved PDF files.")
    parser.add_argument("--export", default=None, help="Path prefix for the exported pickle figure.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for the rolling window in seconds.",
    )

    args = parser.parse_args()
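    # manager-backed objects so the workers can report progress and hand
    # their dataframes back to the parent process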
    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()
    jobs = []

    # collect all tcp pcap csv files
    pcap_csv_list = list()
    for filename in os.listdir(args.pcap_csv_folder):
        if filename.endswith(".csv") and "tcp" in filename:
            pcap_csv_list.append(filename)

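    # split the file list into one chunk per worker process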
    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))
    for p in parts:
        process = multiprocessing.Process(target=csv_to_dataframe, args=(p,))
        jobs.append(process)

    for j in jobs:
        j.start()

    print("Started all jobs.")
    # Ensure all the processes have finished
    finished_job_counter = 0
    working = ["|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_csv_list),
                round((n.value / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
        )
        finished_job_counter = 0
        for j in jobs:
            if not j.is_alive():
                finished_job_counter += 1
        w = (w + 1) % len(working)
    print("\r\nSorting table...")

    transmission_df = pd.concat(frame_list)
    frame_list = None
    transmission_df = transmission_df.sort_index()

    print("Calculating goodput...")

    # convert sRTT from microseconds to seconds
    transmission_df["srtt"] = transmission_df["srtt"] / 10**6

    # per-interval goodput: sum the payload bytes in fixed time buckets
    # (Grouper on the datetime index) and convert to Mbit/s
    transmission_df["goodput"] = (
        transmission_df["payload_size"]
        .groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval)))
        .transform("sum")
    )
    transmission_df["goodput"] = (transmission_df["goodput"] * 8 / args.interval) / 10**6

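    # rolling-window goodput over the same interval, evaluated at every row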
- transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
- transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
- lambda x: ((x * 8) / args.interval) / 10 ** 6
- )

    # set meta values and drop the columns that are no longer needed
    cc_algo = transmission_df["congestion_control"].iloc[0].upper()
    transmission_direction = transmission_df["direction"].iloc[0]

    transmission_df = transmission_df.filter(["goodput", "datetime", "srtt", "goodput_rolling"])

    # read serial csv
    serial_df = pd.read_csv(args.serial_file)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
    serial_df = serial_df.set_index("datetime")
    # merge_asof below requires a sorted index
    serial_df = serial_df.sort_index()

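    # attach to every packet row the closest serial sample (at most 1 s apart)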
    transmission_df = pd.merge_asof(
        transmission_df,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # transmission timeline

    scale_width = 1.5
    scale_height = 1.0
    fig, ax = plt.subplots(figsize=[6.4 * scale_width, 4.8 * scale_height])
    plt.title("{} with {}".format(transmission_direction, cc_algo))
    fig.subplots_adjust(right=0.75)

    twin1 = ax.twinx()
    twin2 = ax.twinx()
    # Offset the right spine of twin2. The ticks and label have already been
    # placed on the right by twinx above.
    twin2.spines.right.set_position(("axes", 1.2))

    # map each cell ID to a small integer color index, in order of appearance
    transmission_df["index"] = transmission_df.index
    color_dict = dict()
    color_list = list()
    i = 0
    for cell_id in transmission_df["cellID"]:
        if cell_id not in color_dict:
            color_dict[cell_id] = i
            i += 1
        color_list.append(color_dict[cell_id])

    transmission_df["cell_color"] = color_list
    color_dict = None
    color_list = None

    # tile the colormap so there is a color for every cell
    cmap = matplotlib.cm.get_cmap("Set3")
    unique_cells = transmission_df["cell_color"].unique()
    color_list = cmap.colors * ceil(len(unique_cells) / len(cmap.colors))

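    # shade the time span of each cell with its own background color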
    for c in transmission_df["cell_color"].unique():
        bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c]
        ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=color_list[c])

    p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput")
    p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
    p3, = twin2.plot(transmission_df["srtt"], "-.", color="red", label="sRTT")

    ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max())
    ax.set_ylim(0, 500)
    twin1.set_ylim(0, 15)
    twin2.set_ylim(0, 1)

    ax.set_xlabel("Time")
    ax.set_ylabel("Goodput [Mbit/s]")
    twin1.set_ylabel("CQI")
    twin2.set_ylabel("sRTT [s]")

    ax.yaxis.label.set_color(p1.get_color())
    twin1.yaxis.label.set_color(p2.get_color())
    twin2.yaxis.label.set_color(p3.get_color())

    tkw = dict(size=4, width=1.5)
    ax.tick_params(axis="y", colors=p1.get_color(), **tkw)
    twin1.tick_params(axis="y", colors=p2.get_color(), **tkw)
    twin2.tick_params(axis="y", colors=p3.get_color(), **tkw)
    ax.tick_params(axis="x", **tkw)

    # ax.legend(handles=[p1, p2, p3])

    if args.save:
        plt.savefig("{}timeline_plot.pdf".format(args.save))
    if args.export:
        with open("{}timeline_plot.pkl".format(args.export), "wb") as f:
            pickle.dump(fig, f)

    # goodput cdf
    plt.clf()

    print("Calculate and plot goodput CDF...")
    plot_cdf(transmission_df, "goodput")
    plt.xlabel("Goodput [Mbit/s]")
    plt.ylabel("CDF")
    plt.legend([cc_algo])
    plt.title("{} with {}".format(transmission_direction, cc_algo))

    if args.save:
        plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput"))
    else:
        plt.show()

    # rtt cdf
    plt.clf()

    print("Calculate and plot rtt CDF...")
    plot_cdf(transmission_df, "srtt")
    plt.xlabel("sRTT [s]")
    plt.ylabel("CDF")
    plt.xscale("log")
    plt.legend([cc_algo])
    plt.title("{} with {}".format(transmission_direction, cc_algo))

    if args.save:
        plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "srtt"))
    else:
        plt.show()