#!/usr/bin/env python3
"""Combine per-transmission TCP CSVs with a serial (modem) log.

For every ``*tcp*.csv`` file in the PCAP CSV folder the script computes the
goodput over fixed time bins, joins the nearest-in-time row of the serial
log, derives the summed effective LTE/NR bandwidth and finally writes the
``goodput`` and ``effective_bw_sum`` columns of all transmissions into one
CSV file.
"""
import math
import multiprocessing
import os
from argparse import ArgumentParser

import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns

sns.set()
# sns.set(font_scale=1.5)

tex_fonts = {
    "pgf.texsystem": "lualatex",
    # "legend.fontsize": "x-large",
    # "figure.figsize": (15, 5),
    "axes.labelsize": 15,  # "small",
    # "axes.titlesize": "x-large",
    "xtick.labelsize": 15,  # "small",
    "ytick.labelsize": 15,  # "small",
    "legend.fontsize": 15,
    "axes.formatter.use_mathtext": True,
    "mathtext.fontset": "dejavusans",
}

# plt.rcParams.update(tex_fonts)

# Indices of the LTE secondary component carriers read from the serial log.
_LTE_SCC_INDICES = range(1, 5)


def convert_cellid(value):
    """Extract the integer cell id from a serial-log ``Cell_ID`` field.

    The last space-separated token of *value* is taken, parentheses are
    stripped and the remainder is parsed as an integer.

    Returns:
        The parsed id, or ``-1`` when *value* is not a string or the token
        is not a valid integer.
    """
    if not isinstance(value, str):
        return -1
    token = value.split(" ")[-1].replace("(", "").replace(")", "")
    try:
        return int(token)
    except ValueError:
        return -1


def _scale_window_sum(window_sums, interval):
    """Scale per-window ``payload_size`` sums: ``* 8 / interval / 10**6``."""
    # presumably bytes -> Mbit/s; the unit of payload_size is not visible here
    return ((window_sums * 8) / interval) / 10 ** 6


def _load_serial_frame(serial_file):
    """Read the serial CSV and return it indexed and sorted by ``datetime``."""
    serial_df = pd.read_csv(serial_file, converters={"Cell_ID": convert_cellid})
    # NOTE(review): the one-hour shift presumably aligns time zones - confirm.
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(
        hours=1
    )
    serial_df = serial_df.set_index("datetime")
    # merge_asof requires sorted keys, so the sorted frame must be kept
    # (sort_index is not in-place).
    return serial_df.sort_index()


def _build_transmission_frame(csv_path, serial_df, interval):
    """Return the ``goodput`` / ``effective_bw_sum`` frame for one PCAP CSV.

    Args:
        csv_path: Path of the per-transmission CSV file.
        serial_df: Serial log frame, indexed and sorted by ``datetime``.
        interval: Window length in seconds used for the goodput columns.
    """
    # NOTE(review): "is_retranmission" looks like a misspelling of
    # "is_retransmission"; kept as-is because the CSV schema is not visible
    # here - confirm against the CSV header.
    transmission_df = pd.read_csv(
        csv_path,
        dtype=dict(is_retranmission=bool, is_dup_ack=bool),
    )

    # NOTE(review): the one-hour shift presumably aligns time zones - confirm.
    transmission_df["datetime"] = pd.to_datetime(
        transmission_df["datetime"]
    ) - pd.Timedelta(hours=1)
    transmission_df = transmission_df.set_index("datetime").sort_index()

    # srtt to [s]
    transmission_df["srtt"] = transmission_df["srtt"] / 10 ** 6

    window = "{}s".format(interval)

    # Sum of payload_size per fixed time bin, broadcast back to every row.
    binned_payload = (
        transmission_df["payload_size"]
        .groupby(pd.Grouper(level="datetime", freq=window))
        .transform("sum")
    )
    transmission_df["goodput"] = _scale_window_sum(binned_payload, interval)

    # Same quantity over a trailing time window. This column (like srtt) is
    # dropped by the final filter below.
    rolling_payload = transmission_df["payload_size"].rolling(window).sum()
    transmission_df["goodput_rolling"] = _scale_window_sum(rolling_payload, interval)

    # Attach the most recent serial row that is at most one second old.
    transmission_df = pd.merge_asof(
        transmission_df,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    transmission_df.index = transmission_df["arrival_time"]

    # replace 0 in RSRQ with NaN
    transmission_df["NR5G_RSRQ_(dB)"] = transmission_df["NR5G_RSRQ_(dB)"].replace(
        0, np.nan
    )
    transmission_df["RSRQ_(dB)"] = transmission_df["RSRQ_(dB)"].replace(0, np.nan)

    # A secondary carrier contributes its bandwidth only while its state is
    # ACTIVE and it is not configured for uplink; otherwise it counts as 0.
    for i in _LTE_SCC_INDICES:
        is_active = transmission_df["LTE_SCC{}_state".format(i)].isin(["ACTIVE"])
        not_uplink = transmission_df["LTE_SCC{}_UL_Configured".format(i)].isin(
            [False]
        )
        transmission_df["LTE_SCC{}_effective_bw".format(i)] = transmission_df[
            "LTE_SCC{}_bw".format(i)
        ].where(is_active & not_uplink, other=0)

    # sum all effective bandwidth for 5G and 4G
    transmission_df["SCC1_NR5G_effective_bw"] = transmission_df[
        "SCC1_NR5G_bw"
    ].fillna(0)

    transmission_df["lte_effective_bw_sum"] = (
        transmission_df["LTE_SCC1_effective_bw"].fillna(0)
        + transmission_df["LTE_SCC2_effective_bw"].fillna(0)
        + transmission_df["LTE_SCC3_effective_bw"].fillna(0)
        + transmission_df["LTE_SCC4_effective_bw"].fillna(0)
        + transmission_df["LTE_bw"].fillna(0)
    )
    transmission_df["nr_effective_bw_sum"] = transmission_df["SCC1_NR5G_effective_bw"]

    transmission_df["effective_bw_sum"] = (
        transmission_df["nr_effective_bw_sum"]
        + transmission_df["lte_effective_bw_sum"]
    )

    return transmission_df.filter(["goodput", "effective_bw_sum"])


def main():
    """Parse the command line, process every PCAP CSV and write the result."""
    parser = ArgumentParser()
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument(
        "-p", "--pcap_csv_folder", required=True, help="PCAP csv folder."
    )
    parser.add_argument(
        "--save",
        required=True,
        help="Path prefix of the output; '<prefix>_concat_bw_gp.csv' is written.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )

    args = parser.parse_args()

    pcap_csv_list = [
        filename
        for filename in os.listdir(args.pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]

    if not pcap_csv_list:
        # Nothing to concatenate; stop instead of failing later on None.
        print("No CSV files found.")
        raise SystemExit(1)

    # Order by the numeric suffix of the file name ("..._<n>.csv").
    pcap_csv_list.sort(key=lambda x: int(x.split("_")[-1].replace(".csv", "")))

    # The serial log is the same for every transmission, so read it once.
    serial_df = _load_serial_frame(args.serial_file)

    total = len(pcap_csv_list)
    frames = []
    for counter, csv_name in enumerate(pcap_csv_list, start=1):
        print(
            "\rProcessing {} out of {} CSVs.\t({}%)\t".format(
                counter, total, math.floor(100 * counter / total)
            )
        )
        frames.append(
            _build_transmission_frame(
                os.path.join(args.pcap_csv_folder, csv_name),
                serial_df,
                args.interval,
            )
        )

    concat_frame = pd.concat(frames)
    concat_frame.to_csv("{}_concat_bw_gp.csv".format(args.save))


if __name__ == "__main__":
    main()