#!/usr/bin/env python3
"""Join TCP pcap CSV exports with a modem serial log and export one CSV.

For every ``*tcp*.csv`` file in the pcap folder the script computes the
goodput per time window, attaches the nearest preceding serial-log row
(within one second) and sums the carrier bandwidth columns.  The columns
``goodput``, ``effective_bw_sum`` and ``srtt`` of all files are concatenated
and written to ``<save>_concat_bw_gp.csv``.
"""
import math
import multiprocessing
import os
from argparse import ArgumentParser

import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
#sns.set(font_scale=1.5)

tex_fonts = {
    "pgf.texsystem": "lualatex",
    # "legend.fontsize": "x-large",
    # "figure.figsize": (15, 5),
    "axes.labelsize": 15,  # "small",
    # "axes.titlesize": "x-large",
    "xtick.labelsize": 15,  # "small",
    "ytick.labelsize": 15,  # "small",
    "legend.fontsize": 15,
    "axes.formatter.use_mathtext": True,
    "mathtext.fontset": "dejavusans",
}

# plt.rcParams.update(tex_fonts)


def convert_cellid(value):
    """Extract the integer cell id from a raw ``Cell_ID`` field.

    The last space-separated token of *value* is taken, every parenthesis is
    removed from it and the remainder is parsed as an integer, e.g.
    ``"Cell ID (12345)"`` -> ``12345``.

    Returns:
        The parsed integer, or ``-1`` when *value* is not a string or the
        token is not a valid integer.
    """
    if not isinstance(value, str):
        return -1
    token = value.split(" ")[-1].replace("(", "").replace(")", "")
    try:
        return int(token)
    except ValueError:
        # int() is the only call here that can fail for a str input.
        return -1


def load_serial_frame(serial_file):
    """Read the serial CSV into a frame indexed and sorted by ``datetime``.

    Args:
        serial_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with a sorted ``DatetimeIndex`` and ``Cell_ID`` converted
        through :func:`convert_cellid`.
    """
    serial_df = pd.read_csv(serial_file, converters={"Cell_ID": convert_cellid})
    # NOTE(review): the fixed one-hour shift presumably maps local time to
    # UTC -- confirm against the capture setup.
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(
        hours=1
    )
    serial_df = serial_df.set_index("datetime")
    # sort_index() returns a new frame; merge_asof needs the sorted result.
    return serial_df.sort_index()


def load_transmission_frame(csv_path, interval):
    """Read one pcap CSV and add the goodput columns.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
        interval: Window length in seconds used for both goodput columns.

    Returns:
        DataFrame sorted by its ``datetime`` index with ``srtt`` divided by
        10**6 and the added columns ``goodput`` (fixed windows) and
        ``goodput_rolling`` (trailing window), both computed as
        ``payload_size`` sum * 8 / interval / 10**6.
    """
    # NOTE(review): "is_retranmission" looks like a typo for
    # "is_retransmission"; an unmatched dtype key appears to be ignored by
    # read_csv, so confirm the real CSV header before renaming it.
    transmission_df = pd.read_csv(
        csv_path,
        dtype=dict(is_retranmission=bool, is_dup_ack=bool),
    )
    # NOTE(review): same one-hour shift as applied to the serial log.
    transmission_df["datetime"] = pd.to_datetime(
        transmission_df["datetime"]
    ) - pd.Timedelta(hours=1)
    transmission_df = transmission_df.set_index("datetime").sort_index()

    # srtt to [s]
    transmission_df["srtt"] = transmission_df["srtt"] / 10 ** 6

    window = "{}s".format(interval)
    payload = transmission_df["payload_size"]
    # level= addresses the index (key= would address a column).
    bytes_per_window = payload.groupby(
        pd.Grouper(level="datetime", freq=window)
    ).transform("sum")
    transmission_df["goodput"] = ((bytes_per_window * 8) / interval) / 10 ** 6
    transmission_df["goodput_rolling"] = (
        (payload.rolling(window).sum() * 8) / interval
    ) / 10 ** 6
    return transmission_df


def add_effective_bandwidth(frame):
    """Add the effective-bandwidth columns to *frame* and return it.

    An LTE secondary carrier ``LTE_SCC<i>`` (i = 1..4) contributes its
    ``_bw`` value only in rows where its ``_state`` is ``"ACTIVE"`` and its
    ``_UL_Configured`` flag is ``False``; otherwise it contributes 0.
    ``effective_bw_sum`` is the sum of those four carriers, ``LTE_bw`` and
    ``SCC1_NR5G_bw`` with missing values counted as 0.
    """
    lte_sum = None
    for i in range(1, 5):
        is_active = frame["LTE_SCC{}_state".format(i)].isin(["ACTIVE"])
        not_uplink = frame["LTE_SCC{}_UL_Configured".format(i)].isin([False])
        effective = frame["LTE_SCC{}_bw".format(i)].where(
            is_active & not_uplink, other=0
        )
        frame["LTE_SCC{}_effective_bw".format(i)] = effective
        contribution = effective.fillna(0)
        lte_sum = contribution if lte_sum is None else lte_sum + contribution

    frame["SCC1_NR5G_effective_bw"] = frame["SCC1_NR5G_bw"].fillna(0)
    frame["lte_effective_bw_sum"] = lte_sum + frame["LTE_bw"].fillna(0)
    frame["nr_effective_bw_sum"] = frame["SCC1_NR5G_effective_bw"]
    frame["effective_bw_sum"] = (
        frame["nr_effective_bw_sum"] + frame["lte_effective_bw_sum"]
    )
    return frame


def build_bw_goodput_frame(transmission_df, serial_df):
    """Merge one transmission frame with the serial log and reduce it.

    Both frames must be sorted by their ``datetime`` index.  Each
    transmission row receives the latest serial row that is at most one
    second older.

    Returns:
        DataFrame with a fresh integer index holding the columns
        ``goodput``, ``effective_bw_sum`` and ``srtt``.
    """
    merged = pd.merge_asof(
        transmission_df,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )
    # replace 0 in RSRQ with NaN (np.nan: the np.NaN alias is gone in NumPy 2)
    merged["NR5G_RSRQ_(dB)"] = merged["NR5G_RSRQ_(dB)"].replace(0, np.nan)
    merged["RSRQ_(dB)"] = merged["RSRQ_(dB)"].replace(0, np.nan)

    merged = add_effective_bandwidth(merged)
    merged = merged.filter(["goodput", "effective_bw_sum", "srtt"])
    return merged.reset_index(drop=True)


def main():
    """Parse the command line, process every pcap CSV and write the result."""
    parser = ArgumentParser()
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument(
        "-p", "--pcap_csv_folder", required=True, help="PCAP csv folder."
    )
    parser.add_argument("--save", required=True, help="Location to save pdf file.")
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )
    args = parser.parse_args()

    pcap_csv_list = [
        filename
        for filename in os.listdir(args.pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]
    if not pcap_csv_list:
        # Nothing to concatenate; stop instead of failing on an empty result.
        print("No CSV files found.")
        return
    # Order by the numeric suffix: "<anything>_<number>.csv".
    pcap_csv_list.sort(key=lambda x: int(x.split("_")[-1].replace(".csv", "")))

    # The serial log is the same for every pcap file, so it is read once.
    serial_df = load_serial_frame(args.serial_file)

    total = len(pcap_csv_list)
    frames = []
    for counter, csv_name in enumerate(pcap_csv_list, start=1):
        print(
            "\rProcessing {} out of {} CSVs.\t({}%)\t".format(
                counter, total, math.floor(counter / total * 100)
            )
        )
        transmission_df = load_transmission_frame(
            os.path.join(args.pcap_csv_folder, csv_name), args.interval
        )
        frames.append(build_bw_goodput_frame(transmission_df, serial_df))

    concat_frame = pd.concat(frames)
    concat_frame.to_csv("{}_concat_bw_gp.csv".format(args.save))


if __name__ == "__main__":
    main()