#!/usr/bin/env python3
"""Merge GPS, serial and pcap-derived TCP csv data into one geo-referenced
table, compute a rolling goodput, and write the result to <save>gps_plot.csv.

The pcap csv files are loaded in parallel worker processes; their partial
DataFrames are collected through a multiprocessing.Manager list and merged
onto the GPS track with ``pandas.merge_asof``.
"""
import multiprocessing
import os
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import pandas as pd
import geopandas as gpd
import contextily as cx  # kept: basemap plotting code below is commented out
import matplotlib.pyplot as plt  # kept: see commented-out plot call below


def csv_to_dataframe(csv_list, dummy):
    """Worker: load every pcap csv in *csv_list* into one datetime-indexed
    DataFrame and append it to the shared ``frame_list``.

    Relies on module globals set up by the parent before forking:
    ``args`` (for the csv folder), ``n`` (shared progress counter) and
    ``frame_list`` (shared result list).  *dummy* is unused but kept for
    interface compatibility with the existing Process(...) call.
    """
    global n
    global frame_list
    frames = []
    for csv in csv_list:
        tmp_df = pd.read_csv(
            # os.path.join works with or without a trailing slash on the folder.
            os.path.join(args.pcap_csv_folder, csv),
            # NOTE(review): "is_retranmission" looks like a typo of
            # "is_retransmission", but it must match the column name in the
            # pcap csv files -- confirm against the csv headers before renaming.
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"])
        tmp_df = tmp_df.set_index("datetime")
        frames.append(tmp_df)
        n.value += 1  # progress: one csv file done
    # Concatenate once at the end: repeated pd.concat inside the loop copies
    # all previously loaded rows on every iteration (O(n^2) in total rows).
    if frames:
        frame_list.append(pd.concat(frames))


def chunk(it, size):
    """Yield successive *size*-sized tuples from iterable *it*
    (the final tuple may be shorter; no empty tuples are produced)."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Prefix/folder for the output csv file.")
    parser.add_argument("--time_offset", default=0, type=int, help="Minutes added to GPS datetime.")
    parser.add_argument("--neg_offset", default=False, action="store_true", help="Subtract GPS time offset.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )
    args = parser.parse_args()

    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)   # shared counter: csv files processed so far
    frame_list = manager.list()  # shared list: one DataFrame per worker
    jobs = []

    # Collect the TCP pcap csv files to load.
    pcap_csv_list = [
        filename
        for filename in os.listdir(args.pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]
    if not pcap_csv_list:
        # Guard: an empty list would make the progress display below divide by zero.
        raise SystemExit("No tcp csv files found in {}".format(args.pcap_csv_folder))

    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))
    for p in parts:
        process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
        jobs.append(process)
    for j in jobs:
        j.start()
    print("Started all jobs.")

    # Poll until every worker has exited, drawing a spinner + progress line.
    finished_job_counter = 0
    working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_csv_list),
                round((n.value / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
        )
        finished_job_counter = sum(1 for j in jobs if not j.is_alive())
        w = (w + 1) % len(working)
    # Reap the finished workers (the loop above only polled is_alive()).
    for j in jobs:
        j.join()

    print("\r\nSorting table...")
    transmission_df = pd.concat(frame_list).sort_index()
    frame_list = None  # release the (potentially large) manager-backed list

    print("Calculate goodput...")
    # Rolling byte sum over the interval window, converted to Mbit/s:
    # bytes * 8 bits / interval seconds / 1e6. Vectorized instead of .apply().
    window_bytes = (
        transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
    )
    transmission_df["goodput"] = window_bytes * 8 / args.interval / 10**6

    # Load the GPS fixes and turn them into a GeoDataFrame (WGS84 lon/lat).
    df = pd.read_csv(args.gps_file)
    df["kmh"] = df["speed (knots)"] * 1.852  # knots -> km/h
    df["datetime"] = pd.to_datetime(df["datetime"])
    if args.time_offset > 0:
        # Shift the GPS clock to line it up with the capture clock.
        offset = pd.Timedelta(minutes=args.time_offset)
        if args.neg_offset:
            df["datetime"] = df["datetime"] - offset
        else:
            df["datetime"] = df["datetime"] + offset
    df = df.set_index("datetime")
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
        crs="EPSG:4326",
    )

    # Attach the nearest transmission sample (within 10 s) to every GPS fix.
    gdf = pd.merge_asof(
        gdf,
        transmission_df,
        tolerance=pd.Timedelta("10s"),
        right_index=True,
        left_index=True,
    )

    # Attach the nearest serial sample (within 1 s).
    serial_df = pd.read_csv(args.serial_file)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"])
    serial_df = serial_df.set_index("datetime")
    gdf = pd.merge_asof(
        gdf,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # Re-project to web mercator (EPSG:3857, what contextily basemaps use).
    df_wm = gdf.to_crs(epsg=3857)
    # ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
    # NOTE(review): with the default --save=None this writes "Nonegps_plot.csv";
    # confirm whether --save should be required or defaulted to "./".
    df_wm.to_csv("{}gps_plot.csv".format(args.save))
    print("Saved calculations to: {}gps_plot.csv".format(args.save))