#!/usr/bin/env python3
"""Plot one measurement column along a GPS track on an OpenStreetMap basemap.

The script reads a GPS csv, a serial csv and a folder of "tcp" pcap csv files,
derives a rolling goodput from the pcap data, joins everything on the GPS
timestamps and either plots the chosen column or (with ``--no_plot``) dumps
the merged table to csv.
"""
import multiprocessing
import os
import sys
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import pandas as pd

# NOTE: geopandas, contextily and matplotlib are imported inside the functions
# that use them. Worker processes started with the "spawn"/"forkserver" start
# methods re-import this module and only need pandas.


def csv_to_dataframe(csv_list, dummy, pcap_csv_folder=None, counter=None, results=None):
    """Read several pcap csv files and append them as one DataFrame to ``results``.

    Args:
        csv_list: File names (relative to ``pcap_csv_folder``) to read.
        dummy: Unused placeholder, kept so existing two-argument calls work.
        pcap_csv_folder: Directory containing the files. Falls back to the
            module-level ``args.pcap_csv_folder`` when omitted.
        counter: Object whose ``value`` attribute is incremented once per file
            read. Falls back to the module-level ``n`` when omitted.
        results: List-like object the combined, datetime-indexed DataFrame is
            appended to. Falls back to the module-level ``frame_list``.
    """
    # Legacy fallback: earlier versions read these three objects from module
    # globals, which only exist in a forked child of the script process.
    if pcap_csv_folder is None:
        pcap_csv_folder = globals()["args"].pcap_csv_folder
    if counter is None:
        counter = globals()["n"]
    if results is None:
        results = globals()["frame_list"]

    frames = []
    for csv_name in csv_list:
        frame = pd.read_csv(
            os.path.join(pcap_csv_folder, csv_name),
            # NOTE(review): "is_retranmission" looks like a misspelling of
            # "is_retransmission" - confirm against the csv header before
            # changing it.
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # Timestamps are used as-is; an earlier revision subtracted one hour
        # at this point.
        frame["datetime"] = pd.to_datetime(frame["datetime"])
        frames.append(frame.set_index("datetime"))
        # Read-modify-write on a shared value: with several workers the count
        # may lag slightly. It is only used for the progress display.
        counter.value += 1

    # Concatenate once instead of once per file to avoid repeated copying.
    if frames:
        results.append(pd.concat(frames))


def chunk(it, size):
    """Return an iterator over consecutive tuples of at most ``size`` items."""
    it = iter(it)
    # The two-argument form of iter() stops at the first empty tuple.
    return iter(lambda: tuple(islice(it, size)), ())


def _build_parser():
    """Create the command-line parser for the script."""
    parser = ArgumentParser()
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("-a", "--column", required=True, help="Column to plot")
    parser.add_argument("-l", "--label", help="Label above the plot.")
    # Stored inverted: args.no_legend is True while the legend should be shown.
    parser.add_argument("--no_legend", action="store_false", default=True, help="Do not show legend.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument("--time_offset", default=None, type=int, help="Minutes added to GPS datetime.")
    parser.add_argument("--no_plot", default=False, action="store_true", help="Only calculations without plotting.")
    parser.add_argument(
        "--show_providerinfo",
        # Works as a bare flag; a value is still accepted (and, as before,
        # any non-empty value enables the output).
        nargs="?",
        const=True,
        default=False,
        help="Show providerinfo for map tiles an zoom levels.",
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )
    return parser


def _wait_for_jobs(jobs, counter, total_files):
    """Poll the worker processes once per second and print a progress line."""
    working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
    w = 0
    finished_job_counter = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                counter.value,
                total_files,
                round((counter.value / total_files) * 100, 2),
            ),
            end="",
            # No newline is written, so flush explicitly to make the line
            # visible on a line-buffered terminal.
            flush=True,
        )
        finished_job_counter = sum(not job.is_alive() for job in jobs)
        w = (w + 1) % len(working)


def _load_transmissions(pcap_csv_folder, cores, interval):
    """Load all "tcp" pcap csv files in parallel and add a ``goodput`` column.

    Returns the time-sorted DataFrame of all files. Exits the program when no
    matching file exists or a worker process fails.
    """
    pcap_csv_list = [
        filename
        for filename in os.listdir(pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]
    if not pcap_csv_list:
        sys.exit('No "tcp" csv files found in {}'.format(pcap_csv_folder))

    with multiprocessing.Manager() as manager:
        n = manager.Value("i", 0)
        frame_list = manager.list()

        parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / cores))
        print("Start processing with {} jobs.".format(cores))
        jobs = [
            multiprocessing.Process(
                target=csv_to_dataframe,
                args=(part, "dummy", pcap_csv_folder, n, frame_list),
            )
            for part in parts
        ]
        for job in jobs:
            job.start()
        print("Started all jobs.")

        _wait_for_jobs(jobs, n, len(pcap_csv_list))
        for job in jobs:
            job.join()
        # Copy the frames out before the manager process is shut down.
        frames = list(frame_list)

    if any(job.exitcode != 0 for job in jobs):
        # Continuing would silently compute goodput from partial data.
        sys.exit("\r\nAt least one worker process failed while reading pcap csv files.")

    print("\r\nSorting table...")
    transmission_df = pd.concat(frames).sort_index()

    print("Calculate goodput...")
    window_sum = transmission_df["payload_size"].rolling("{}s".format(interval)).sum()
    # Presumably Mbit/s, assuming payload_size is in bytes - TODO confirm.
    transmission_df["goodput"] = ((window_sum * 8) / interval) / 10**6
    return transmission_df


def _build_track(gps_file, serial_file, time_offset, transmission_df):
    """Join GPS, pcap and serial data into one GeoDataFrame (EPSG:4326)."""
    import geopandas as gpd

    df = pd.read_csv(gps_file)
    df["kmh"] = df["speed (knots)"] * 1.852
    df["datetime"] = pd.to_datetime(df["datetime"])
    if time_offset:
        df["datetime"] = df["datetime"] + pd.Timedelta(minutes=time_offset)
    df = df.set_index("datetime")

    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
        crs="EPSG:4326",
    )
    gdf = pd.merge_asof(
        gdf,
        transmission_df,
        tolerance=pd.Timedelta("10s"),
        right_index=True,
        left_index=True,
    )

    # Serial timestamps are used as-is; an earlier revision subtracted one
    # hour at this point.
    serial_df = pd.read_csv(serial_file)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"])
    serial_df = serial_df.set_index("datetime")

    return pd.merge_asof(
        gdf,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )


def _print_provider_info(cx, gdf):
    """Print tile counts for a few zoom levels and some tile provider details."""
    latlon_outline = gdf.to_crs("epsg:4326").total_bounds
    # NOTE: _calculate_zoom is a private contextily helper.
    def_zoom = cx.tile._calculate_zoom(*latlon_outline)
    print(f"Default Zoom level {def_zoom}")
    for extra in range(3):
        cx.howmany(*latlon_outline, def_zoom + extra, ll=True)

    print(cx.providers.CartoDB.Voyager)
    # The Stamen entry may be missing from the installed provider list; skip
    # it then instead of failing before the figure is saved or shown.
    stamen = getattr(cx.providers, "Stamen", None)
    if stamen is not None:
        print(stamen.TonerLite)
        print(stamen.keys())


def _plot_track(gdf, df_wm, args):
    """Draw ``args.column`` along the track and save or show the figure."""
    import contextily as cx
    import matplotlib.pyplot as plt

    print("Start plotting...")
    ax = df_wm.plot()
    ax = df_wm.plot(args.column, cmap="hot", legend=args.no_legend, ax=ax)
    cx.add_basemap(ax, source=cx.providers.OpenStreetMap.Mapnik, zoom=15)
    ax.set_axis_off()
    ax.set_title(args.label if args.label else args.column)

    if args.show_providerinfo:
        _print_provider_info(cx, gdf)

    if args.save:
        # --save is used as a path prefix, not as a directory.
        plt.savefig("{}gps_plot.pdf".format(args.save))
    else:
        plt.show()


def main():
    """Parse the command line, build the merged table and plot or export it."""
    parser = _build_parser()
    args = parser.parse_args()
    if args.cores < 1:
        parser.error("--cores must be at least 1")
    if args.interval < 1:
        parser.error("--interval must be at least 1")

    # Import the plotting stack now so a missing package is reported before
    # the (potentially long) csv processing starts.
    import contextily  # noqa: F401
    import geopandas  # noqa: F401
    import matplotlib.pyplot  # noqa: F401

    transmission_df = _load_transmissions(args.pcap_csv_folder, args.cores, args.interval)
    gdf = _build_track(args.gps_file, args.serial_file, args.time_offset, transmission_df)

    # Web Mercator, the projection of the basemap tiles.
    df_wm = gdf.to_crs(epsg=3857)

    if args.no_plot:
        # Without --save the file goes to the working directory instead of
        # getting a literal "None" prefix.
        save_prefix = args.save if args.save is not None else ""
        csv_path = "{}gps_plot.csv".format(save_prefix)
        df_wm.to_csv(csv_path)
        print("Saved calculations to: {}".format(csv_path))
        return

    _plot_track(gdf, df_wm, args)


if __name__ == "__main__":
    main()