#!/usr/bin/env python3
import multiprocessing
import os
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import pandas as pd
import geopandas as gpd
import contextily as cx
import matplotlib.pyplot as plt


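# Worker for multiprocessing.Process. Relies on fork semantics: the Manager
# proxies n and frame_list created in __main__ are inherited by the children.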
def csv_to_dataframe(csv_list, pcap_csv_folder):
    """Read one batch of pcap csv files and append the combined frame to frame_list."""
    frames = []

    for csv in csv_list:
        tmp_df = pd.read_csv(
            os.path.join(pcap_csv_folder, csv),
            # dtype keys must match the csv header spelling exactly
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # shift timestamps back one hour to line up with the GPS clock
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        tmp_df = tmp_df.set_index("datetime")
        frames.append(tmp_df)
        # note: += on a manager Value is not atomic; the counter is display-only
        n.value += 1

    if frames:
        frame_list.append(pd.concat(frames))


def chunk(it, size):
    """Yield size-sized tuples from an iterable; the last tuple may be shorter."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


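# Example invocation (script and file names are illustrative):
#   python3 plot_goodput_map.py -f gps.csv -s serial.csv -p pcap_csvs/ -a goodput -c 4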
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("-a", "--column", required=True, help="Column to plot.")
    parser.add_argument("-l", "--label", help="Label above the plot.")
    # store_false: args.no_legend stays True unless --no_legend is given
    parser.add_argument("--no_legend", action="store_false", help="Do not show legend.")
    parser.add_argument("--save", default=None, help="Folder in which to save the pdf file.")

    parser.add_argument(
        "--show_providerinfo",
        action="store_true",
        help="Show provider info for map tiles and zoom levels.",
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for the rolling window in seconds.",
    )

    args = parser.parse_args()
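    # shared state for the workers: a progress counter and a list collecting
    # one combined DataFrame per worker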
    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()
    jobs = []

    # collect all tcp pcap csv files and split them into one batch per core
    pcap_csv_list = [
        filename
        for filename in os.listdir(args.pcap_csv_folder)
        if filename.endswith(".csv") and "tcp" in filename
    ]
    if not pcap_csv_list:
        raise SystemExit("No tcp pcap csv files found in {}".format(args.pcap_csv_folder))

    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
- print("Start processing with {} jobs.".format(args.cores))
- for p in parts:
- process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
- jobs.append(process)
-
- for j in jobs:
- j.start()
-
- print("Started all jobs.")
- # Ensure all of the processes have finished
- finished_job_counter = 0
- working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
- w = 0
- while len(jobs) != finished_job_counter:
- sleep(1)
- print(
- "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
- working[w],
- working[w],
- working[w],
- len(jobs),
- finished_job_counter,
- n.value,
- len(pcap_csv_list),
- round((n.value / len(pcap_csv_list)) * 100, 2),
- ),
- end="",
- )
- finished_job_counter = 0
- for j in jobs:
- if not j.is_alive():
- finished_job_counter += 1
- if (w + 1) % len(working) == 0:
- w = 0
- else:
- w += 1
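    # every worker has exited; join them before touching the shared list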
- print("\r\nSorting table...")
-
- transmission_df = pd.concat(frame_list)
- frame_list = None
- transmission_df = transmission_df.sort_index()
-
- print("Calculate goodput...")
- transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
- transmission_df["goodput"] = transmission_df["goodput"].apply(
- lambda x: ((x * 8) / args.interval) / 10**6
- )
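
    # both asof merges below join on the time index, so every frame involved
    # must be sorted by timestamp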
    # load the GPS dataframe and put it into geopandas
    df = pd.read_csv(args.gps_file)
    df["kmh"] = df["speed (knots)"] * 1.852  # knots to km/h
    df = df.set_index("datetime")
    df.index = pd.to_datetime(df.index)
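
    # build point geometries from the lon/lat columns (WGS84)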
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
        crs="EPSG:4326",
    )
    # attach the nearest transmission sample (at most 10 s away) to every GPS fix
    gdf = pd.merge_asof(
        gdf,
        transmission_df,
        tolerance=pd.Timedelta("10s"),
        right_index=True,
        left_index=True,
    )

    # read serial csv; its timestamps are also one hour ahead of the GPS clock
    serial_df = pd.read_csv(args.serial_file)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
    serial_df = serial_df.set_index("datetime")

    gdf = pd.merge_asof(
        gdf,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )
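
    # gdf now carries position, speed, goodput and serial data for every fix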
    print(gdf)

- print("Start plotting...")
- # format to needed format and add basemap as background
- df_wm = gdf.to_crs(epsg=3857)
- #df_wm.to_csv("debug-data.csv")
- # ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
- print(df_wm)
- ax2 = df_wm.plot()
- ax2 = df_wm.plot(args.column, cmap="hot", legend=args.no_legend, ax=ax2)
- # ax2 = df_wm.plot.scatter(x="longitude", y="latitude", c="kmh", cmap="hot")
- # zoom 17 is pretty
- cx.add_basemap(ax2, source=cx.providers.OpenStreetMap.Mapnik, zoom=15)
-
- # gdf.plot()
- ax2.set_axis_off()
- ax2.set_title(args.label if args.label else args.column)
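
    # optional diagnostics: how many tiles each zoom level would fetch,
    # plus a few alternative tile providers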
    if args.show_providerinfo:
        latlon_outline = gdf.to_crs("epsg:4326").total_bounds
        def_zoom = cx.tile._calculate_zoom(*latlon_outline)
        print(f"Default Zoom level {def_zoom}")

        cx.howmany(*latlon_outline, def_zoom, ll=True)
        cx.howmany(*latlon_outline, def_zoom + 1, ll=True)
        cx.howmany(*latlon_outline, def_zoom + 2, ll=True)

        print(cx.providers.CartoDB.Voyager)
        print(cx.providers.Stamen.TonerLite)
        print(cx.providers.Stamen.keys())

    if args.save:
        plt.savefig(os.path.join(args.save, "gps_plot.pdf"))
    else:
        plt.show()