ソースを参照

Plotting data related to gps loaction.

master
Lukas Prause 3年前
コミット
eb7c832b98
1個のファイルの変更208行の追加0行の削除
  1. +208
    -0
      plot_gps.py

+ 208
- 0
plot_gps.py ファイルの表示

@@ -0,0 +1,208 @@
#!/usr/bin/env python3
import multiprocessing
import os
from argparse import ArgumentParser
from math import ceil
from time import sleep

import pandas as pd
import geopandas as gpd
import contextily as cx
import matplotlib.pyplot as plt

from util import chunk_list


def csv_to_dataframe(csv_list, dummy):

global n
global frame_list

transmission_df = None

for csv in csv_list:
tmp_df = pd.read_csv(
"{}{}".format(args.pcap_csv_folder, csv),
dtype=dict(is_retranmission=bool, is_dup_ack=bool),
)
tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
tmp_df = tmp_df.set_index("datetime")
tmp_df.index = pd.to_datetime(tmp_df.index)
if transmission_df is None:
transmission_df = tmp_df
else:
transmission_df = pd.concat([transmission_df, tmp_df])

n.value += 1

frame_list.append(transmission_df)


from itertools import islice


def chunk(it, size):
it = iter(it)
return iter(lambda: tuple(islice(it, size)), ())


if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
parser.add_argument("-a", "--column", required=True, help="Column to plot")
parser.add_argument("-l", "--label", help="Label above the plot.")
parser.add_argument("--no_legend", action="store_false", default=True, help="Do not show legend.")

parser.add_argument(
"--show_providerinfo",
default=False,
help="Show providerinfo for map tiles an zoom levels.",
)
parser.add_argument(
"-c",
"--cores",
default=1,
type=int,
help="Number of cores for multiprocessing.",
)
parser.add_argument(
"-i",
"--interval",
default=10,
type=int,
help="Time interval for rolling window.",
)

args = parser.parse_args()
manager = multiprocessing.Manager()
n = manager.Value("i", 0)
frame_list = manager.list()
jobs = []

# load all pcap csv into one dataframe
pcap_csv_list = list()
for filename in os.listdir(args.pcap_csv_folder):
if filename.endswith(".csv") and "tcp" in filename:
pcap_csv_list.append(filename)

parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
print("Start processing with {} jobs.".format(args.cores))
for p in parts:
process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
jobs.append(process)

for j in jobs:
j.start()

print("Started all jobs.")
# Ensure all of the processes have finished
finished_job_counter = 0
working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
w = 0
while len(jobs) != finished_job_counter:
sleep(1)
print(
"\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
working[w],
working[w],
working[w],
len(jobs),
finished_job_counter,
n.value,
len(pcap_csv_list),
round((n.value / len(pcap_csv_list)) * 100, 2),
),
end="",
)
finished_job_counter = 0
for j in jobs:
if not j.is_alive():
finished_job_counter += 1
if (w + 1) % len(working) == 0:
w = 0
else:
w += 1
print("\r\nSorting table...")

transmission_df = pd.concat(frame_list)
frame_list = None
transmission_df = transmission_df.sort_index()

print("Calculate goodput...")
transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
transmission_df["goodput"] = transmission_df["goodput"].apply(
lambda x: ((x * 8) / args.interval) / 10**6
)

# load dataframe an put it into geopandas
df = pd.read_csv(args.gps_file)
df["kmh"] = df["speed (knots)"].apply(lambda x: x * 1.852)
df = df.set_index("datetime")
df.index = pd.to_datetime(df.index)

gdf = gpd.GeoDataFrame(
df,
geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
crs="EPSG:4326",
)
gdf = pd.merge_asof(
gdf,
transmission_df,
tolerance=pd.Timedelta("10s"),
right_index=True,
left_index=True,
)

# read serial csv
serial_df = pd.read_csv(args.serial_file)
serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
serial_df = serial_df.set_index("datetime")
serial_df.index = pd.to_datetime(serial_df.index)

gdf = pd.merge_asof(
gdf,
serial_df,
tolerance=pd.Timedelta("1s"),
right_index=True,
left_index=True,
)

print(gdf)

print("Start plotting...")
# format to needed format and add basemap as background
df_wm = gdf.to_crs(epsg=3857)
#df_wm.to_csv("debug-data.csv")
# ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
print(df_wm)
ax2 = df_wm.plot()
ax2 = df_wm.plot(args.column, cmap="hot", legend=args.no_legend, ax=ax2)
# ax2 = df_wm.plot.scatter(x="longitude", y="latitude", c="kmh", cmap="hot")
# zoom 17 is pretty
cx.add_basemap(ax2, source=cx.providers.OpenStreetMap.Mapnik, zoom=15)

# gdf.plot()
ax2.set_axis_off()
ax2.set_title(args.label if args.label else args.column)

if args.show_providerinfo:
#####################################
# Identifying how many tiles
latlon_outline = gdf.to_crs("epsg:4326").total_bounds
def_zoom = cx.tile._calculate_zoom(*latlon_outline)
print(f"Default Zoom level {def_zoom}")

cx.howmany(*latlon_outline, def_zoom, ll=True)
cx.howmany(*latlon_outline, def_zoom + 1, ll=True)
cx.howmany(*latlon_outline, def_zoom + 2, ll=True)

# Checking out some of the other providers and tiles
print(cx.providers.CartoDB.Voyager)
print(cx.providers.Stamen.TonerLite)
print(cx.providers.Stamen.keys())
#####################################

# df.plot(x="longitude", y="latitude", kind="scatter", colormap="YlOrRd")
plt.show()

読み込み中…
キャンセル
保存