You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

223 line
7.2KB

  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. from argparse import ArgumentParser
  5. from math import ceil
  6. from time import sleep
  7. import pandas as pd
  8. import geopandas as gpd
  9. import contextily as cx
  10. import matplotlib.pyplot as plt
  11. def csv_to_dataframe(csv_list, dummy):
  12. global n
  13. global frame_list
  14. transmission_df = None
  15. for csv in csv_list:
  16. tmp_df = pd.read_csv(
  17. "{}{}".format(args.pcap_csv_folder, csv),
  18. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  19. )
  20. #tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  21. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"])
  22. tmp_df = tmp_df.set_index("datetime")
  23. tmp_df.index = pd.to_datetime(tmp_df.index)
  24. if transmission_df is None:
  25. transmission_df = tmp_df
  26. else:
  27. transmission_df = pd.concat([transmission_df, tmp_df])
  28. n.value += 1
  29. frame_list.append(transmission_df)
  30. from itertools import islice
  31. def chunk(it, size):
  32. it = iter(it)
  33. return iter(lambda: tuple(islice(it, size)), ())
  34. if __name__ == "__main__":
  35. parser = ArgumentParser()
  36. parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
  37. parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
  38. parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
  39. parser.add_argument("-a", "--column", required=True, help="Column to plot")
  40. parser.add_argument("-l", "--label", help="Label above the plot.")
  41. parser.add_argument("--no_legend", action="store_false", default=True, help="Do not show legend.")
  42. parser.add_argument("--save", default=None, help="Location to save pdf file.")
  43. parser.add_argument("--time_offset", default=None, type=int, help="Minutes added to GPS datetime.")
  44. parser.add_argument("--no_plot", default=False, action="store_true", help="Only calculations without plotting.")
  45. parser.add_argument(
  46. "--show_providerinfo",
  47. default=False,
  48. help="Show providerinfo for map tiles an zoom levels.",
  49. )
  50. parser.add_argument(
  51. "-c",
  52. "--cores",
  53. default=1,
  54. type=int,
  55. help="Number of cores for multiprocessing.",
  56. )
  57. parser.add_argument(
  58. "-i",
  59. "--interval",
  60. default=10,
  61. type=int,
  62. help="Time interval for rolling window.",
  63. )
  64. args = parser.parse_args()
  65. manager = multiprocessing.Manager()
  66. n = manager.Value("i", 0)
  67. frame_list = manager.list()
  68. jobs = []
  69. # load all pcap csv into one dataframe
  70. pcap_csv_list = list()
  71. for filename in os.listdir(args.pcap_csv_folder):
  72. if filename.endswith(".csv") and "tcp" in filename:
  73. pcap_csv_list.append(filename)
  74. parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
  75. print("Start processing with {} jobs.".format(args.cores))
  76. for p in parts:
  77. process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
  78. jobs.append(process)
  79. for j in jobs:
  80. j.start()
  81. print("Started all jobs.")
  82. # Ensure all of the processes have finished
  83. finished_job_counter = 0
  84. working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
  85. w = 0
  86. while len(jobs) != finished_job_counter:
  87. sleep(1)
  88. print(
  89. "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
  90. working[w],
  91. working[w],
  92. working[w],
  93. len(jobs),
  94. finished_job_counter,
  95. n.value,
  96. len(pcap_csv_list),
  97. round((n.value / len(pcap_csv_list)) * 100, 2),
  98. ),
  99. end="",
  100. )
  101. finished_job_counter = 0
  102. for j in jobs:
  103. if not j.is_alive():
  104. finished_job_counter += 1
  105. if (w + 1) % len(working) == 0:
  106. w = 0
  107. else:
  108. w += 1
  109. print("\r\nSorting table...")
  110. transmission_df = pd.concat(frame_list)
  111. frame_list = None
  112. transmission_df = transmission_df.sort_index()
  113. print("Calculate goodput...")
  114. transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
  115. transmission_df["goodput"] = transmission_df["goodput"].apply(
  116. lambda x: ((x * 8) / args.interval) / 10**6
  117. )
  118. # load dataframe an put it into geopandas
  119. df = pd.read_csv(args.gps_file)
  120. df["kmh"] = df["speed (knots)"].apply(lambda x: x * 1.852)
  121. if args.time_offset:
  122. df["datetime"] = pd.to_datetime(df["datetime"]) + pd.Timedelta(minutes=args.time_offset)
  123. else:
  124. df["datetime"] = pd.to_datetime(df["datetime"])
  125. df = df.set_index("datetime")
  126. df.index = pd.to_datetime(df.index)
  127. gdf = gpd.GeoDataFrame(
  128. df,
  129. geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
  130. crs="EPSG:4326",
  131. )
  132. gdf = pd.merge_asof(
  133. gdf,
  134. transmission_df,
  135. tolerance=pd.Timedelta("10s"),
  136. right_index=True,
  137. left_index=True,
  138. )
  139. # read serial csv
  140. serial_df = pd.read_csv(args.serial_file)
  141. #serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
  142. serial_df["datetime"] = pd.to_datetime(serial_df["datetime"])
  143. serial_df = serial_df.set_index("datetime")
  144. serial_df.index = pd.to_datetime(serial_df.index)
  145. gdf = pd.merge_asof(
  146. gdf,
  147. serial_df,
  148. tolerance=pd.Timedelta("1s"),
  149. right_index=True,
  150. left_index=True,
  151. )
  152. # format to needed format and add basemap as background
  153. df_wm = gdf.to_crs(epsg=3857)
  154. #df_wm.to_csv("debug-data.csv")
  155. # ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
  156. if args.no_plot:
  157. df_wm.to_csv("{}gps_plot.csv".format(args.save))
  158. print("Saved calculations to: {}gps_plot.csv".format(args.save))
  159. exit(0)
  160. print("Start plotting...")
  161. ax2 = df_wm.plot()
  162. ax2 = df_wm.plot(args.column, cmap="hot", legend=args.no_legend, ax=ax2)
  163. # ax2 = df_wm.plot.scatter(x="longitude", y="latitude", c="kmh", cmap="hot")
  164. # zoom 17 is pretty
  165. cx.add_basemap(ax2, source=cx.providers.OpenStreetMap.Mapnik, zoom=15)
  166. # gdf.plot()
  167. ax2.set_axis_off()
  168. ax2.set_title(args.label if args.label else args.column)
  169. if args.show_providerinfo:
  170. #####################################
  171. # Identifying how many tiles
  172. latlon_outline = gdf.to_crs("epsg:4326").total_bounds
  173. def_zoom = cx.tile._calculate_zoom(*latlon_outline)
  174. print(f"Default Zoom level {def_zoom}")
  175. cx.howmany(*latlon_outline, def_zoom, ll=True)
  176. cx.howmany(*latlon_outline, def_zoom + 1, ll=True)
  177. cx.howmany(*latlon_outline, def_zoom + 2, ll=True)
  178. # Checking out some of the other providers and tiles
  179. print(cx.providers.CartoDB.Voyager)
  180. print(cx.providers.Stamen.TonerLite)
  181. print(cx.providers.Stamen.keys())
  182. #####################################
  183. # df.plot(x="longitude", y="latitude", kind="scatter", colormap="YlOrRd")
  184. if args.save:
  185. plt.savefig("{}gps_plot.pdf".format(args.save))
  186. else:
  187. plt.show()