#!/usr/bin/env python3
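# Merges pcap-derived TCP CSVs, a GPS track and a serial log into one
# georeferenced table: the pcap CSVs are loaded in parallel, a rolling
# goodput is computed, and everything is joined on time before being
# reprojected to Web Mercator and written to <save>gps_plot.csv.
# Example invocation (all paths are placeholders):
#   python3 <this script> -f gps.csv -s serial.csv -p pcap_csv/ --save out/ -c 4 -i 10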
import multiprocessing
import os
from argparse import ArgumentParser
from math import ceil
from time import sleep

import pandas as pd
import geopandas as gpd
import contextily as cx
import matplotlib.pyplot as plt


def csv_to_dataframe(csv_list, dummy):
    """Load the given pcap CSV files into one DataFrame and append it to the shared frame_list.

    Workers rely on inheriting args, n and frame_list from the parent process
    (fork start method); n counts processed files for the progress display.
    The second argument is unused.
    """
    global n
    global frame_list
    transmission_df = None
    for csv in csv_list:
        tmp_df = pd.read_csv(
            "{}{}".format(args.pcap_csv_folder, csv),
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        # tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"])
        tmp_df = tmp_df.set_index("datetime")
        tmp_df.index = pd.to_datetime(tmp_df.index)
        if transmission_df is None:
            transmission_df = tmp_df
        else:
            transmission_df = pd.concat([transmission_df, tmp_df])
        n.value += 1
    frame_list.append(transmission_df)

from itertools import islice


def chunk(it, size):
    """Split an iterable into tuples of at most size elements."""
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())
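# For example, chunk([1, 2, 3, 4, 5], 2) yields (1, 2), (3, 4) and (5,);
# below it is used to split the pcap CSV list into one batch per worker.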


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument("--time_offset", default=None, type=int, help="Minutes added to GPS datetime.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )
    args = parser.parse_args()
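
    # Shared state for the worker processes: n counts processed CSV files,
    # frame_list collects the per-worker DataFrames.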
    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()
    jobs = []

    # load all pcap csv into one dataframe
    pcap_csv_list = list()
    for filename in os.listdir(args.pcap_csv_folder):
        if filename.endswith(".csv") and "tcp" in filename:
            pcap_csv_list.append(filename)

    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))
    for p in parts:
        process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
        jobs.append(process)
    for j in jobs:
        j.start()
    print("Started all jobs.")

    # Ensure all of the processes have finished
    finished_job_counter = 0
    working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_csv_list),
                round((n.value / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
        )
        finished_job_counter = 0
        for j in jobs:
            if not j.is_alive():
                finished_job_counter += 1
        if (w + 1) % len(working) == 0:
            w = 0
        else:
            w += 1

    print("\r\nSorting table...")
    transmission_df = pd.concat(frame_list)
    frame_list = None
    transmission_df = transmission_df.sort_index()

    print("Calculate goodput...")
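    # Goodput: sum the payload bytes seen in the trailing <interval> seconds,
    # convert to bits (*8), average over the interval (/interval) and scale
    # to Mbit/s (/10**6).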
    transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
    transmission_df["goodput"] = transmission_df["goodput"].apply(
        lambda x: ((x * 8) / args.interval) / 10**6
    )

    # load the GPS dataframe and put it into geopandas
    df = pd.read_csv(args.gps_file)
    df["kmh"] = df["speed (knots)"].apply(lambda x: x * 1.852)
    # optionally shift the GPS timestamps by the given number of minutes
    if args.time_offset is not None:
        if args.time_offset > 0:
            df["datetime"] = pd.to_datetime(df["datetime"]) + pd.Timedelta(minutes=abs(args.time_offset))
        else:
            df["datetime"] = pd.to_datetime(df["datetime"]) - pd.Timedelta(minutes=abs(args.time_offset))
    else:
        df["datetime"] = pd.to_datetime(df["datetime"])
    df = df.set_index("datetime")
    df.index = pd.to_datetime(df.index)
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
        crs="EPSG:4326",
    )
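    # Time-based join: each GPS fix gets the most recent transmission sample
    # no more than 10 seconds older (merge_asof defaults to a backward match).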
    gdf = pd.merge_asof(
        gdf,
        transmission_df,
        tolerance=pd.Timedelta("10s"),
        right_index=True,
        left_index=True,
    )

    # read serial csv
    serial_df = pd.read_csv(args.serial_file)
    # serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"])
    serial_df = serial_df.set_index("datetime")
    serial_df.index = pd.to_datetime(serial_df.index)
    gdf = pd.merge_asof(
        gdf,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # reproject to Web Mercator so a basemap can later be added as background
    df_wm = gdf.to_crs(epsg=3857)
    # df_wm.to_csv("debug-data.csv")
    # ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
    df_wm.to_csv("{}gps_plot.csv".format(args.save))
    print("Saved calculations to: {}gps_plot.csv".format(args.save))