#!/usr/bin/env python3
import multiprocessing
import os
from argparse import ArgumentParser
from itertools import islice
from math import ceil
from time import sleep

import pandas as pd
import geopandas as gpd
import contextily as cx
import matplotlib.pyplot as plt


def csv_to_dataframe(csv_list, dummy):
    # Load one chunk of pcap csv files into a single dataframe, append the
    # result to the shared frame_list, and count processed files in n.
    global n
    global frame_list
    transmission_df = None
    for csv in csv_list:
        tmp_df = pd.read_csv(
            os.path.join(args.pcap_csv_folder, csv),
            dtype=dict(is_retranmission=bool, is_dup_ack=bool),
        )
        #tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
        tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"])
        tmp_df = tmp_df.set_index("datetime")
        tmp_df.index = pd.to_datetime(tmp_df.index)
        if transmission_df is None:
            transmission_df = tmp_df
        else:
            transmission_df = pd.concat([transmission_df, tmp_df])
        n.value += 1
    frame_list.append(transmission_df)


def chunk(it, size):
    # Split an iterable into tuples of at most `size` elements.
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument("--time_offset", default=0, type=int, help="Minutes added to GPS datetime.")
    parser.add_argument("--neg_offset", default=False, action="store_true", help="Subtract GPS time offset.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )
    args = parser.parse_args()
    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()
    jobs = []

    # load all pcap csv into one dataframe
    pcap_csv_list = list()
    for filename in os.listdir(args.pcap_csv_folder):
        if filename.endswith(".csv") and "tcp" in filename:
            pcap_csv_list.append(filename)
    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))
    for p in parts:
        process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
        jobs.append(process)
    for j in jobs:
        j.start()
    print("Started all jobs.")

    # Ensure all of the processes have finished
    finished_job_counter = 0
    working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_csv_list),
                round((n.value / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
        )
        finished_job_counter = 0
        for j in jobs:
            if not j.is_alive():
                finished_job_counter += 1
        if (w + 1) % len(working) == 0:
            w = 0
        else:
            w += 1

    print("\r\nSorting table...")
    transmission_df = pd.concat(frame_list)
    frame_list = None
    transmission_df = transmission_df.sort_index()

    print("Calculate goodput...")
    # rolling sum of payload_size over the interval, converted to Mbit/s
    # (payload_size is assumed to be a byte count)
    transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
    transmission_df["goodput"] = transmission_df["goodput"].apply(
        lambda x: ((x * 8) / args.interval) / 10**6
    )

    # load dataframe and put it into geopandas
    df = pd.read_csv(args.gps_file)
    df["kmh"] = df["speed (knots)"].apply(lambda x: x * 1.852)
    if args.time_offset > 0:
        if args.neg_offset:
            df["datetime"] = pd.to_datetime(df["datetime"]) - pd.Timedelta(minutes=args.time_offset)
        else:
            df["datetime"] = pd.to_datetime(df["datetime"]) + pd.Timedelta(minutes=args.time_offset)
    else:
        df["datetime"] = pd.to_datetime(df["datetime"])
    df = df.set_index("datetime")
    df.index = pd.to_datetime(df.index)
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
        crs="EPSG:4326",
    )
    # match each GPS fix with the most recent transmission sample within 10 s
    gdf = pd.merge_asof(
        gdf,
        transmission_df,
        tolerance=pd.Timedelta("10s"),
        right_index=True,
        left_index=True,
    )

    # read serial csv
    serial_df = pd.read_csv(args.serial_file)
    #serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"])
    serial_df = serial_df.set_index("datetime")
    serial_df.index = pd.to_datetime(serial_df.index)
    gdf = pd.merge_asof(
        gdf,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # reproject to web mercator so a basemap can be added as background
    df_wm = gdf.to_crs(epsg=3857)
    #df_wm.to_csv("debug-data.csv")
    # ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
    df_wm.to_csv("{}gps_plot.csv".format(args.save))
    print("Saved calculations to: {}gps_plot.csv".format(args.save))
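
# A minimal, hypothetical usage sketch (the script and file names below are
# made up; the flags correspond to the ArgumentParser definitions above).
# Note that --save is used as a plain string prefix for the output file name,
# so a trailing path separator is needed when passing a directory:
#
#   python3 plot_gps.py -f gps.csv -s serial.csv -p pcap_csvs/ \
#       --save results/ --time_offset 60 --neg_offset -c 4 -i 10
#
# With these arguments the script would write results/gps_plot.csv containing
# the merged GPS, goodput, and serial data.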