No puede seleccionar más de 25 temas. Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

175 líneas
5.3KB

  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. from argparse import ArgumentParser
  5. from math import ceil
  6. from time import sleep
  7. import pandas as pd
  8. import geopandas as gpd
  9. import contextily as cx
  10. import matplotlib.pyplot as plt
  11. def csv_to_dataframe(csv_list, dummy):
  12. global n
  13. global frame_list
  14. transmission_df = None
  15. for csv in csv_list:
  16. tmp_df = pd.read_csv(
  17. "{}{}".format(args.pcap_csv_folder, csv),
  18. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  19. )
  20. #tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  21. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"])
  22. tmp_df = tmp_df.set_index("datetime")
  23. tmp_df.index = pd.to_datetime(tmp_df.index)
  24. if transmission_df is None:
  25. transmission_df = tmp_df
  26. else:
  27. transmission_df = pd.concat([transmission_df, tmp_df])
  28. n.value += 1
  29. frame_list.append(transmission_df)
  30. from itertools import islice
  31. def chunk(it, size):
  32. it = iter(it)
  33. return iter(lambda: tuple(islice(it, size)), ())
  34. if __name__ == "__main__":
  35. parser = ArgumentParser()
  36. parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
  37. parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
  38. parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
  39. parser.add_argument("--save", default=None, help="Location to save pdf file.")
  40. parser.add_argument("--time_offset", default=None, type=int, help="Minutes added to GPS datetime.")
  41. parser.add_argument(
  42. "-c",
  43. "--cores",
  44. default=1,
  45. type=int,
  46. help="Number of cores for multiprocessing.",
  47. )
  48. parser.add_argument(
  49. "-i",
  50. "--interval",
  51. default=10,
  52. type=int,
  53. help="Time interval for rolling window.",
  54. )
  55. args = parser.parse_args()
  56. manager = multiprocessing.Manager()
  57. n = manager.Value("i", 0)
  58. frame_list = manager.list()
  59. jobs = []
  60. # load all pcap csv into one dataframe
  61. pcap_csv_list = list()
  62. for filename in os.listdir(args.pcap_csv_folder):
  63. if filename.endswith(".csv") and "tcp" in filename:
  64. pcap_csv_list.append(filename)
  65. parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
  66. print("Start processing with {} jobs.".format(args.cores))
  67. for p in parts:
  68. process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
  69. jobs.append(process)
  70. for j in jobs:
  71. j.start()
  72. print("Started all jobs.")
  73. # Ensure all of the processes have finished
  74. finished_job_counter = 0
  75. working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
  76. w = 0
  77. while len(jobs) != finished_job_counter:
  78. sleep(1)
  79. print(
  80. "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
  81. working[w],
  82. working[w],
  83. working[w],
  84. len(jobs),
  85. finished_job_counter,
  86. n.value,
  87. len(pcap_csv_list),
  88. round((n.value / len(pcap_csv_list)) * 100, 2),
  89. ),
  90. end="",
  91. )
  92. finished_job_counter = 0
  93. for j in jobs:
  94. if not j.is_alive():
  95. finished_job_counter += 1
  96. if (w + 1) % len(working) == 0:
  97. w = 0
  98. else:
  99. w += 1
  100. print("\r\nSorting table...")
  101. transmission_df = pd.concat(frame_list)
  102. frame_list = None
  103. transmission_df = transmission_df.sort_index()
  104. print("Calculate goodput...")
  105. transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
  106. transmission_df["goodput"] = transmission_df["goodput"].apply(
  107. lambda x: ((x * 8) / args.interval) / 10**6
  108. )
  109. # load dataframe an put it into geopandas
  110. df = pd.read_csv(args.gps_file)
  111. df["kmh"] = df["speed (knots)"].apply(lambda x: x * 1.852)
  112. if args.time_offset:
  113. df["datetime"] = pd.to_datetime(df["datetime"]) + pd.Timedelta(minutes=args.time_offset)
  114. else:
  115. df["datetime"] = pd.to_datetime(df["datetime"])
  116. df = df.set_index("datetime")
  117. df.index = pd.to_datetime(df.index)
  118. gdf = gpd.GeoDataFrame(
  119. df,
  120. geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
  121. crs="EPSG:4326",
  122. )
  123. gdf = pd.merge_asof(
  124. gdf,
  125. transmission_df,
  126. tolerance=pd.Timedelta("10s"),
  127. right_index=True,
  128. left_index=True,
  129. )
  130. # read serial csv
  131. serial_df = pd.read_csv(args.serial_file)
  132. #serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
  133. serial_df["datetime"] = pd.to_datetime(serial_df["datetime"])
  134. serial_df = serial_df.set_index("datetime")
  135. serial_df.index = pd.to_datetime(serial_df.index)
  136. gdf = pd.merge_asof(
  137. gdf,
  138. serial_df,
  139. tolerance=pd.Timedelta("1s"),
  140. right_index=True,
  141. left_index=True,
  142. )
  143. # format to needed format and add basemap as background
  144. df_wm = gdf.to_crs(epsg=3857)
  145. #df_wm.to_csv("debug-data.csv")
  146. # ax2 = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
  147. df_wm.to_csv("{}gps_plot.csv".format(args.save))
  148. print("Saved calculations to: {}gps_plot.csv".format(args.save))