# NOTE(review): removed GitHub page chrome that was captured when this script
# was copied from the web UI (topic-picker help text, "217 lines / 6.2KB").
  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. from argparse import ArgumentParser
  5. from math import ceil
  6. from time import sleep
  7. import pandas as pd
  8. import geopandas as gpd
  9. import contextily as cx
  10. import matplotlib.pyplot as plt
  11. from mpl_toolkits import axisartist
  12. from mpl_toolkits.axes_grid1 import host_subplot
  13. def csv_to_dataframe(csv_list, dummy):
  14. global n
  15. global frame_list
  16. transmission_df = None
  17. for csv in csv_list:
  18. tmp_df = pd.read_csv(
  19. "{}{}".format(args.pcap_csv_folder, csv),
  20. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  21. )
  22. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  23. tmp_df = tmp_df.set_index("datetime")
  24. tmp_df.index = pd.to_datetime(tmp_df.index)
  25. if transmission_df is None:
  26. transmission_df = tmp_df
  27. else:
  28. transmission_df = pd.concat([transmission_df, tmp_df])
  29. n.value += 1
  30. frame_list.append(transmission_df)
  31. from itertools import islice
  32. def chunk(it, size):
  33. it = iter(it)
  34. return iter(lambda: tuple(islice(it, size)), ())
  35. if __name__ == "__main__":
  36. parser = ArgumentParser()
  37. parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
  38. parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
  39. parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
  40. parser.add_argument("--save", default=None, help="Location to save pdf file.")
  41. parser.add_argument(
  42. "--show_providerinfo",
  43. default=False,
  44. help="Show providerinfo for map tiles an zoom levels.",
  45. )
  46. parser.add_argument(
  47. "-c",
  48. "--cores",
  49. default=1,
  50. type=int,
  51. help="Number of cores for multiprocessing.",
  52. )
  53. parser.add_argument(
  54. "-i",
  55. "--interval",
  56. default=10,
  57. type=int,
  58. help="Time interval for rolling window.",
  59. )
  60. args = parser.parse_args()
  61. manager = multiprocessing.Manager()
  62. n = manager.Value("i", 0)
  63. frame_list = manager.list()
  64. jobs = []
  65. # load all pcap csv into one dataframe
  66. pcap_csv_list = list()
  67. for filename in os.listdir(args.pcap_csv_folder):
  68. if filename.endswith(".csv") and "tcp" in filename:
  69. pcap_csv_list.append(filename)
  70. parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
  71. print("Start processing with {} jobs.".format(args.cores))
  72. for p in parts:
  73. process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
  74. jobs.append(process)
  75. for j in jobs:
  76. j.start()
  77. print("Started all jobs.")
  78. # Ensure all of the processes have finished
  79. finished_job_counter = 0
  80. working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
  81. w = 0
  82. while len(jobs) != finished_job_counter:
  83. sleep(1)
  84. print(
  85. "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
  86. working[w],
  87. working[w],
  88. working[w],
  89. len(jobs),
  90. finished_job_counter,
  91. n.value,
  92. len(pcap_csv_list),
  93. round((n.value / len(pcap_csv_list)) * 100, 2),
  94. ),
  95. end="",
  96. )
  97. finished_job_counter = 0
  98. for j in jobs:
  99. if not j.is_alive():
  100. finished_job_counter += 1
  101. if (w + 1) % len(working) == 0:
  102. w = 0
  103. else:
  104. w += 1
  105. print("\r\nSorting table...")
  106. transmission_df = pd.concat(frame_list)
  107. frame_list = None
  108. transmission_df = transmission_df.sort_index()
  109. print("Calculate goodput...")
  110. transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
  111. transmission_df["goodput"] = transmission_df["goodput"].apply(
  112. lambda x: ((x * 8) / args.interval) / 10**6
  113. )
  114. # remove all not needed columns
  115. transmission_df = transmission_df.filter(["goodput", "datetime"])
  116. # read serial csv
  117. serial_df = pd.read_csv(args.serial_file)
  118. serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
  119. serial_df = serial_df.set_index("datetime")
  120. serial_df.index = pd.to_datetime(serial_df.index)
  121. transmission_df = pd.merge_asof(
  122. transmission_df,
  123. serial_df,
  124. tolerance=pd.Timedelta("1s"),
  125. right_index=True,
  126. left_index=True,
  127. )
  128. scaley = 1.5
  129. scalex = 1.0
  130. plt.figure(figsize=[6.4 * scaley, 4.8 * scalex])
  131. host = host_subplot(111, axes_class=axisartist.Axes)
  132. plt.subplots_adjust()
  133. # additional y axes
  134. par11 = host.twinx()
  135. par12 = host.twinx()
  136. # par13 = host.twinx()
  137. # axes offset
  138. par12.axis["right"] = par12.new_fixed_axis(loc="right", offset=(60, 0))
  139. # par13.axis["right"] = par13.new_fixed_axis(loc="right", offset=(120, 0))
  140. par11.axis["right"].toggle(all=True)
  141. par12.axis["right"].toggle(all=True)
  142. # par13.axis["right"].toggle(all=True)
  143. host.plot(transmission_df["goodput"], "-", color="blue", label="goodput" )
  144. host.set_xlabel("datetime")
  145. host.set_ylabel("goodput [Mbps]")
  146. #host.set_ylim([0, 13])
  147. #host.set_yscale("log")
  148. #host.set_yscale("log")
  149. #host.set_yscale("log")
  150. #host.set_yscale("log")
  151. par11.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
  152. par11.set_ylabel("CQI")
  153. par11.set_ylim([0, 15])
  154. par12.plot()
  155. if args.save:
  156. plt.savefig("{}timeline_plot.pdf".format(args.save))
  157. else:
  158. plt.show()
  159. plt.clf()
  160. # Get the frequency, PDF and CDF for each value in the series
  161. # Frequency
  162. stats_df = transmission_df \
  163. .groupby("goodput") \
  164. ["goodput"] \
  165. .agg("count") \
  166. .pipe(pd.DataFrame) \
  167. .rename(columns={"goodput": 'frequency'})
  168. # PDF
  169. stats_df['pdf'] = stats_df['frequency'] / sum(stats_df['frequency'])
  170. # CDF
  171. stats_df['cdf'] = stats_df['pdf'].cumsum()
  172. stats_df = stats_df.reset_index()
  173. stats_df.plot(x="goodput", y=["cdf"], grid=True)
  174. if args.save:
  175. plt.savefig("{}cdf_plot.pdf".format(args.save))
  176. else:
  177. plt.show()