Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

228 lines
6.7KB

  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. from argparse import ArgumentParser
  5. from math import ceil
  6. from time import sleep
  7. import pandas as pd
  8. import matplotlib.pyplot as plt
  9. from mpl_toolkits import axisartist
  10. from mpl_toolkits.axes_grid1 import host_subplot
  11. def csv_to_dataframe(csv_list, dummy):
  12. global n
  13. global frame_list
  14. transmission_df = None
  15. for csv in csv_list:
  16. tmp_df = pd.read_csv(
  17. "{}{}".format(args.pcap_csv_folder, csv),
  18. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  19. )
  20. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  21. tmp_df = tmp_df.set_index("datetime")
  22. tmp_df.index = pd.to_datetime(tmp_df.index)
  23. if transmission_df is None:
  24. transmission_df = tmp_df
  25. else:
  26. transmission_df = pd.concat([transmission_df, tmp_df])
  27. n.value += 1
  28. frame_list.append(transmission_df)
  29. from itertools import islice
  30. def chunk(it, size):
  31. it = iter(it)
  32. return iter(lambda: tuple(islice(it, size)), ())
  33. def plot_cdf(dataframe, column_name):
  34. stats_df = dataframe \
  35. .groupby(column_name) \
  36. [column_name] \
  37. .agg("count") \
  38. .pipe(pd.DataFrame) \
  39. .rename(columns={column_name: "frequency"})
  40. # PDF
  41. stats_df["pdf"] = stats_df["frequency"] / sum(stats_df["frequency"])
  42. # CDF
  43. stats_df["cdf"] = stats_df["pdf"].cumsum()
  44. stats_df = stats_df.reset_index()
  45. stats_df.plot(x=column_name, y=["cdf"], grid=True)
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line interface
    # ------------------------------------------------------------------
    parser = ArgumentParser()
    parser.add_argument("-f", "--gps_file", required=True, help="GPS csv file.")
    parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
    parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument(
        # NOTE(review): without type=bool handling or action="store_true",
        # any string passed here (even "False") is truthy -- confirm intent.
        # Also "an zoom" in the help text looks like a typo for "and zoom".
        "--show_providerinfo",
        default=False,
        help="Show providerinfo for map tiles an zoom levels.",
    )
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=10,
        type=int,
        help="Time interval for rolling window.",
    )
    args = parser.parse_args()

    # Shared state for worker processes: a counter of processed files and
    # a list collecting one DataFrame per worker (see csv_to_dataframe).
    manager = multiprocessing.Manager()
    n = manager.Value("i", 0)
    frame_list = manager.list()
    jobs = []

    # load all pcap csv into one dataframe
    # Only TCP pcap exports are considered ("tcp" in the file name).
    pcap_csv_list = list()
    for filename in os.listdir(args.pcap_csv_folder):
        if filename.endswith(".csv") and "tcp" in filename:
            pcap_csv_list.append(filename)

    # Split the file list into one chunk per core and spawn one worker
    # process per chunk.
    parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
    print("Start processing with {} jobs.".format(args.cores))
    for p in parts:
        process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
        jobs.append(process)
    for j in jobs:
        j.start()
    print("Started all jobs.")

    # Ensure all of the processes have finished
    # Poll once a second, drawing a spinner and a progress percentage on a
    # single console line (\r + end="").
    finished_job_counter = 0
    working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
    w = 0
    while len(jobs) != finished_job_counter:
        sleep(1)
        print(
            "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                working[w],
                working[w],
                working[w],
                len(jobs),
                finished_job_counter,
                n.value,
                len(pcap_csv_list),
                round((n.value / len(pcap_csv_list)) * 100, 2),
            ),
            end="",
        )
        # Re-count finished workers from scratch each iteration.
        finished_job_counter = 0
        for j in jobs:
            if not j.is_alive():
                finished_job_counter += 1
        # Advance the spinner frame, wrapping around at the end.
        if (w + 1) % len(working) == 0:
            w = 0
        else:
            w += 1

    # Merge all worker results into a single time-sorted DataFrame and
    # release the shared list.
    print("\r\nSorting table...")
    transmission_df = pd.concat(frame_list)
    frame_list = None
    transmission_df = transmission_df.sort_index()

    print("Calculate goodput...")
    print(transmission_df)
    range_start_time = transmission_df.index.min()
    range_sum_interval = "{}s".format(args.interval)
    # create timedelta range with maximum timedelta
    # NOTE(review): the index looks like a DatetimeIndex (set from the
    # "datetime" column), but pd.to_timedelta / pd.timedelta_range expect
    # timedelta-like bounds -- verify this runs against real data.
    time_range = pd.timedelta_range(pd.to_timedelta(range_start_time), transmission_df.index.max(), freq=range_sum_interval)
    # create bins by pd.cut, aggregate sum
    # Sum payload per fixed time interval; after reset_index() the frame
    # has an integer RangeIndex and the bin labels become a column.
    transmission_df = transmission_df.groupby(pd.cut(transmission_df.index, bins=time_range, labels=time_range[:-1]))["goodput"].sum().reset_index()
    #transmission_df["goodput"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
    # Convert summed bytes per interval to Mbps: bytes -> bits (*8),
    # divide by the interval length, scale to mega (10**6).
    transmission_df["goodput"] = transmission_df["goodput"].apply(
        lambda x: ((x * 8) / args.interval) / 10**6
    )
    # remove all not needed columns
    transmission_df = transmission_df.filter(["goodput", "datetime"])

    # read serial csv
    # Same one-hour shift as applied to the pcap data so both sources
    # line up on the same clock -- TODO confirm the offset's origin.
    serial_df = pd.read_csv(args.serial_file)
    serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
    serial_df = serial_df.set_index("datetime")
    serial_df.index = pd.to_datetime(serial_df.index)
    # NOTE(review): after the reset_index() above, transmission_df has an
    # integer index while serial_df has a DatetimeIndex; merge_asof with
    # left_index=True/right_index=True over a Timedelta tolerance likely
    # fails or mis-merges -- verify against real data.
    transmission_df = pd.merge_asof(
        transmission_df,
        serial_df,
        tolerance=pd.Timedelta("1s"),
        right_index=True,
        left_index=True,
    )

    # ------------------------------------------------------------------
    # Timeline plot: goodput on the host axis, CQI (and optionally more
    # metrics) on parasite y-axes attached to the right.
    # ------------------------------------------------------------------
    scaley = 1.5
    scalex = 1.0
    plt.figure(figsize=[6.4 * scaley, 4.8 * scalex])
    host = host_subplot(111, axes_class=axisartist.Axes)
    plt.subplots_adjust()
    # additional y axes
    par11 = host.twinx()
    par12 = host.twinx()
    # par13 = host.twinx()
    # axes offset
    par12.axis["right"] = par12.new_fixed_axis(loc="right", offset=(60, 0))
    # par13.axis["right"] = par13.new_fixed_axis(loc="right", offset=(120, 0))
    par11.axis["right"].toggle(all=True)
    par12.axis["right"].toggle(all=True)
    # par13.axis["right"].toggle(all=True)
    host.plot(transmission_df["goodput"], "-", color="blue", label="goodput" )
    host.set_xlabel("datetime")
    host.set_ylabel("goodput [Mbps]")
    #host.set_ylim([0, 13])
    #host.set_yscale("log")
    #host.set_yscale("log")
    #host.set_yscale("log")
    #host.set_yscale("log")
    par11.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
    par11.set_ylabel("CQI")
    par11.set_ylim([0, 15])
    # Empty plot keeps the second parasite axis visible even though no
    # series is drawn on it.
    par12.plot()
    if args.save:
        # args.save is treated as a path prefix (no separator inserted).
        plt.savefig("{}timeline_plot.pdf".format(args.save))
    else:
        plt.show()
    plt.clf()

    # NOTE(review): "polt" in the status message is a typo for "plot"
    # (left as-is: runtime string).
    print("Calculate and polt CDF...")
    plot_cdf(transmission_df, "goodput")
    if args.save:
        plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput"))
    else:
        plt.show()