Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

277 lines
8.4KB

  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. from argparse import ArgumentParser
  5. from math import ceil
  6. from time import sleep
  7. import matplotlib
  8. import pandas as pd
  9. import matplotlib.pyplot as plt
  10. from mpl_toolkits import axisartist
  11. from mpl_toolkits.axes_grid1 import host_subplot
  12. def csv_to_dataframe(csv_list, dummy):
  13. global n
  14. global frame_list
  15. transmission_df = None
  16. for csv in csv_list:
  17. tmp_df = pd.read_csv(
  18. "{}{}".format(args.pcap_csv_folder, csv),
  19. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  20. )
  21. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  22. tmp_df = tmp_df.set_index("datetime")
  23. tmp_df.index = pd.to_datetime(tmp_df.index)
  24. if transmission_df is None:
  25. transmission_df = tmp_df
  26. else:
  27. transmission_df = pd.concat([transmission_df, tmp_df])
  28. n.value += 1
  29. frame_list.append(transmission_df)
  30. from itertools import islice
  31. def chunk(it, size):
  32. it = iter(it)
  33. return iter(lambda: tuple(islice(it, size)), ())
  34. def plot_cdf(dataframe, column_name):
  35. stats_df = dataframe \
  36. .groupby(column_name) \
  37. [column_name] \
  38. .agg("count") \
  39. .pipe(pd.DataFrame) \
  40. .rename(columns={column_name: "frequency"})
  41. # PDF
  42. stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])
  43. # CDF
  44. stats_df["CDF"] = stats_df["PDF"].cumsum()
  45. stats_df = stats_df.reset_index()
  46. stats_df.plot(x=column_name, y=["CDF"], grid=True)
  47. if __name__ == "__main__":
  48. parser = ArgumentParser()
  49. parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
  50. parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
  51. parser.add_argument("--save", default=None, help="Location to save pdf file.")
  52. parser.add_argument(
  53. "-c",
  54. "--cores",
  55. default=1,
  56. type=int,
  57. help="Number of cores for multiprocessing.",
  58. )
  59. parser.add_argument(
  60. "-i",
  61. "--interval",
  62. default=10,
  63. type=int,
  64. help="Time interval for rolling window.",
  65. )
  66. args = parser.parse_args()
  67. manager = multiprocessing.Manager()
  68. n = manager.Value("i", 0)
  69. frame_list = manager.list()
  70. jobs = []
  71. # load all pcap csv into one dataframe
  72. pcap_csv_list = list()
  73. for filename in os.listdir(args.pcap_csv_folder):
  74. if filename.endswith(".csv") and "tcp" in filename:
  75. pcap_csv_list.append(filename)
  76. parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
  77. print("Start processing with {} jobs.".format(args.cores))
  78. for p in parts:
  79. process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
  80. jobs.append(process)
  81. for j in jobs:
  82. j.start()
  83. print("Started all jobs.")
  84. # Ensure all the processes have finished
  85. finished_job_counter = 0
  86. working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
  87. w = 0
  88. while len(jobs) != finished_job_counter:
  89. sleep(1)
  90. print(
  91. "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
  92. working[w],
  93. working[w],
  94. working[w],
  95. len(jobs),
  96. finished_job_counter,
  97. n.value,
  98. len(pcap_csv_list),
  99. round((n.value / len(pcap_csv_list)) * 100, 2),
  100. ),
  101. end="",
  102. )
  103. finished_job_counter = 0
  104. for j in jobs:
  105. if not j.is_alive():
  106. finished_job_counter += 1
  107. if (w + 1) % len(working) == 0:
  108. w = 0
  109. else:
  110. w += 1
  111. print("\r\nSorting table...")
  112. transmission_df = pd.concat(frame_list)
  113. frame_list = None
  114. transmission_df = transmission_df.sort_index()
  115. print("Calculate goodput...")
  116. #print(transmission_df)
  117. # key for columns and level for index
  118. transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
  119. transmission_df["goodput"] = transmission_df["goodput"].apply(
  120. lambda x: ((x * 8) / args.interval) / 10**6
  121. )
  122. transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
  123. transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
  124. lambda x: ((x * 8) / args.interval) / 10 ** 6
  125. )
  126. # set meta values and remove all not needed columns
  127. cc_algo = transmission_df["congestion_control"].iloc[0]
  128. cc_algo = cc_algo.upper()
  129. transmission_direction = transmission_df["direction"].iloc[0]
  130. transmission_df = transmission_df.filter(["goodput", "datetime", "ack_rtt", "goodput_rolling"])
  131. # read serial csv
  132. serial_df = pd.read_csv(args.serial_file)
  133. serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
  134. serial_df = serial_df.set_index("datetime")
  135. serial_df.index = pd.to_datetime(serial_df.index)
  136. serial_df.sort_index()
  137. transmission_df = pd.merge_asof(
  138. transmission_df,
  139. serial_df,
  140. tolerance=pd.Timedelta("1s"),
  141. right_index=True,
  142. left_index=True,
  143. )
  144. # transmission timeline
  145. scaley = 1.5
  146. scalex = 1.0
  147. fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex])
  148. plt.title("{} with {}".format(transmission_direction, cc_algo))
  149. fig.subplots_adjust(right=0.75)
  150. twin1 = ax.twinx()
  151. twin2 = ax.twinx()
  152. # Offset the right spine of twin2. The ticks and label have already been
  153. # placed on the right by twinx above.
  154. twin2.spines.right.set_position(("axes", 1.2))
  155. # create list fo color indices
  156. transmission_df["index"] = transmission_df.index
  157. color_dict = dict()
  158. color_list = list()
  159. i = 0
  160. for cell_id in transmission_df["cellID"]:
  161. if cell_id not in color_dict:
  162. color_dict[cell_id] = i
  163. i += 1
  164. color_list.append(color_dict[cell_id])
  165. transmission_df["cell_color"] = color_list
  166. color_dict = None
  167. color_list = None
  168. cmap = matplotlib.cm.get_cmap("Set3")
  169. unique_cells = transmission_df["cell_color"].unique()
  170. color_list = cmap.colors * (round(len(unique_cells) / len(cmap.colors)) + 1)
  171. for c in transmission_df["cell_color"].unique():
  172. bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c]
  173. ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=color_list[c])
  174. p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput")
  175. p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
  176. p3, = twin2.plot(transmission_df["ack_rtt"], "-.", color="red", label="ACK RTT")
  177. ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max())
  178. ax.set_ylim(0, 500)
  179. twin1.set_ylim(0, 15)
  180. twin2.set_ylim(0, 1)
  181. ax.set_xlabel("Time")
  182. ax.set_ylabel("Goodput")
  183. twin1.set_ylabel("CQI")
  184. twin2.set_ylabel("ACK RTT")
  185. ax.yaxis.label.set_color(p1.get_color())
  186. twin1.yaxis.label.set_color(p2.get_color())
  187. twin2.yaxis.label.set_color(p3.get_color())
  188. tkw = dict(size=4, width=1.5)
  189. ax.tick_params(axis='y', colors=p1.get_color(), **tkw)
  190. twin1.tick_params(axis='y', colors=p2.get_color(), **tkw)
  191. twin2.tick_params(axis='y', colors=p3.get_color(), **tkw)
  192. ax.tick_params(axis='x', **tkw)
  193. #ax.legend(handles=[p1, p2, p3])
  194. if args.save:
  195. plt.savefig("{}timeline_plot.pdf".format(args.save))
  196. else:
  197. plt.show()
  198. #goodput cdf
  199. plt.clf()
  200. print("Calculate and polt goodput CDF...")
  201. plot_cdf(transmission_df, "goodput")
  202. plt.xlabel("goodput [mbps]")
  203. plt.ylabel("CDF")
  204. plt.legend([cc_algo])
  205. plt.title("{} with {}".format(transmission_direction, cc_algo))
  206. if args.save:
  207. plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput"))
  208. else:
  209. plt.show()
  210. # rtt cdf
  211. plt.clf()
  212. print(transmission_df["ack_rtt"])
  213. print("Calculate and polt rtt CDF...")
  214. plot_cdf(transmission_df, "ack_rtt")
  215. plt.xlabel("ACK RTT [s]")
  216. plt.ylabel("CDF")
  217. plt.xscale("log")
  218. plt.legend([cc_algo])
  219. plt.title("{} with {}".format(transmission_direction, cc_algo))
  220. if args.save:
  221. plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "ack_rtt"))
  222. else:
  223. plt.show()