Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

281 lines
8.6 KB

  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. import pickle
  5. from argparse import ArgumentParser
  6. from math import ceil
  7. from time import sleep
  8. import matplotlib
  9. import pandas as pd
  10. import matplotlib.pyplot as plt
  11. from mpl_toolkits import axisartist
  12. from mpl_toolkits.axes_grid1 import host_subplot
  13. def csv_to_dataframe(csv_list, dummy):
  14. global n
  15. global frame_list
  16. transmission_df = None
  17. for csv in csv_list:
  18. tmp_df = pd.read_csv(
  19. "{}{}".format(args.pcap_csv_folder, csv),
  20. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  21. )
  22. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  23. tmp_df = tmp_df.set_index("datetime")
  24. tmp_df.index = pd.to_datetime(tmp_df.index)
  25. if transmission_df is None:
  26. transmission_df = tmp_df
  27. else:
  28. transmission_df = pd.concat([transmission_df, tmp_df])
  29. n.value += 1
  30. frame_list.append(transmission_df)
  31. from itertools import islice
  32. def chunk(it, size):
  33. it = iter(it)
  34. return iter(lambda: tuple(islice(it, size)), ())
  35. def plot_cdf(dataframe, column_name):
  36. stats_df = dataframe \
  37. .groupby(column_name) \
  38. [column_name] \
  39. .agg("count") \
  40. .pipe(pd.DataFrame) \
  41. .rename(columns={column_name: "frequency"})
  42. # PDF
  43. stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])
  44. # CDF
  45. stats_df["CDF"] = stats_df["PDF"].cumsum()
  46. stats_df = stats_df.reset_index()
  47. stats_df.plot(x=column_name, y=["CDF"], grid=True)
  48. if __name__ == "__main__":
  49. parser = ArgumentParser()
  50. parser.add_argument("-s", "--serial_file", required=True, help="Serial csv file.")
  51. parser.add_argument("-p", "--pcap_csv_folder", required=True, help="PCAP csv folder.")
  52. parser.add_argument("--save", default=None, help="Location to save pdf file.")
  53. parser.add_argument("--export", default=None, help="Export figure as an pickle file.")
  54. parser.add_argument(
  55. "-c",
  56. "--cores",
  57. default=1,
  58. type=int,
  59. help="Number of cores for multiprocessing.",
  60. )
  61. parser.add_argument(
  62. "-i",
  63. "--interval",
  64. default=10,
  65. type=int,
  66. help="Time interval for rolling window.",
  67. )
  68. args = parser.parse_args()
  69. manager = multiprocessing.Manager()
  70. n = manager.Value("i", 0)
  71. frame_list = manager.list()
  72. jobs = []
  73. # load all pcap csv into one dataframe
  74. pcap_csv_list = list()
  75. for filename in os.listdir(args.pcap_csv_folder):
  76. if filename.endswith(".csv") and "tcp" in filename:
  77. pcap_csv_list.append(filename)
  78. parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
  79. print("Start processing with {} jobs.".format(args.cores))
  80. for p in parts:
  81. process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy"))
  82. jobs.append(process)
  83. for j in jobs:
  84. j.start()
  85. print("Started all jobs.")
  86. # Ensure all the processes have finished
  87. finished_job_counter = 0
  88. working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
  89. w = 0
  90. while len(jobs) != finished_job_counter:
  91. sleep(1)
  92. print(
  93. "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
  94. working[w],
  95. working[w],
  96. working[w],
  97. len(jobs),
  98. finished_job_counter,
  99. n.value,
  100. len(pcap_csv_list),
  101. round((n.value / len(pcap_csv_list)) * 100, 2),
  102. ),
  103. end="",
  104. )
  105. finished_job_counter = 0
  106. for j in jobs:
  107. if not j.is_alive():
  108. finished_job_counter += 1
  109. if (w + 1) % len(working) == 0:
  110. w = 0
  111. else:
  112. w += 1
  113. print("\r\nSorting table...")
  114. transmission_df = pd.concat(frame_list)
  115. frame_list = None
  116. transmission_df = transmission_df.sort_index()
  117. print("Calculate goodput...")
  118. #print(transmission_df)
  119. # srtt to [s]
  120. transmission_df["srtt"] = transmission_df["srtt"].apply(lambda x: x / 10 ** 6)
  121. # key for columns and level for index
  122. transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
  123. transmission_df["goodput"] = transmission_df["goodput"].apply(
  124. lambda x: ((x * 8) / args.interval) / 10**6
  125. )
  126. transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
  127. transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
  128. lambda x: ((x * 8) / args.interval) / 10 ** 6
  129. )
  130. # set meta values and remove all not needed columns
  131. cc_algo = transmission_df["congestion_control"].iloc[0]
  132. cc_algo = cc_algo.upper()
  133. transmission_direction = transmission_df["direction"].iloc[0]
  134. transmission_df = transmission_df.filter(["goodput", "datetime", "srtt", "goodput_rolling"])
  135. # read serial csv
  136. serial_df = pd.read_csv(args.serial_file)
  137. serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
  138. serial_df = serial_df.set_index("datetime")
  139. serial_df.index = pd.to_datetime(serial_df.index)
  140. serial_df.sort_index()
  141. transmission_df = pd.merge_asof(
  142. transmission_df,
  143. serial_df,
  144. tolerance=pd.Timedelta("1s"),
  145. right_index=True,
  146. left_index=True,
  147. )
  148. # transmission timeline
  149. scaley = 1.5
  150. scalex = 1.0
  151. fig, ax = plt.subplots(figsize=[6.4 * scaley, 4.8 * scalex])
  152. plt.title("{} with {}".format(transmission_direction, cc_algo))
  153. fig.subplots_adjust(right=0.75)
  154. twin1 = ax.twinx()
  155. twin2 = ax.twinx()
  156. # Offset the right spine of twin2. The ticks and label have already been
  157. # placed on the right by twinx above.
  158. twin2.spines.right.set_position(("axes", 1.2))
  159. # create list fo color indices
  160. transmission_df["index"] = transmission_df.index
  161. color_dict = dict()
  162. color_list = list()
  163. i = 0
  164. for cell_id in transmission_df["cellID"]:
  165. if cell_id not in color_dict:
  166. color_dict[cell_id] = i
  167. i += 1
  168. color_list.append(color_dict[cell_id])
  169. transmission_df["cell_color"] = color_list
  170. color_dict = None
  171. color_list = None
  172. cmap = matplotlib.cm.get_cmap("Set3")
  173. unique_cells = transmission_df["cell_color"].unique()
  174. color_list = cmap.colors * (round(len(unique_cells) / len(cmap.colors)) + 1)
  175. for c in transmission_df["cell_color"].unique():
  176. bounds = transmission_df[["index", "cell_color"]].groupby("cell_color").agg(["min", "max"]).loc[c]
  177. ax.axvspan(bounds.min(), bounds.max(), alpha=0.3, color=color_list[c])
  178. p1, = ax.plot(transmission_df["goodput_rolling"], "-", color="blue", label="goodput")
  179. p2, = twin1.plot(transmission_df["downlink_cqi"], "--", color="green", label="CQI")
  180. p3, = twin2.plot(transmission_df["srtt"], "-.", color="red", label="sRTT")
  181. ax.set_xlim(transmission_df["index"].min(), transmission_df["index"].max())
  182. ax.set_ylim(0, 500)
  183. twin1.set_ylim(0, 15)
  184. twin2.set_ylim(0, 1)
  185. ax.set_xlabel("Time")
  186. ax.set_ylabel("Goodput")
  187. twin1.set_ylabel("CQI")
  188. twin2.set_ylabel("sRTT")
  189. ax.yaxis.label.set_color(p1.get_color())
  190. twin1.yaxis.label.set_color(p2.get_color())
  191. twin2.yaxis.label.set_color(p3.get_color())
  192. tkw = dict(size=4, width=1.5)
  193. ax.tick_params(axis='y', colors=p1.get_color(), **tkw)
  194. twin1.tick_params(axis='y', colors=p2.get_color(), **tkw)
  195. twin2.tick_params(axis='y', colors=p3.get_color(), **tkw)
  196. ax.tick_params(axis='x', **tkw)
  197. #ax.legend(handles=[p1, p2, p3])
  198. if args.save:
  199. plt.savefig("{}timeline_plot.pdf".format(args.save))
  200. if args.export:
  201. pickle.dump(fig, open("{}timeline_plot.pkl".format(args.export), "wb"))
  202. #goodput cdf
  203. plt.clf()
  204. print("Calculate and polt goodput CDF...")
  205. plot_cdf(transmission_df, "goodput")
  206. plt.xlabel("goodput [mbps]")
  207. plt.ylabel("CDF")
  208. plt.legend([cc_algo])
  209. plt.title("{} with {}".format(transmission_direction, cc_algo))
  210. if args.save:
  211. plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "goodput"))
  212. else:
  213. plt.show()
  214. # rtt cdf
  215. plt.clf()
  216. print("Calculate and polt rtt CDF...")
  217. plot_cdf(transmission_df, "srtt")
  218. plt.xlabel("sRTT [s]")
  219. plt.ylabel("CDF")
  220. plt.xscale("log")
  221. plt.legend([cc_algo])
  222. plt.title("{} with {}".format(transmission_direction, cc_algo))
  223. if args.save:
  224. plt.savefig("{}{}_cdf_plot.pdf".format(args.save, "srtt"))
  225. else:
  226. plt.show()