Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

211 lines
6.8KB

  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. import pickle
  5. from argparse import ArgumentParser
  6. from math import ceil
  7. from time import sleep
  8. import matplotlib
  9. import pandas as pd
  10. import matplotlib.pyplot as plt
  11. from mpl_toolkits import axisartist
  12. from mpl_toolkits.axes_grid1 import host_subplot
  13. def csv_to_dataframe(csv_list, folder, dummy):
  14. global n
  15. global frame_list
  16. transmission_df = None
  17. for csv in csv_list:
  18. tmp_df = pd.read_csv(
  19. "{}{}".format(folder, csv),
  20. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  21. )
  22. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  23. tmp_df = tmp_df.set_index("datetime")
  24. tmp_df.index = pd.to_datetime(tmp_df.index)
  25. if transmission_df is None:
  26. transmission_df = tmp_df
  27. else:
  28. transmission_df = pd.concat([transmission_df, tmp_df])
  29. n.value += 1
  30. frame_list.append(transmission_df)
  31. from itertools import islice
  32. def chunk(it, size):
  33. it = iter(it)
  34. return iter(lambda: tuple(islice(it, size)), ())
  35. def plot_cdf(dataframe, column_name, axis=None):
  36. stats_df = dataframe \
  37. .groupby(column_name) \
  38. [column_name] \
  39. .agg("count") \
  40. .pipe(pd.DataFrame) \
  41. .rename(columns={column_name: "frequency"})
  42. # PDF
  43. stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])
  44. # CDF
  45. stats_df["CDF"] = stats_df["PDF"].cumsum()
  46. stats_df = stats_df.reset_index()
  47. if axis:
  48. stats_df.plot(x=column_name, y=["CDF"], grid=True, ax=axis)
  49. else:
  50. stats_df.plot(x=column_name, y=["CDF"], grid=True)
  51. return axis
  52. if __name__ == "__main__":
  53. parser = ArgumentParser()
  54. parser.add_argument("--serial1", required=True, help="Serial csv file1.")
  55. parser.add_argument("--serial2", required=True, help="Serial csv file2.")
  56. parser.add_argument("--folder1", required=True, help="PCAP csv folder1.")
  57. parser.add_argument("--folder2", required=True, help="PCAP csv folder2.")
  58. parser.add_argument("--save", default=None, help="Location to save pdf file.")
  59. parser.add_argument(
  60. "-c",
  61. "--cores",
  62. default=1,
  63. type=int,
  64. help="Number of cores for multiprocessing.",
  65. )
  66. parser.add_argument(
  67. "-i",
  68. "--interval",
  69. default=2,
  70. type=int,
  71. help="Time interval for rolling window.",
  72. )
  73. args = parser.parse_args()
  74. transmission_df_list = list()
  75. for f in [args.folder1, args.folder2]:
  76. manager = multiprocessing.Manager()
  77. n = manager.Value("i", 0)
  78. frame_list = manager.list()
  79. jobs = []
  80. # load all pcap csv into one dataframe
  81. pcap_csv_list = list()
  82. for filename in os.listdir(f):
  83. if filename.endswith(".csv") and "tcp" in filename:
  84. pcap_csv_list.append(filename)
  85. parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
  86. print("Start processing with {} jobs.".format(args.cores))
  87. for p in parts:
  88. process = multiprocessing.Process(target=csv_to_dataframe, args=(p, f, "dummy"))
  89. jobs.append(process)
  90. for j in jobs:
  91. j.start()
  92. print("Started all jobs.")
  93. # Ensure all the processes have finished
  94. finished_job_counter = 0
  95. working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
  96. w = 0
  97. while len(jobs) != finished_job_counter:
  98. sleep(1)
  99. print(
  100. "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
  101. working[w],
  102. working[w],
  103. working[w],
  104. len(jobs),
  105. finished_job_counter,
  106. n.value,
  107. len(pcap_csv_list),
  108. round((n.value / len(pcap_csv_list)) * 100, 2),
  109. ),
  110. end="",
  111. )
  112. finished_job_counter = 0
  113. for j in jobs:
  114. if not j.is_alive():
  115. finished_job_counter += 1
  116. if (w + 1) % len(working) == 0:
  117. w = 0
  118. else:
  119. w += 1
  120. print("\r\nSorting table...")
  121. transmission_df = pd.concat(frame_list)
  122. frame_list = None
  123. transmission_df = transmission_df.sort_index()
  124. print("Calculate goodput...")
  125. transmission_df["srtt"] = transmission_df["srtt"].apply(lambda x: x / 10 ** 6)
  126. # key for columns and level for index
  127. transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
  128. transmission_df["goodput"] = transmission_df["goodput"].apply(
  129. lambda x: ((x * 8) / args.interval) / 10**6
  130. )
  131. transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
  132. transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
  133. lambda x: ((x * 8) / args.interval) / 10 ** 6
  134. )
  135. # set meta values
  136. cc_algo = transmission_df["congestion_control"].iloc[0]
  137. cc_algo = cc_algo.upper()
  138. transmission_direction = transmission_df["direction"].iloc[0]
  139. # read serial csv
  140. #serial_df = pd.read_csv(args.serial_file)
  141. #serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
  142. #serial_df = serial_df.set_index("datetime")
  143. #serial_df.index = pd.to_datetime(serial_df.index)
  144. #serial_df.sort_index()
  145. #transmission_df = pd.merge_asof(
  146. # transmission_df,
  147. # serial_df,
  148. # tolerance=pd.Timedelta("1s"),
  149. # right_index=True,
  150. # left_index=True,
  151. #)
  152. transmission_df_list.append(dict(
  153. df=transmission_df,
  154. cc_algo=cc_algo,
  155. transmission_direction=transmission_direction
  156. ))
  157. # Plot sRTT CDF
  158. ax = plot_cdf(transmission_df_list[0]["df"], "srtt")
  159. plot_cdf(transmission_df_list[1]["df"], "srtt", axis=ax)
  160. plt.xscale("log")
  161. plt.xlabel("sRTT [s]")
  162. plt.ylabel("CDF")
  163. plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
  164. plt.title("{}".format(transmission_direction))
  165. plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "srtt"))
  166. plt.clf()
  167. # Plot goodput CDF
  168. ax = plot_cdf(transmission_df_list[0]["df"], "goodput")
  169. plot_cdf(transmission_df_list[1]["df"], "goodput", axis=ax)
  170. plt.xlabel("goodput [mbps]")
  171. plt.ylabel("CDF")
  172. plt.legend([transmission_df_list[0]["cc_algo"], transmission_df_list[1]["cc_algo"]])
  173. plt.title("{}".format(transmission_direction))
  174. plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "goodput"))