# NOTE(review): removed GitHub web-UI residue captured during extraction
# (topic-picker help text, "227 行", "7.3KB") — not part of the script.
  1. #!/usr/bin/env python3
  2. import multiprocessing
  3. import os
  4. import pickle
  5. from argparse import ArgumentParser
  6. from math import ceil
  7. from time import sleep
  8. import matplotlib
  9. import pandas as pd
  10. import matplotlib.pyplot as plt
  11. from mpl_toolkits import axisartist
  12. from mpl_toolkits.axes_grid1 import host_subplot
  13. def csv_to_dataframe(csv_list, folder, dummy):
  14. global n
  15. global frame_list
  16. tmp_df = None
  17. for csv in csv_list:
  18. tmp_df = pd.read_csv(
  19. "{}{}".format(folder, csv),
  20. dtype=dict(is_retranmission=bool, is_dup_ack=bool),
  21. )
  22. tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1)
  23. tmp_df = tmp_df.set_index("datetime")
  24. tmp_df.index = pd.to_datetime(tmp_df.index)
  25. if tmp_df is None:
  26. tmp_df = tmp_df
  27. else:
  28. tmp_df = pd.concat([tmp_df, tmp_df])
  29. n.value += 1
  30. tmp_df = tmp_df.filter(
  31. ["srtt", "datetime", "payload_size", "congestion_control", "direction"])
  32. frame_list.append(tmp_df)
  33. del tmp_df
  34. from itertools import islice
  35. def chunk(it, size):
  36. it = iter(it)
  37. return iter(lambda: tuple(islice(it, size)), ())
  38. def plot_cdf(dataframe, column_name, axis=None):
  39. stats_df = dataframe \
  40. .groupby(column_name) \
  41. [column_name] \
  42. .agg("count") \
  43. .pipe(pd.DataFrame) \
  44. .rename(columns={column_name: "frequency"})
  45. # PDF
  46. stats_df["PDF"] = stats_df["frequency"] / sum(stats_df["frequency"])
  47. # CDF
  48. stats_df["CDF"] = stats_df["PDF"].cumsum()
  49. stats_df = stats_df.reset_index()
  50. if axis:
  51. stats_df.plot(x=column_name, y=["CDF"], grid=True, ax=axis)
  52. else:
  53. stats_df.plot(x=column_name, y=["CDF"], grid=True)
  54. del stats_df
if __name__ == "__main__":
    # Command-line interface: one pcap-csv folder per congestion-control run;
    # the script builds a per-folder dataframe in parallel worker processes,
    # then overlays sRTT and goodput CDFs of all runs into two pdf files.
    parser = ArgumentParser()
    parser.add_argument("-s", "--serials", required=True, help="Serial csv files. Comma separated.")
    parser.add_argument("-f", "--folders", required=True, help="PCAP csv folders. Comma separated.")
    parser.add_argument("--save", default=None, help="Location to save pdf file.")
    parser.add_argument(
        "-c",
        "--cores",
        default=1,
        type=int,
        help="Number of cores for multiprocessing.",
    )
    parser.add_argument(
        "-i",
        "--interval",
        default=2,
        type=int,
        help="Time interval for rolling window.",
    )
    args = parser.parse_args()
    transmission_df_list = list()
    for f in args.folders.split(","):
        # Shared state for the worker processes: ``n`` counts processed csv
        # files (for the progress display), ``frame_list`` collects each
        # worker's concatenated dataframe.
        manager = multiprocessing.Manager()
        n = manager.Value("i", 0)
        frame_list = manager.list()
        jobs = []
        # load all pcap csv into one dataframe
        pcap_csv_list = list()
        for filename in os.listdir(f):
            # Only tcp pcap exports are considered.
            if filename.endswith(".csv") and "tcp" in filename:
                pcap_csv_list.append(filename)
        # Split the file list into one chunk per core; each chunk becomes
        # one worker process running csv_to_dataframe.
        parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores))
        print("Start processing with {} jobs.".format(args.cores))
        for p in parts:
            process = multiprocessing.Process(target=csv_to_dataframe, args=(p, f, "dummy"))
            jobs.append(process)
        for j in jobs:
            j.start()
        print("Started all jobs.")
        # Ensure all the processes have finished
        finished_job_counter = 0
        # Spinner frames for the console progress line.
        working = ["|", "/", "-", "\\", "|", "/", "-", "\\"]
        w = 0
        while len(jobs) != finished_job_counter:
            sleep(1)
            print(
                "\r\t{}{}{}\t Running {} jobs ({} finished). Processed {} out of {} pcap csv files. ({}%) ".format(
                    working[w],
                    working[w],
                    working[w],
                    len(jobs),
                    finished_job_counter,
                    n.value,
                    len(pcap_csv_list),
                    round((n.value / len(pcap_csv_list)) * 100, 2),
                ),
                end="",
            )
            # Re-count finished workers from scratch each second.
            finished_job_counter = 0
            for j in jobs:
                if not j.is_alive():
                    finished_job_counter += 1
            # Advance (and wrap) the spinner index.
            if (w + 1) % len(working) == 0:
                w = 0
            else:
                w += 1
        print("\r\nSorting table...")
        # Merge the per-worker frames and order all samples by timestamp.
        transmission_df = pd.concat(frame_list)
        frame_list = None
        transmission_df = transmission_df.sort_index()
        #
        # Don't forget to add new columns to the filter argument in the function above!
        #
        # srtt arrives in microseconds; convert to seconds.
        transmission_df["srtt"] = transmission_df["srtt"].apply(lambda x: x / 10 ** 6)
        # key for columns and level for index
        # Goodput per fixed time bucket: sum payload bytes over each
        # ``interval``-second bucket, then convert bytes -> Mbit/s.
        transmission_df["goodput"] = transmission_df["payload_size"].groupby(pd.Grouper(level="datetime", freq="{}s".format(args.interval))).transform("sum")
        transmission_df["goodput"] = transmission_df["goodput"].apply(
            lambda x: ((x * 8) / args.interval) / 10**6
        )
        # Same conversion over a rolling ``interval``-second window.
        transmission_df["goodput_rolling"] = transmission_df["payload_size"].rolling("{}s".format(args.interval)).sum()
        transmission_df["goodput_rolling"] = transmission_df["goodput_rolling"].apply(
            lambda x: ((x * 8) / args.interval) / 10 ** 6
        )
        # set meta values
        # Assumes every row of one folder shares the same congestion-control
        # algorithm and direction, so the first row is representative.
        cc_algo = transmission_df["congestion_control"].iloc[0]
        cc_algo = cc_algo.upper()
        transmission_direction = transmission_df["direction"].iloc[0]
        #transmission_df = transmission_df.filter(["srtt", "datetime", "srtt", "payload_size"])
        transmission_df = transmission_df.drop(columns=["congestion_control", "direction"])
        # read serial csv
        #serial_df = pd.read_csv(args.serial_file)
        #serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1)
        #serial_df = serial_df.set_index("datetime")
        #serial_df.index = pd.to_datetime(serial_df.index)
        #serial_df.sort_index()
        #transmission_df = pd.merge_asof(
        #    transmission_df,
        #    serial_df,
        #    tolerance=pd.Timedelta("1s"),
        #    right_index=True,
        #    left_index=True,
        #)
        transmission_df_list.append(dict(
            df=transmission_df,
            cc_algo=cc_algo,
            transmission_direction=transmission_direction
        ))
        del transmission_df
    # Plot sRTT CDF
    # First run establishes the figure, later runs draw onto the same axes
    # so the CC algorithms can be compared in one plot.
    legend = list()
    plot_cdf(transmission_df_list[0]["df"], "srtt")
    legend.append(transmission_df_list[0]["cc_algo"])
    for i in range(1, len(transmission_df_list)):
        plot_cdf(transmission_df_list[i]["df"], "srtt", axis=plt.gca())
        legend.append(transmission_df_list[i]["cc_algo"])
    plt.xscale("log")
    plt.xlabel("sRTT [s]")
    plt.ylabel("CDF")
    plt.legend(legend)
    plt.title("{}".format(transmission_df_list[0]["transmission_direction"]))
    # NOTE(review): if --save is omitted the path becomes "None..." —
    # presumably a trailing-slash directory prefix is expected here.
    plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "srtt"))
    plt.clf()
    # Plot goodput CDF
    legend = list()
    plot_cdf(transmission_df_list[0]["df"], "goodput")
    legend.append(transmission_df_list[0]["cc_algo"])
    for i in range(1, len(transmission_df_list)):
        plot_cdf(transmission_df_list[i]["df"], "goodput", axis=plt.gca())
        legend.append(transmission_df_list[i]["cc_algo"])
    plt.xlabel("goodput [mbps]")
    plt.ylabel("CDF")
    plt.legend(legend)
    plt.title("{}".format(transmission_df_list[0]["transmission_direction"]))
    plt.savefig("{}{}_cdf_compare_plot.pdf".format(args.save, "goodput"))