| def csv_to_dataframe(csv_list, dummy): | |||||
| def csv_to_dataframe(csv_list, folder, dummy): | |||||
| global n | global n | ||||
| global frame_list | global frame_list | ||||
| for csv in csv_list: | for csv in csv_list: | ||||
| tmp_df = pd.read_csv( | tmp_df = pd.read_csv( | ||||
| "{}{}".format(args.pcap_csv_folder, csv), | |||||
| "{}{}".format(folder, csv), | |||||
| dtype=dict(is_retranmission=bool, is_dup_ack=bool), | dtype=dict(is_retranmission=bool, is_dup_ack=bool), | ||||
| ) | ) | ||||
| tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1) | tmp_df["datetime"] = pd.to_datetime(tmp_df["datetime"]) - pd.Timedelta(hours=1) | ||||
| # load all pcap csv into one dataframe | # load all pcap csv into one dataframe | ||||
| pcap_csv_list = list() | pcap_csv_list = list() | ||||
| for filename in os.listdir(args.pcap_csv_folder): | |||||
| for filename in os.listdir(f): | |||||
| if filename.endswith(".csv") and "tcp" in filename: | if filename.endswith(".csv") and "tcp" in filename: | ||||
| pcap_csv_list.append(filename) | pcap_csv_list.append(filename) | ||||
| parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores)) | parts = chunk(pcap_csv_list, ceil(len(pcap_csv_list) / args.cores)) | ||||
| print("Start processing with {} jobs.".format(args.cores)) | print("Start processing with {} jobs.".format(args.cores)) | ||||
| for p in parts: | for p in parts: | ||||
| process = multiprocessing.Process(target=csv_to_dataframe, args=(p, "dummy")) | |||||
| process = multiprocessing.Process(target=csv_to_dataframe, args=(p, f, "dummy")) | |||||
| jobs.append(process) | jobs.append(process) | ||||
| for j in jobs: | for j in jobs: | ||||
| transmission_direction = transmission_df["direction"].iloc[0] | transmission_direction = transmission_df["direction"].iloc[0] | ||||
| # read serial csv | # read serial csv | ||||
| serial_df = pd.read_csv(args.serial_file) | |||||
| serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1) | |||||
| serial_df = serial_df.set_index("datetime") | |||||
| serial_df.index = pd.to_datetime(serial_df.index) | |||||
| serial_df.sort_index() | |||||
| transmission_df = pd.merge_asof( | |||||
| transmission_df, | |||||
| serial_df, | |||||
| tolerance=pd.Timedelta("1s"), | |||||
| right_index=True, | |||||
| left_index=True, | |||||
| ) | |||||
| #serial_df = pd.read_csv(args.serial_file) | |||||
| #serial_df["datetime"] = pd.to_datetime(serial_df["datetime"]) - pd.Timedelta(hours=1) | |||||
| #serial_df = serial_df.set_index("datetime") | |||||
| #serial_df.index = pd.to_datetime(serial_df.index) | |||||
| #serial_df.sort_index() | |||||
| #transmission_df = pd.merge_asof( | |||||
| # transmission_df, | |||||
| # serial_df, | |||||
| # tolerance=pd.Timedelta("1s"), | |||||
| # right_index=True, | |||||
| # left_index=True, | |||||
| #) | |||||
| transmission_df_list.append(dict( | transmission_df_list.append(dict( | ||||
| df=transmission_df, | df=transmission_df, |