我正在尝试使用多处理库来加快从文件读取CSV的速度。我已经使用Pool完成了此操作,现在我正在尝试使用Process()实现它。但是,将列表串联以创建数据框时,会出现以下错误:
ValueError:没有要串联的对象
在我看来,进程正在覆盖uber_data列表。我在这里想念什么?
import glob
import pandas as pd
from multiprocessing import Process
import matplotlib.pyplot as plt
import os
location = "/home/data/csv/"
uber_data = []
def read_csv(filename):
return uber_data.append(pd.read_csv(filename))
def data_wrangling(uber_data):
uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'], format="%m/%d/%Y %H:%M:%S")
uber_data['Dia Setmana'] = uber_data['Date/Time'].dt.weekday_name
uber_data['Num dia'] = uber_data['Date/Time'].dt.dayofweek
return uber_data
def plotting(uber_data):
weekdays = uber_data.pivot_table(index=['Num dia','Dia Setmana'], values='Base', aggfunc='count')
weekdays.plot(kind='bar', figsize=(8,6))
plt.ylabel('Total Journeys')
plt.title('Journey on Week Day')
def main():
processes = []
files = list(glob.glob(os.path.join(location,'*.csv*')))
for file in files:
print(file)
p = Process(target=read_csv, args=[file])
processes.append(p)
p.start()
for i, process in enumerate(processes):
process.join()
print(uber_data)
combined_df = pd.concat(uber_data, ignore_index=True)
dades_mod = data_wrangling(combined_df)
Plotting(dades_mod)
main()
追溯为:
Process Process-223:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-224:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-221:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-222:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
Process Process-225:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-220:
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "<timed exec>", line 17, in read_csv
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
sort=sort,
File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
objs = list(objs)
TypeError: 'NoneType' object is not iterable
[]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed eval> in <module>
<timed exec> in main()
/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
253 verify_integrity=verify_integrity,
254 copy=copy,
--> 255 sort=sort,
256 )
257
/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy, sort)
302
303 if len(objs) == 0:
--> 304 raise ValueError("No objects to concatenate")
305
306 if keys is None:
ValueError: No objects to concatenate
谢谢