在Python中使用进程时为空列表

我正在尝试使用多处理库来加快从文件读取CSV的速度。我已经使用Pool完成了此操作,现在我正在尝试使用Process()实现它。但是,将列表串联以创建数据框时,会出现以下错误:

ValueError:没有要串联的对象

在我看来,进程正在覆盖uber_data列表。我在这里想念什么?

import glob
import pandas as pd
from multiprocessing import Process
import matplotlib.pyplot as plt
import os

location = "/home/data/csv/"

uber_data = []

def read_csv(filename):

    return uber_data.append(pd.read_csv(filename))

def data_wrangling(uber_data):
    uber_data['Date/Time'] = pd.to_datetime(uber_data['Date/Time'], format="%m/%d/%Y %H:%M:%S")
    uber_data['Dia Setmana'] = uber_data['Date/Time'].dt.weekday_name
    uber_data['Num dia'] = uber_data['Date/Time'].dt.dayofweek

    return uber_data

def plotting(uber_data):

    weekdays = uber_data.pivot_table(index=['Num dia','Dia Setmana'], values='Base', aggfunc='count')
    weekdays.plot(kind='bar', figsize=(8,6))
    plt.ylabel('Total Journeys')
    plt.title('Journey on Week Day')

def main():

    processes = []
    files = list(glob.glob(os.path.join(location,'*.csv*')))

    for file in files:
        print(file)
        p = Process(target=read_csv, args=[file])
        processes.append(p)
        p.start()

    for i, process in enumerate(processes):
        process.join()

    print(uber_data)

    combined_df = pd.concat(uber_data, ignore_index=True)
    dades_mod = data_wrangling(combined_df)
    Plotting(dades_mod)

main()

追溯为:

Process Process-223:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<timed exec>", line 17, in read_csv
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
    sort=sort,
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
    objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-224:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<timed exec>", line 17, in read_csv
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
    sort=sort,
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
    objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-221:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<timed exec>", line 17, in read_csv
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
    sort=sort,
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
    objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-222:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process Process-225:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<timed exec>", line 17, in read_csv
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
    sort=sort,
  File "<timed exec>", line 17, in read_csv
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
    objs = list(objs)
TypeError: 'NoneType' object is not iterable
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
    sort=sort,
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
    objs = list(objs)
TypeError: 'NoneType' object is not iterable
Process Process-220:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "<timed exec>", line 17, in read_csv
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 255, in concat
    sort=sort,
  File "/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py", line 301, in __init__
    objs = list(objs)
TypeError: 'NoneType' object is not iterable

[]

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<timed eval> in <module>

<timed exec> in main()

/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
    253         verify_integrity=verify_integrity,
    254         copy=copy,
--> 255         sort=sort,
    256     )
    257 

/usr/local/lib/python3.6/dist-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy, sort)
    302 
    303         if len(objs) == 0:
--> 304             raise ValueError("No objects to concatenate")
    305 
    306         if keys is None:

ValueError: No objects to concatenate

谢谢

评论