使用 Pandas 1.0.1,给定下面的示例数据框,我想把数值列 `weight` 和 `price` 替换为按 `box` 列分组后计算出的中位数(不修改原始数据框):
import pandas as pd
import numpy as np
df = pd.DataFrame({'box': ['a100','a100','a100','a200','a200','a300','a300','a300','a300'], 'id_in_box': ['2x', '1x', '3x', '2x', '1x', '3x', '1x', '2x', '4x'], 'weight': [2, 1, 2, 3, 4, 2, 8, 6, 4.5], 'price': [1.5, 3.2, 2.1, 3.3, 1.5, 3.2, 2.1, 3.3, 4.4]})
df
box id_in_box weight price
0 a100 2x 2.0 1.5
1 a100 1x 1.0 3.2
2 a100 3x 2.0 2.1
3 a200 2x 3.0 3.3
4 a200 1x 4.0 1.5
5 a300 3x 2.0 3.2
6 a300 1x 8.0 2.1
7 a300 2x 6.0 3.3
8 a300 4x 4.5 4.4
实际(我目前的尝试,结果只包含 `weight` 和 `price` 两列,缺少 `box` 和 `id_in_box`):
df.groupby('box')[['weight', 'price']].transform(lambda x: np.median(x))
weight price
0 2.00 2.10
1 2.00 2.10
2 2.00 2.10
3 3.50 2.40
4 3.50 2.40
5 5.25 3.25
6 5.25 3.25
7 5.25 3.25
8 5.25 3.25
预期(保留 `box` 和 `id_in_box` 列,`weight` 和 `price` 替换为各组的中位数):
box id_in_box weight price
0 a100 2x 2.00 2.10
1 a100 1x 2.00 2.10
2 a100 3x 2.00 2.10
3 a200 2x 3.50 2.40
4 a200 1x 3.50 2.40
5 a300 3x 5.25 3.25
6 a300 1x 5.25 3.25
7 a300 2x 5.25 3.25
8 a300 4x 5.25 3.25
如何以最有效的方式实现这一目标?