在 Python Pandas 中同时熔化多个列
•浏览 1
Simultaneously melt multiple columns in Python Pandas
想知道 pd.melt 是否支持同时熔化多个列。我在下面的示例中尝试将 value_vars 作为列表的列表传入,但出现了以下错误:
ValueError: Location based indexing can only have [labels (MUST BE IN THE INDEX), slices of labels (BOTH endpoints included! Can be slices of integers if the index is integers), listlike of labels, boolean] types
使用的是 pandas 0.23.1。
df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'],
'State': ['Texas', 'Texas', 'Alabama'],
'Name':['Aria', 'Penelope', 'Niko'],
'Mango':[4, 10, 90],
'Orange': [10, 8, 14],
'Watermelon':[40, 99, 43],
'Gin':[16, 200, 34],
'Vodka':[20, 33, 18]},
columns=['City', 'State', 'Name', 'Mango', 'Orange', 'Watermelon', 'Gin', 'Vodka'])
City State Fruit Pounds Drink Ounces
0 Houston Texas Mango 4 Gin 16.0
1 Austin Texas Mango 10 Gin 200.0
2 Hoover Alabama Mango 90 Gin 34.0
3 Houston Texas Orange 10 Vodka 20.0
4 Austin Texas Orange 8 Vodka 33.0
5 Hoover Alabama Orange 14 Vodka 18.0
6 Houston Texas Watermelon 40 nan NaN
7 Austin Texas Watermelon 99 nan NaN
8 Hoover Alabama Watermelon 43 nan NaN
df.melt(id_vars=['City', 'State'],
value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']],var_name=['Fruit', 'Drink'],
value_name=['Pounds', 'Ounces'])
df1 = df.melt(id_vars=['City', 'State'],
value_vars=['Mango', 'Orange', 'Watermelon'],
var_name='Fruit', value_name='Pounds')
df2 = df.melt(id_vars=['City', 'State'],
value_vars=['Gin', 'Vodka'],
var_name='Drink', value_name='Ounces')
df1 = df1.set_index(['City', 'State', df1.groupby(['City', 'State']).cumcount()])
df2 = df2.set_index(['City', 'State', df2.groupby(['City', 'State']).cumcount()])
df3 = (pd.concat([df1, df2],axis=1)
.sort_index(level=2)
.reset_index(level=2, drop=True)
.reset_index())
print (df3)
City State Fruit Pounds Drink Ounces
0 Austin Texas Mango 10 Gin 200.0
1 Hoover Alabama Mango 90 Gin 34.0
2 Houston Texas Mango 4 Gin 16.0
3 Austin Texas Orange 8 Vodka 33.0
4 Hoover Alabama Orange 14 Vodka 18.0
5 Houston Texas Orange 10 Vodka 20.0
6 Austin Texas Watermelon 99 NaN NaN
7 Hoover Alabama Watermelon 43 NaN NaN
8 Houston Texas Watermelon 40 NaN NaN
# pip install pyjanitor
import pandas as pd
import janitor as jn
fruits = ("Mango","Orange","Watermelon")
drinks = ("Gin","Vodka")
mapp = {key :"fruits" for key in fruits} | {key :"drinks" for key in drinks}
(df.rename(columns = lambda col: f"{col}_{fruits.index(col)}_pounds"
if col in fruits
else f"{col}_{drinks.index(col)}_ounces"
if col in drinks
else col)
.pivot_longer(index = slice('City', 'Name'),
names_to = ('generic', 'position', '.value'),
names_sep = '_')
.assign(temp = lambda df: df.generic.map(mapp))
.pivot_wider(index = [slice('City', 'Name'), 'position'],
names_from = 'temp')
.dropna(how='all', axis = 1)
.rename(columns = lambda col: col.replace("generic_","")
.replace("_drinks","")
.replace("_fruits",""))
.loc[:, ['City', 'State', 'fruits', 'pounds', 'drinks', 'ounces']]
)
City State fruits pounds drinks ounces
0 Austin Texas Mango 10.0 Gin 200.0
1 Austin Texas Orange 8.0 Vodka 33.0
2 Austin Texas Watermelon 99.0 NaN NaN
3 Hoover Alabama Mango 90.0 Gin 34.0
4 Hoover Alabama Orange 14.0 Vodka 18.0
5 Hoover Alabama Watermelon 43.0 NaN NaN
6 Houston Texas Mango 4.0 Gin 16.0
7 Houston Texas Orange 10.0 Vodka 20.0
8 Houston Texas Watermelon 40.0 NaN NaN
期望的输出:
df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'],
'State': ['Texas', 'Texas', 'Alabama'],
'Name':['Aria', 'Penelope', 'Niko'],
'Mango':[4, 10, 90],
'Orange': [10, 8, 14],
'Watermelon':[40, 99, 43],
'Gin':[16, 200, 34],
'Vodka':[20, 33, 18]},
columns=['City', 'State', 'Name', 'Mango', 'Orange', 'Watermelon', 'Gin', 'Vodka'])
City State Fruit Pounds Drink Ounces
0 Houston Texas Mango 4 Gin 16.0
1 Austin Texas Mango 10 Gin 200.0
2 Hoover Alabama Mango 90 Gin 34.0
3 Houston Texas Orange 10 Vodka 20.0
4 Austin Texas Orange 8 Vodka 33.0
5 Hoover Alabama Orange 14 Vodka 18.0
6 Houston Texas Watermelon 40 nan NaN
7 Austin Texas Watermelon 99 nan NaN
8 Hoover Alabama Watermelon 43 nan NaN
df.melt(id_vars=['City', 'State'],
value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']],var_name=['Fruit', 'Drink'],
value_name=['Pounds', 'Ounces'])
df1 = df.melt(id_vars=['City', 'State'],
value_vars=['Mango', 'Orange', 'Watermelon'],
var_name='Fruit', value_name='Pounds')
df2 = df.melt(id_vars=['City', 'State'],
value_vars=['Gin', 'Vodka'],
var_name='Drink', value_name='Ounces')
df1 = df1.set_index(['City', 'State', df1.groupby(['City', 'State']).cumcount()])
df2 = df2.set_index(['City', 'State', df2.groupby(['City', 'State']).cumcount()])
df3 = (pd.concat([df1, df2],axis=1)
.sort_index(level=2)
.reset_index(level=2, drop=True)
.reset_index())
print (df3)
City State Fruit Pounds Drink Ounces
0 Austin Texas Mango 10 Gin 200.0
1 Hoover Alabama Mango 90 Gin 34.0
2 Houston Texas Mango 4 Gin 16.0
3 Austin Texas Orange 8 Vodka 33.0
4 Hoover Alabama Orange 14 Vodka 18.0
5 Houston Texas Orange 10 Vodka 20.0
6 Austin Texas Watermelon 99 NaN NaN
7 Hoover Alabama Watermelon 43 NaN NaN
8 Houston Texas Watermelon 40 NaN NaN
# pip install pyjanitor
import pandas as pd
import janitor as jn
fruits = ("Mango","Orange","Watermelon")
drinks = ("Gin","Vodka")
mapp = {key :"fruits" for key in fruits} | {key :"drinks" for key in drinks}
(df.rename(columns = lambda col: f"{col}_{fruits.index(col)}_pounds"
if col in fruits
else f"{col}_{drinks.index(col)}_ounces"
if col in drinks
else col)
.pivot_longer(index = slice('City', 'Name'),
names_to = ('generic', 'position', '.value'),
names_sep = '_')
.assign(temp = lambda df: df.generic.map(mapp))
.pivot_wider(index = [slice('City', 'Name'), 'position'],
names_from = 'temp')
.dropna(how='all', axis = 1)
.rename(columns = lambda col: col.replace("generic_","")
.replace("_drinks","")
.replace("_fruits",""))
.loc[:, ['City', 'State', 'fruits', 'pounds', 'drinks', 'ounces']]
)
City State fruits pounds drinks ounces
0 Austin Texas Mango 10.0 Gin 200.0
1 Austin Texas Orange 8.0 Vodka 33.0
2 Austin Texas Watermelon 99.0 NaN NaN
3 Hoover Alabama Mango 90.0 Gin 34.0
4 Hoover Alabama Orange 14.0 Vodka 18.0
5 Hoover Alabama Watermelon 43.0 NaN NaN
6 Houston Texas Mango 4.0 Gin 16.0
7 Houston Texas Orange 10.0 Vodka 20.0
8 Houston Texas Watermelon 40.0 NaN NaN
我尝试了下面的写法,结果得到了上述错误:
df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'],
'State': ['Texas', 'Texas', 'Alabama'],
'Name':['Aria', 'Penelope', 'Niko'],
'Mango':[4, 10, 90],
'Orange': [10, 8, 14],
'Watermelon':[40, 99, 43],
'Gin':[16, 200, 34],
'Vodka':[20, 33, 18]},
columns=['City', 'State', 'Name', 'Mango', 'Orange', 'Watermelon', 'Gin', 'Vodka'])
City State Fruit Pounds Drink Ounces
0 Houston Texas Mango 4 Gin 16.0
1 Austin Texas Mango 10 Gin 200.0
2 Hoover Alabama Mango 90 Gin 34.0
3 Houston Texas Orange 10 Vodka 20.0
4 Austin Texas Orange 8 Vodka 33.0
5 Hoover Alabama Orange 14 Vodka 18.0
6 Houston Texas Watermelon 40 nan NaN
7 Austin Texas Watermelon 99 nan NaN
8 Hoover Alabama Watermelon 43 nan NaN
df.melt(id_vars=['City', 'State'],
value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']],var_name=['Fruit', 'Drink'],
value_name=['Pounds', 'Ounces'])
df1 = df.melt(id_vars=['City', 'State'],
value_vars=['Mango', 'Orange', 'Watermelon'],
var_name='Fruit', value_name='Pounds')
df2 = df.melt(id_vars=['City', 'State'],
value_vars=['Gin', 'Vodka'],
var_name='Drink', value_name='Ounces')
df1 = df1.set_index(['City', 'State', df1.groupby(['City', 'State']).cumcount()])
df2 = df2.set_index(['City', 'State', df2.groupby(['City', 'State']).cumcount()])
df3 = (pd.concat([df1, df2],axis=1)
.sort_index(level=2)
.reset_index(level=2, drop=True)
.reset_index())
print (df3)
City State Fruit Pounds Drink Ounces
0 Austin Texas Mango 10 Gin 200.0
1 Hoover Alabama Mango 90 Gin 34.0
2 Houston Texas Mango 4 Gin 16.0
3 Austin Texas Orange 8 Vodka 33.0
4 Hoover Alabama Orange 14 Vodka 18.0
5 Houston Texas Orange 10 Vodka 20.0
6 Austin Texas Watermelon 99 NaN NaN
7 Hoover Alabama Watermelon 43 NaN NaN
8 Houston Texas Watermelon 40 NaN NaN
# pip install pyjanitor
import pandas as pd
import janitor as jn
fruits = ("Mango","Orange","Watermelon")
drinks = ("Gin","Vodka")
mapp = {key :"fruits" for key in fruits} | {key :"drinks" for key in drinks}
(df.rename(columns = lambda col: f"{col}_{fruits.index(col)}_pounds"
if col in fruits
else f"{col}_{drinks.index(col)}_ounces"
if col in drinks
else col)
.pivot_longer(index = slice('City', 'Name'),
names_to = ('generic', 'position', '.value'),
names_sep = '_')
.assign(temp = lambda df: df.generic.map(mapp))
.pivot_wider(index = [slice('City', 'Name'), 'position'],
names_from = 'temp')
.dropna(how='all', axis = 1)
.rename(columns = lambda col: col.replace("generic_","")
.replace("_drinks","")
.replace("_fruits",""))
.loc[:, ['City', 'State', 'fruits', 'pounds', 'drinks', 'ounces']]
)
City State fruits pounds drinks ounces
0 Austin Texas Mango 10.0 Gin 200.0
1 Austin Texas Orange 8.0 Vodka 33.0
2 Austin Texas Watermelon 99.0 NaN NaN
3 Hoover Alabama Mango 90.0 Gin 34.0
4 Hoover Alabama Orange 14.0 Vodka 18.0
5 Hoover Alabama Watermelon 43.0 NaN NaN
6 Houston Texas Mango 4.0 Gin 16.0
7 Houston Texas Orange 10.0 Vodka 20.0
8 Houston Texas Watermelon 40.0 NaN NaN
对每个类别分别使用一次 melt(共两次),然后用 concat 合并;但由于 City 和 State 存在重复值,需要用 cumcount 添加一个计数器,使 MultiIndex 中的三元组(City、State、计数器)保持唯一:
df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'],
'State': ['Texas', 'Texas', 'Alabama'],
'Name':['Aria', 'Penelope', 'Niko'],
'Mango':[4, 10, 90],
'Orange': [10, 8, 14],
'Watermelon':[40, 99, 43],
'Gin':[16, 200, 34],
'Vodka':[20, 33, 18]},
columns=['City', 'State', 'Name', 'Mango', 'Orange', 'Watermelon', 'Gin', 'Vodka'])
City State Fruit Pounds Drink Ounces
0 Houston Texas Mango 4 Gin 16.0
1 Austin Texas Mango 10 Gin 200.0
2 Hoover Alabama Mango 90 Gin 34.0
3 Houston Texas Orange 10 Vodka 20.0
4 Austin Texas Orange 8 Vodka 33.0
5 Hoover Alabama Orange 14 Vodka 18.0
6 Houston Texas Watermelon 40 nan NaN
7 Austin Texas Watermelon 99 nan NaN
8 Hoover Alabama Watermelon 43 nan NaN
df.melt(id_vars=['City', 'State'],
value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']],var_name=['Fruit', 'Drink'],
value_name=['Pounds', 'Ounces'])
df1 = df.melt(id_vars=['City', 'State'],
value_vars=['Mango', 'Orange', 'Watermelon'],
var_name='Fruit', value_name='Pounds')
df2 = df.melt(id_vars=['City', 'State'],
value_vars=['Gin', 'Vodka'],
var_name='Drink', value_name='Ounces')
df1 = df1.set_index(['City', 'State', df1.groupby(['City', 'State']).cumcount()])
df2 = df2.set_index(['City', 'State', df2.groupby(['City', 'State']).cumcount()])
df3 = (pd.concat([df1, df2],axis=1)
.sort_index(level=2)
.reset_index(level=2, drop=True)
.reset_index())
print (df3)
City State Fruit Pounds Drink Ounces
0 Austin Texas Mango 10 Gin 200.0
1 Hoover Alabama Mango 90 Gin 34.0
2 Houston Texas Mango 4 Gin 16.0
3 Austin Texas Orange 8 Vodka 33.0
4 Hoover Alabama Orange 14 Vodka 18.0
5 Houston Texas Orange 10 Vodka 20.0
6 Austin Texas Watermelon 99 NaN NaN
7 Hoover Alabama Watermelon 43 NaN NaN
8 Houston Texas Watermelon 40 NaN NaN
# pip install pyjanitor
import pandas as pd
import janitor as jn
fruits = ("Mango","Orange","Watermelon")
drinks = ("Gin","Vodka")
mapp = {key :"fruits" for key in fruits} | {key :"drinks" for key in drinks}
(df.rename(columns = lambda col: f"{col}_{fruits.index(col)}_pounds"
if col in fruits
else f"{col}_{drinks.index(col)}_ounces"
if col in drinks
else col)
.pivot_longer(index = slice('City', 'Name'),
names_to = ('generic', 'position', '.value'),
names_sep = '_')
.assign(temp = lambda df: df.generic.map(mapp))
.pivot_wider(index = [slice('City', 'Name'), 'position'],
names_from = 'temp')
.dropna(how='all', axis = 1)
.rename(columns = lambda col: col.replace("generic_","")
.replace("_drinks","")
.replace("_fruits",""))
.loc[:, ['City', 'State', 'fruits', 'pounds', 'drinks', 'ounces']]
)
City State fruits pounds drinks ounces
0 Austin Texas Mango 10.0 Gin 200.0
1 Austin Texas Orange 8.0 Vodka 33.0
2 Austin Texas Watermelon 99.0 NaN NaN
3 Hoover Alabama Mango 90.0 Gin 34.0
4 Hoover Alabama Orange 14.0 Vodka 18.0
5 Hoover Alabama Watermelon 43.0 NaN NaN
6 Houston Texas Mango 4.0 Gin 16.0
7 Houston Texas Orange 10.0 Vodka 20.0
8 Houston Texas Watermelon 40.0 NaN NaN
一个已经很好回答的老问题;这只是一种替代方法,它依赖于 pyjanitor 的辅助函数,特别是 pivot_longer 和 pivot_wider,来帮助重塑过程:
df = pd.DataFrame({'City': ['Houston', 'Austin', 'Hoover'],
'State': ['Texas', 'Texas', 'Alabama'],
'Name':['Aria', 'Penelope', 'Niko'],
'Mango':[4, 10, 90],
'Orange': [10, 8, 14],
'Watermelon':[40, 99, 43],
'Gin':[16, 200, 34],
'Vodka':[20, 33, 18]},
columns=['City', 'State', 'Name', 'Mango', 'Orange', 'Watermelon', 'Gin', 'Vodka'])
City State Fruit Pounds Drink Ounces
0 Houston Texas Mango 4 Gin 16.0
1 Austin Texas Mango 10 Gin 200.0
2 Hoover Alabama Mango 90 Gin 34.0
3 Houston Texas Orange 10 Vodka 20.0
4 Austin Texas Orange 8 Vodka 33.0
5 Hoover Alabama Orange 14 Vodka 18.0
6 Houston Texas Watermelon 40 nan NaN
7 Austin Texas Watermelon 99 nan NaN
8 Hoover Alabama Watermelon 43 nan NaN
df.melt(id_vars=['City', 'State'],
value_vars=[['Mango', 'Orange', 'Watermelon'], ['Gin', 'Vodka']],var_name=['Fruit', 'Drink'],
value_name=['Pounds', 'Ounces'])
df1 = df.melt(id_vars=['City', 'State'],
value_vars=['Mango', 'Orange', 'Watermelon'],
var_name='Fruit', value_name='Pounds')
df2 = df.melt(id_vars=['City', 'State'],
value_vars=['Gin', 'Vodka'],
var_name='Drink', value_name='Ounces')
df1 = df1.set_index(['City', 'State', df1.groupby(['City', 'State']).cumcount()])
df2 = df2.set_index(['City', 'State', df2.groupby(['City', 'State']).cumcount()])
df3 = (pd.concat([df1, df2],axis=1)
.sort_index(level=2)
.reset_index(level=2, drop=True)
.reset_index())
print (df3)
City State Fruit Pounds Drink Ounces
0 Austin Texas Mango 10 Gin 200.0
1 Hoover Alabama Mango 90 Gin 34.0
2 Houston Texas Mango 4 Gin 16.0
3 Austin Texas Orange 8 Vodka 33.0
4 Hoover Alabama Orange 14 Vodka 18.0
5 Houston Texas Orange 10 Vodka 20.0
6 Austin Texas Watermelon 99 NaN NaN
7 Hoover Alabama Watermelon 43 NaN NaN
8 Houston Texas Watermelon 40 NaN NaN
# pip install pyjanitor
import pandas as pd
import janitor as jn
fruits = ("Mango","Orange","Watermelon")
drinks = ("Gin","Vodka")
mapp = {key :"fruits" for key in fruits} | {key :"drinks" for key in drinks}
(df.rename(columns = lambda col: f"{col}_{fruits.index(col)}_pounds"
if col in fruits
else f"{col}_{drinks.index(col)}_ounces"
if col in drinks
else col)
.pivot_longer(index = slice('City', 'Name'),
names_to = ('generic', 'position', '.value'),
names_sep = '_')
.assign(temp = lambda df: df.generic.map(mapp))
.pivot_wider(index = [slice('City', 'Name'), 'position'],
names_from = 'temp')
.dropna(how='all', axis = 1)
.rename(columns = lambda col: col.replace("generic_","")
.replace("_drinks","")
.replace("_fruits",""))
.loc[:, ['City', 'State', 'fruits', 'pounds', 'drinks', 'ounces']]
)
City State fruits pounds drinks ounces
0 Austin Texas Mango 10.0 Gin 200.0
1 Austin Texas Orange 8.0 Vodka 33.0
2 Austin Texas Watermelon 99.0 NaN NaN
3 Hoover Alabama Mango 90.0 Gin 34.0
4 Hoover Alabama Orange 14.0 Vodka 18.0
5 Hoover Alabama Watermelon 43.0 NaN NaN
6 Houston Texas Mango 4.0 Gin 16.0
7 Houston Texas Orange 10.0 Vodka 20.0
8 Houston Texas Watermelon 40.0 NaN NaN