老师,运行时报错了,请问是哪里有问题?
来源:4-10 相关分析与实现

慕运维7479159
2018-03-07
import pandas as pd
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): no context name is passed here — confirm that font_scale
# actually takes effect when seaborn's context argument is left at its default.
sns.set_context(font_scale=1.5)
# Disabled example: load the HR dataset and draw its correlation heatmap.
# df = pd.read_csv("./data/HR.csv")
# sns.heatmap(df.corr(),vmin=-1,vmax=1,cmap=sns.color_palette("RdBu",n_colors=128))
# plt.show()
# Two small categorical samples used to exercise the entropy functions.
s1 = pd.Series(["X1","X1","X2","X2","X2","X2"])
s2 = pd.Series(["Y1","Y1","Y1","Y2","Y2","Y2"])
def getEntropy(s):
    """Return the Shannon entropy, in bits, of the values in ``s``.

    Args:
        s: A pandas Series, or any sequence that ``pd.Series`` can wrap,
            holding categorical observations.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct value. An empty input yields ``0.0``.
    """
    # Accept plain sequences by wrapping them in a Series first.
    if not isinstance(s, pd.Series):
        s = pd.Series(s)
    # No observations means no uncertainty; also avoids dividing by zero.
    if len(s) == 0:
        return 0.0
    # Count each distinct value and divide by the number of observations
    # to obtain the empirical probability distribution.
    prt_ary = s.groupby(s).count().values / float(len(s))
    # Entropy is the negated sum of p * log2(p).
    return -(np.log2(prt_ary) * prt_ary).sum()
# Report the entropy of both samples in turn. s2 is an even 0.5 / 0.5
# split between two values, so its entropy is exactly 1.
for sample in (s1, s2):
    print("Entropy:", getEntropy(sample))
def getCondEntropy(s1, s2):
    """Return the conditional entropy of ``s2`` given ``s1``.

    The values of ``s2`` are grouped by the value of ``s1`` at the same
    position; the entropy of each group is weighted by the share of
    observations that fall into that group, and the weighted entropies
    are summed.

    Args:
        s1: Sequence (list or pandas Series) of conditioning values.
        s2: Sequence of observed values, paired with ``s1`` by position.

    Returns:
        The weighted sum of per-group entropies; ``0`` when ``s1`` is empty.

    Raises:
        ValueError: If ``s2`` has fewer elements than ``s1``.
    """
    total = len(s1)
    if len(s2) < total:
        raise ValueError("s2 must have at least as many elements as s1")
    # Map each distinct s1 value to the list of s2 values observed with it.
    # zip pairs elements by position, so a Series with a non-default index
    # is handled the same way as a plain list.
    d = dict()
    for key, value in zip(s1, s2):
        d.setdefault(key, []).append(value)
    # Weight each group's entropy by the fraction of observations in it.
    return sum(
        getEntropy(group) * len(group) / float(total) for group in d.values()
    )
# Pass the value as a separate print argument: concatenating a str with the
# numeric result using "+" raises a TypeError.
print("CondEntropy:", getCondEntropy(s1, s2))
Traceback (most recent call last):
  File "C:/Users/5838/Untitled Folder/pycharm/cov.py", line 36, in <module>
    print("CondEntropy:" + getCondEntropy(s1,s2))
TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U32') dtype('<U32') dtype('<U32')
1回答
-
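getCondEntropy 返回的是数值(numpy 浮点数),不能直接用 + 与字符串拼接,所以会报 TypeError。把这一行写成 print("CondEntropy:" + str(getCondEntropy(s1,s2))),加个 str 转换一下即可;也可以写成 print("CondEntropy:", getCondEntropy(s1,s2))。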
022018-03-10
相似问题