二分类网络的评价指标

  • Acc, F1, ROC, AUC
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def Metrics(output, labels):
    """Compute accuracy, F1 and ROC inputs for a two-class classifier.

    Args:
        output: sequence of per-batch score tensors. Each tensor is indexed
            as ``[sample, class]``; column 1 is treated as the positive class.
        labels: either a single label tensor, which is compared against every
            batch in ``output``, or a list/tuple of label tensors that is
            paired with ``output`` batch by batch.

    Returns:
        ``(prob, test_acc, F1_Score)``. ``prob`` is a list of
        ``[positive-class softmax probability, label]`` pairs, the format
        ``drawROC`` consumes. ``test_acc`` and ``F1_Score`` are floats and
        are ``0.0`` when they would otherwise be undefined (no samples, or
        no positive predictions/labels at all).

    Raises:
        ValueError: if ``labels`` is a list/tuple whose length differs from
            the number of batches in ``output``.
    """
    batches = list(output)
    if isinstance(labels, (list, tuple)):
        if len(labels) != len(batches):
            raise ValueError(
                "got %d output batches but %d label batches"
                % (len(batches), len(labels))
            )
        label_batches = list(labels)
    else:
        # A lone tensor is reused for every batch.
        label_batches = [labels] * len(batches)

    prob = []
    test_correct = 0
    test_total = 0
    tp = fp = fn = 0

    for outputs, batch_labels in zip(batches, label_batches):
        scores = outputs.detach().cpu()
        targets = batch_labels.detach().cpu()
        predicted = scores.argmax(dim=1)

        test_total += targets.size(0)
        test_correct += int((predicted == targets).sum())

        # Confusion counts needed for F1 (true negatives do not enter F1).
        predicted_pos = predicted == 1
        tp += int((predicted_pos & (targets == 1)).sum())
        fp += int((predicted_pos & (targets == 0)).sum())
        fn += int(((predicted == 0) & (targets == 1)).sum())

        # torch.softmax is numerically stable for large scores, unlike a
        # hand-written exp(x) / sum(exp(x)).
        positive = torch.softmax(scores.float(), dim=1)[:, 1].tolist()
        prob.extend([p, y] for p, y in zip(positive, targets.tolist()))

    f1_denominator = 2 * tp + fp + fn
    F1_Score = 2 * tp / f1_denominator if f1_denominator else 0.0
    test_acc = test_correct / test_total if test_total else 0.0

    return prob, test_acc, F1_Score


def drawROC(prob):
    """Plot the ROC curve for binary predictions and return the AUC.

    Args:
        prob: list of ``[positive-class probability, true label]`` pairs,
            with labels 0 (negative) and 1 (positive).

    Returns:
        The area under the ROC curve, integrated with the trapezoid rule.

    Raises:
        ValueError: if ``prob`` lacks either negative or positive samples,
            because FPR or TPR would then be undefined.

    Side effects:
        Clears the current matplotlib figure, draws the curve and saves it
        under ``./temp/`` (the directory is created when missing).
    """
    import os

    neg = sum(1 for sample in prob if sample[1] == 0)
    pos = len(prob) - neg
    if neg == 0 or pos == 0:
        raise ValueError(
            "ROC needs at least one negative and one positive sample "
            "(got %d negative, %d positive)" % (neg, pos)
        )

    # Sweep the threshold upward through the sorted scores. At step i every
    # sample from i onward is predicted positive, so start from "everything
    # positive" and drop one sample per step instead of recounting the tail.
    ranked = sorted(prob, key=lambda sample: sample[0])
    tp = sum(1 for sample in ranked if sample[1] == 1)
    fp = neg
    x = []
    y = []
    for sample in ranked:
        x.append(fp / neg)
        y.append(tp / pos)
        if sample[1] == 1:
            tp -= 1
        elif sample[1] == 0:
            fp -= 1
    # Threshold above every score: nothing is predicted positive.
    x.append(0)
    y.append(0)

    # Trapezoid rule; x is non-increasing, so x[i] - x[i+1] >= 0.
    auc = 0.5 * sum(
        (x[i] - x[i + 1]) * (y[i] + y[i + 1]) for i in range(len(x) - 1)
    )

    # ROC curve
    plt.clf()
    plt.title('Small_Intestine_Classifier_ROC')
    # Only the AUC is known here; the former label also had an ACC
    # placeholder with no value to fill it, which raised TypeError.
    plt.plot(x, y, 'b', label='AUC = %.3f' % auc)
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    os.makedirs("./temp", exist_ok=True)
    plt.savefig("./temp/Small_Intestine_Classifier_ROC_AUC=%.3f_EfficientNet.jpg" % auc)

    return auc

多分类网络的评价指标(以四分类为例)

  • Acc, macro_F1, micro_F1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def Metrics(output, labels, num_classes=4):
    """Compute accuracy, macro-F1 and micro-F1 for a multi-class classifier.

    Args:
        output: sequence of per-batch score tensors, each indexed as
            ``[sample, class]``.
        labels: either a single label tensor, which is compared against every
            batch in ``output``, or a list/tuple of label tensors that is
            paired with ``output`` batch by batch.
        num_classes: number of classes (defaults to 4).

    Returns:
        ``(test_acc, macro_F1, micro_F1)`` as floats.

        * macro-F1 is the harmonic mean of macro-precision and macro-recall,
          i.e. of the per-class precision/recall averaged over classes.
        * micro-F1 is computed from TP/FP/FN summed over all classes.

        Any ratio whose denominator is zero is reported as ``0.0``.

    Raises:
        ValueError: if ``labels`` is a list/tuple whose length differs from
            the number of batches in ``output``.
    """
    batches = list(output)
    if isinstance(labels, (list, tuple)):
        if len(labels) != len(batches):
            raise ValueError(
                "got %d output batches but %d label batches"
                % (len(batches), len(labels))
            )
        label_batches = list(labels)
    else:
        # A lone tensor is reused for every batch.
        label_batches = [labels] * len(batches)

    test_correct = 0
    test_total = 0
    # Rows are predicted classes, columns are true classes.
    Confusion_Matrix = np.zeros((num_classes, num_classes), dtype=np.int64)

    for outputs, batch_labels in zip(batches, label_batches):
        predicted = outputs.detach().cpu().argmax(dim=1)
        targets = batch_labels.detach().cpu()

        test_total += targets.size(0)
        test_correct += int((predicted == targets).sum())

        # np.add.at accumulates correctly when an index pair repeats.
        np.add.at(
            Confusion_Matrix,
            (predicted.numpy().astype(np.int64), targets.numpy().astype(np.int64)),
            1,
        )

    # One-vs-rest counts for every class.
    tp = np.diag(Confusion_Matrix).astype(np.float64)
    fp = Confusion_Matrix.sum(axis=1) - tp  # predicted as the class, wrongly
    fn = Confusion_Matrix.sum(axis=0) - tp  # belong to the class, missed

    def _ratio(numerator, denominator):
        # Element-wise division that yields 0 where the denominator is 0.
        numerator = np.asarray(numerator, dtype=np.float64)
        denominator = np.asarray(denominator, dtype=np.float64)
        return np.divide(
            numerator,
            denominator,
            out=np.zeros_like(numerator),
            where=denominator > 0,
        )

    def _harmonic(p, r):
        return float(2 * p * r / (p + r)) if (p + r) > 0 else 0.0

    macro_P = float(_ratio(tp, tp + fp).mean()) if num_classes else 0.0
    macro_R = float(_ratio(tp, tp + fn).mean()) if num_classes else 0.0
    micro_P = float(_ratio(tp.sum(), tp.sum() + fp.sum()))
    micro_R = float(_ratio(tp.sum(), tp.sum() + fn.sum()))

    macro_F1 = _harmonic(macro_P, macro_R)
    micro_F1 = _harmonic(micro_P, micro_R)
    test_acc = test_correct / test_total if test_total else 0.0

    return test_acc, macro_F1, micro_F1

分割网络的评价指标

  • Pixel_Acc, Dice, Jac, IoU, Spe, Sen, Pre
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def Metrics(output, mask):
    """Compute pixel-level metrics for a binary segmentation output.

    Args:
        output: model output tensor; values ``>= 0.5`` are treated as
            foreground. The tensor is not modified.
        mask: ground-truth tensor with the same number of elements as
            ``output``; 1 marks foreground and 0 background.

    Returns:
        ``(test_pixel_acc, dice, jac, IoU, Spe, Sen, Pre)`` as floats.

        * ``dice`` uses +1 smoothing in the denominator and is clipped to
          ``[1e-4, 0.9999]``.
        * ``jac`` is the unsmoothed Jaccard index.
        * ``IoU`` is derived from the smoothed Dice as ``dice / (2 - dice)``.
        * ``Spe``, ``Sen``, ``Pre`` are specificity, sensitivity and
          precision.

        Any unsmoothed ratio whose denominator is zero is reported as 0.0.

    Raises:
        ValueError: if ``output`` and ``mask`` differ in element count.
    """
    # Threshold into a fresh array so the caller's tensor stays untouched.
    scores = output.detach().cpu().numpy().ravel()
    binary = (scores >= 0.5).astype(np.float64)
    img_mask = mask.detach().cpu().numpy().ravel()
    if binary.size != img_mask.size:
        raise ValueError(
            "output has %d elements but mask has %d"
            % (binary.size, img_mask.size)
        )

    def _ratio(numerator, denominator):
        return float(numerator) / float(denominator) if denominator else 0.0

    # Confusion counts. A mask value other than 1 under a positive prediction
    # counts as a false positive; a mask value other than 0 under a negative
    # prediction counts as a false negative.
    predicted_fg = binary == 1
    mask_is_one = img_mask == 1
    mask_is_zero = img_mask == 0
    tp = int(np.count_nonzero(predicted_fg & mask_is_one))
    fp = int(np.count_nonzero(predicted_fg & ~mask_is_one))
    tn = int(np.count_nonzero(~predicted_fg & mask_is_zero))
    fn = int(np.count_nonzero(~predicted_fg & ~mask_is_zero))

    overlap = float(np.sum(binary * img_mask))
    mask_area = float(np.sum(img_mask))
    output_area = float(np.sum(binary))

    # Pixel Accuracy
    test_pixel_acc = _ratio(np.count_nonzero(binary == img_mask), binary.size)
    # Dice
    dice = float(np.clip((2.0 * overlap) / (mask_area + output_area + 1), 1e-4, 0.9999))
    # Jaccard Index
    jac = _ratio(overlap, mask_area + output_area - overlap)
    # Intersection over Union
    IoU = dice / (2 - dice)
    # Specificity
    Spe = _ratio(tn, tn + fp)
    # Sensitivity
    Sen = _ratio(tp, tp + fn)
    # Precision
    Pre = _ratio(tp, tp + fp)

    return test_pixel_acc, dice, jac, IoU, Spe, Sen, Pre