二分类网络的评价指标

Acc, F1, ROC, AUC

def Metrics(output, labels):
    '''
    Accuracy, F1-score and ROC inputs for a two-class classifier.

    output: <list> of logit tensors, one per batch, each indexed as
            [sample, class]; column 1 is treated as the positive class.
    labels: <list> of label tensors parallel to `output`, or a single
            label tensor that is compared against every batch.

    return: (prob, test_acc, F1_Score)
            prob      - <list> of [probability of positive, true label],
                        one entry per sample over all batches (the format
                        drawROC expects)
            test_acc  - correct predictions / total samples (0.0 if empty)
            F1_Score  - 2*tp / (2*tp + fp + fn) (0.0 when undefined)
    '''
    # A single tensor is reused for every batch; a list is consumed in
    # lockstep with `output`.
    if isinstance(labels, (list, tuple)):
        batch_labels = list(labels)
        if len(batch_labels) != len(output):
            raise ValueError('output and labels must contain the same number of batches')
    else:
        batch_labels = [labels] * len(output)

    test_correct = 0
    test_total = 0
    prob = []
    tp, tn, fp, fn = 0, 0, 0, 0

    for outputs, batch_label in zip(output, batch_labels):
        logits = outputs.detach().cpu().numpy()
        truth = batch_label.detach().cpu().numpy().reshape(-1)
        predicted = logits.argmax(axis=1)
        if predicted.shape != truth.shape:
            raise ValueError('a batch of outputs and its labels differ in size')

        test_total += truth.size
        test_correct += int((predicted == truth).sum())

        # confusion counts for the F1-score
        tp += int(((predicted == 1) & (truth == 1)).sum())
        fp += int(((predicted == 1) & (truth == 0)).sum())
        fn += int(((predicted == 0) & (truth == 1)).sum())
        tn += int(((predicted == 0) & (truth == 0)).sum())

        # softmax probability of the positive class; subtracting the row
        # maximum keeps exp() from overflowing on large logits
        exp_logits = np.exp(logits - logits.max(axis=1, keepdims=True))
        positive = exp_logits[:, 1] / exp_logits.sum(axis=1)
        for p, label in zip(positive.tolist(), truth.tolist()):
            prob.append([p, label])

    f1_denominator = 2 * tp + fp + fn
    F1_Score = 2 * tp / f1_denominator if f1_denominator else 0.0
    test_acc = test_correct / test_total if test_total else 0.0

    return prob, test_acc, F1_Score


def drawROC(prob, acc=None):
    '''
    Plot the ROC curve, save it under ./temp/ and return the AUC.

    prob: <list>: [probability of positive, true label]
    acc:  optional accuracy; when given it is shown in the legend next
          to the AUC.

    return: area under the ROC curve (trapezoidal rule)
    raises: ValueError if `prob` lacks positive or negative samples,
            because TPR/FPR are undefined in that case.
    '''
    import os

    neg = sum(1 for sample in prob if sample[1] == 0)
    pos = len(prob) - neg
    if neg == 0 or pos == 0:
        raise ValueError('drawROC needs at least one positive and one negative sample')

    # Sweep the threshold upwards through the sorted scores. At step i
    # every sample from i onwards is predicted positive, so the running
    # counters start with "everything positive" and shrink by one sample
    # per step instead of being recounted each time.
    sample_sort = sorted(prob, key=lambda sample: sample[0])
    tp = sum(1 for sample in sample_sort if sample[1] == 1)
    fp = neg
    x = []
    y = []
    for sample in sample_sort:
        x.append(fp / neg)
        y.append(tp / pos)
        if sample[1] == 1:
            tp -= 1
        elif sample[1] == 0:
            fp -= 1
    # threshold above every score: nothing is predicted positive
    x.append(0.0)
    y.append(0.0)

    # trapezoidal rule over consecutive (FPR, TPR) points
    auc = 0.5 * sum((x[i] - x[i + 1]) * (y[i] + y[i + 1]) for i in range(len(x) - 1))

    if acc is None:
        legend = 'AUC = %.3f' % auc
    else:
        legend = 'AUC = %.3f ACC = %.3f' % (auc, acc)

    # ROC curve
    plt.clf()
    plt.title('Small_Intestine_Classifier_ROC')
    plt.plot(x, y, 'b', label=legend)
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    # savefig does not create missing directories
    os.makedirs("./temp", exist_ok=True)
    plt.savefig("./temp/Small_Intestine_Classifier_ROC_AUC=%.3f_EfficientNet.jpg" % auc)

    return auc

多分类网络的评价指标(以四分类为例)

Acc, macro_F1, micro_F1

def Metrics(output, labels, num_classes=4):
    '''
    Accuracy, macro-F1 and micro-F1 for a multi-class classifier.

    output:      <list> of logit tensors, one per batch, each indexed as
                 [sample, class].
    labels:      <list> of label tensors parallel to `output`, or a single
                 label tensor that is compared against every batch.
    num_classes: number of classes (defaults to 4).

    return: (test_acc, macro_F1, micro_F1)
            macro_F1 is the harmonic mean of the class-averaged precision
            and the class-averaged recall; micro_F1 is computed from the
            TP/FP/FN totals pooled over all classes. Ratios with a zero
            denominator are reported as 0.0.
    '''
    if isinstance(labels, (list, tuple)):
        batch_labels = list(labels)
        if len(batch_labels) != len(output):
            raise ValueError('output and labels must contain the same number of batches')
    else:
        batch_labels = [labels] * len(output)

    test_correct = 0
    test_total = 0
    # Confusion_Matrix[predicted class][true class]
    Confusion_Matrix = np.zeros((num_classes, num_classes))

    # traverse output
    for outputs, batch_label in zip(output, batch_labels):
        predicted = outputs.detach().cpu().numpy().argmax(axis=1)
        truth = batch_label.detach().cpu().numpy().reshape(-1).astype(np.int64)
        if predicted.shape != truth.shape:
            raise ValueError('a batch of outputs and its labels differ in size')

        test_total += truth.size
        test_correct += int((predicted == truth).sum())
        # add.at accumulates correctly when the same cell occurs repeatedly
        np.add.at(Confusion_Matrix, (predicted, truth), 1)

    # Per-class one-vs-rest counts: row i holds everything predicted as i,
    # column i holds everything whose true class is i.
    TP = np.diag(Confusion_Matrix)
    FP = Confusion_Matrix.sum(axis=1) - TP
    FN = Confusion_Matrix.sum(axis=0) - TP

    def ratio(numerator, denominator):
        # scalar division that yields 0.0 instead of NaN / an exception
        return float(numerator / denominator) if denominator else 0.0

    P = [ratio(tp, tp + fp) for tp, fp in zip(TP, FP)]
    R = [ratio(tp, tp + fn) for tp, fn in zip(TP, FN)]
    macro_P = float(np.mean(P)) if P else 0.0
    macro_R = float(np.mean(R)) if R else 0.0
    micro_P = ratio(TP.sum(), TP.sum() + FP.sum())
    micro_R = ratio(TP.sum(), TP.sum() + FN.sum())

    macro_F1 = ratio(2 * macro_P * macro_R, macro_P + macro_R)
    micro_F1 = ratio(2 * micro_P * micro_R, micro_P + micro_R)
    test_acc = test_correct / test_total if test_total else 0.0

    return test_acc, macro_F1, micro_F1

分割网络的评价指标

Pixel_Acc, Dice, Jac, IoU, Spe, Sen, Pre

def Metrics(output, mask):
    '''
    Pixel-level metrics for a binary segmentation output.

    output: model outputs, size=[Batch_Size, Channels, Height, Width];
            values >= 0.5 count as foreground. The tensor is not modified.
    mask:   Ground Truth, same number of elements as `output`

    return: (Pixel_Acc, Dice, Jac, IoU, Spe, Sen, Pre)
            Dice uses +1 smoothing in the denominator and is clipped to
            [1e-4, 0.9999]; IoU is derived from that Dice value, whereas
            Jac is computed directly from the overlap. Ratios with a zero
            denominator are reported as 0.0.
    '''
    # output binarization (on a copy, so the caller's tensor stays intact)
    scores = output.detach().cpu().numpy().flatten()
    binary = (scores >= 0.5).astype(np.float64)
    truth = mask.detach().cpu().numpy().flatten()
    if binary.size != truth.size:
        raise ValueError('output and mask must contain the same number of pixels')

    def ratio(numerator, denominator):
        # division that yields 0.0 when the denominator is zero
        return float(numerator / denominator) if denominator else 0.0

    # nums for metrics
    predicted_fg = binary == 1
    test_pixel_correct = int(np.count_nonzero(binary == truth))
    test_pixel_total = binary.size
    tp = int(np.count_nonzero(predicted_fg & (truth == 1)))
    fp = int(np.count_nonzero(predicted_fg & (truth != 1)))
    tn = int(np.count_nonzero(~predicted_fg & (truth == 0)))
    fn = int(np.count_nonzero(~predicted_fg & (truth != 0)))
    overlap = float(np.sum(binary * truth))
    mask_area = float(np.sum(truth))
    output_area = float(np.sum(binary))

    # Pixel Accuracy
    test_pixel_acc = ratio(test_pixel_correct, test_pixel_total)
    # Dice
    dice = float(np.clip((2. * overlap) / (mask_area + output_area + 1), 1e-4, 0.9999))
    # Jaccard Index
    jac = ratio(overlap, mask_area + output_area - overlap)
    # Intersection over Union
    IoU = dice / (2 - dice)
    # specificity
    Spe = ratio(tn, tn + fp)
    # sensitivity
    Sen = ratio(tp, tp + fn)
    # Precision
    Pre = ratio(tp, tp + fp)

    return test_pixel_acc, dice, jac, IoU, Spe, Sen, Pre