# y.append((1 + float(sum(result_list[:i])) / (i + 1)) / 2)
y.append(float(sum(result_list[:i])) / (i + 1))
print('最近', j, '次准确率', y[-1])
print(x, y)
line, = plt.plot(x, y)
plt.show()

输出:
549 549
最近 549 次准确率 0.5300546448087432
range(0, 549) [0.0, 0.0, 0.3333333333333333, 0.25 等]

# 评估准确率
# evaluating accuracy
accuracy = accuracy_score(y_true=y_te, y_pred=pred_X)
print('gcForest accuracy : {}'.format(accuracy))

输出:
gcForest accuracy : 0.5300546448087432

预测结果很一般,不过还是有效的。预测涨跌看起来不是那么靠谱,但识别手写数字还是相当牛逼的。下面只贴出结果:

# loading the data
digits = load_digits()
X = digits.data
y = digits.target
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.4)
gcf = gcForest(shape_1X=[7, 8], window=[4, 6], tolerance=0.0, min_samples_mgs=10, min_samples_cascade=7)
# gcf = gcForest(shape_1X=13, window=13, tolerance=0.0, min_samples_mgs=10, min_samples_cascade=7)
gcf.fit(X_tr, y_tr)

输出:
Slicing Images...
Training MGS Random Forests...
Slicing Images...
Training MGS Random Forests...
Adding/Training Layer, n_layer=1
Layer validation accuracy = 0.9814814814814815
Adding/Training Layer, n_layer=2
Layer validation accuracy = 0.9814814814814815

# evaluating accuracy
accuracy = accuracy_score(y_true=y_te, y_pred=pred_X)
print('gcForest accuracy : {}'.format(accuracy))

输出:
gcForest accuracy : 0.980528511821975

厉害了,简单的参数都能使手写数字识别的准确率高达 98%。

单独利用多粒度扫描和级联森林

由于多粒度扫描和级联森林模块是相当独立的,因此可以单独使用它们。如果给定目标 y,代码将自动使用它进行训练,否则它会调用最后训练的随机森林来分割数据。

gcf = gcForest(shape_1X=[8, 8], window=5, min_samples_mgs=10, min_samples_cascade=7)
X_tr_mgs = gcf.mg_scanning(X_tr, y_tr)

输出:
Slicing Images...
Training MGS Random Forests...

It is now possible to use the mg_scanning output as input for cascade forests using different parameters. Note that the cascade forest module does not directly return predictions but probability predictions from each Random Forest in the last layer of the cascade. Hence the need to first take the mean of the output and then find the max.
gcf = gcForest(tolerance=0.0, min_samples_mgs=10, min_samples_cascade=7)
_ = gcf.cascade_forest(X_tr_mgs, y_tr)

输出:
Adding/Training Layer, n_layer=1
Layer validation accuracy = 0.9722222222222222
Adding/Training Layer, n_layer=2
Layer validation accuracy = 0.9907407407407407
Adding/Training Layer, n_layer=3
Layer validation accuracy = 0.9814814814814815

import numpy as np
pred_proba = gcf.cascade_forest(X_te_mgs)
tmp = np.mean(pred_proba, axis=0)
preds = np.argmax(tmp, axis=1)
accuracy_score(y_true=y_te, y_pred=preds)

gcf = gcForest(tolerance=0.0, min_samples_mgs=20, min_samples_cascade=10)
_ = gcf.cascade_forest(X_tr_mgs, y_tr)
pred_proba = gcf.cascade_forest(X_te_mgs)
tmp = np.mean(pred_proba, axis=0)
preds = np.argmax(tmp, axis=1)
accuracy_score(y_true=y_te, y_pred=preds)

输出:
0.97774687065368571
Adding/Training Layer, n_layer=1
Layer validation accuracy = 0.9629629629629629
Adding/Training Layer, n_layer=2
Layer validation accuracy = 0.9675925925925926
Adding/Training Layer, n_layer=3
Layer validation accuracy = 0.9722222222222222
Adding/Training Layer, n_layer=4
Layer validation accuracy = 0.9722222222222222
0.97218358831710705

Skipping mg_scanning

It is also possible to directly use the cascade forest and skip the multi grain scanning step.

gcf = gcForest(tolerance=0.0, min_samples_cascade=20)
_ = gcf.cascade_forest(X_tr, y_tr)

(责任编辑:本港台直播)