This section covers the principles, implementation, and applications of the Extreme Learning Machine (ELM). ELM is an efficient, easy-to-use machine learning algorithm that is particularly well suited to rapid modeling and prototyping.
Overview of the Extreme Learning Machine (ELM)
The extreme learning machine is a single-hidden-layer feedforward neural network. Its defining feature is that the hidden-layer parameters are generated randomly and never adjusted; only the output weights are learned (the formulation after the table below makes this concrete).
| Feature | Advantage | Typical use cases |
|---|---|---|
| Very fast training | Commonly reported as 10-1000x faster than gradient-based neural-network training | Real-time systems, large-scale data |
| Good generalization | Less prone to overfitting than iteratively trained networks of similar size | Small-sample learning |
| Simple to implement | Little hyperparameter tuning required | Rapid prototyping |
| Universal approximation | Can approximate any continuous function | Complex nonlinear problems |
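Concretely, given $N$ training samples $X \in \mathbb{R}^{N \times d}$ with targets $Y \in \mathbb{R}^{N \times m}$, and $L$ hidden neurons with randomly drawn input weights $W \in \mathbb{R}^{d \times L}$ and biases $b \in \mathbb{R}^{1 \times L}$, training reduces to a single linear least-squares solve:

$$H = g(XW + \mathbf{1}b), \qquad \beta = H^{\dagger} Y$$

where $g$ is the activation function applied elementwise, $H^{\dagger}$ is the Moore-Penrose pseudoinverse of the hidden-layer output matrix, and $\beta \in \mathbb{R}^{L \times m}$ holds the only learned parameters. This corresponds directly to the `pinv(H) * Y` step in the implementation below.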
Core ELM Implementation
1. Basic ELM Regression Model
```matlab
function [output_weight, train_accuracy, model] = elm_train_regression(X, Y, hidden_neurons, activation_func)
% ELM regression training
% Inputs:
%   X               - training data (samples x features)
%   Y               - targets (samples x output dims)
%   hidden_neurons  - number of hidden neurons
%   activation_func - activation function type
% Outputs:
%   output_weight  - output-layer weights
%   train_accuracy - training accuracy metrics
%   model          - trained model

[~, n_features] = size(X);

% 1. Randomly generate the input weights and biases
rng(42);  % fix the random seed for reproducibility
input_weights = randn(n_features, hidden_neurons);
bias = randn(1, hidden_neurons);

% 2. Compute the hidden-layer output
H = calculate_hidden_output(X, input_weights, bias, activation_func);

% 3. Solve for the output weights (Moore-Penrose pseudoinverse)
output_weight = pinv(H) * Y;

% 4. Predict on the training set and evaluate
Y_pred = H * output_weight;
train_accuracy = calculate_regression_accuracy(Y, Y_pred);

% Save the model
model.input_weights = input_weights;
model.bias = bias;
model.output_weight = output_weight;
model.activation_func = activation_func;
model.hidden_neurons = hidden_neurons;

fprintf('ELM training complete!\n');
fprintf('  Hidden neurons: %d, training RMSE: %.6f, R^2: %.4f\n', ...
    hidden_neurons, train_accuracy.rmse, train_accuracy.r2);
end

function H = calculate_hidden_output(X, input_weights, bias, activation_func)
% Compute the hidden-layer output
H = X * input_weights + repmat(bias, size(X, 1), 1);
switch activation_func
    case 'sigmoid'
        H = 1 ./ (1 + exp(-H));
    case 'tanh'
        H = tanh(H);
    case 'relu'
        H = max(0, H);
    case 'sin'
        H = sin(H);
    case 'rbf'
        H = exp(-H.^2);
    otherwise
        H = 1 ./ (1 + exp(-H));  % default: sigmoid
end
end

function accuracy = calculate_regression_accuracy(Y_true, Y_pred)
% Compute regression accuracy metrics
mse = mean((Y_true - Y_pred).^2);
rmse = sqrt(mse);
ss_res = sum((Y_true - Y_pred).^2);
ss_tot = sum((Y_true - mean(Y_true)).^2);
r2 = 1 - (ss_res / ss_tot);
mae = mean(abs(Y_true - Y_pred));
accuracy = struct('mse', mse, 'rmse', rmse, 'r2', r2, 'mae', mae);
end
```
2. ELM Classification Model
```matlab
function [output_weight, train_accuracy, model] = elm_train_classification(X, Y, hidden_neurons, activation_func)
% ELM classification training
% Inputs:
%   X               - training data
%   Y               - class labels (samples x 1), labels starting at 1
%   hidden_neurons  - number of hidden neurons
%   activation_func - activation function
% Outputs:
%   output_weight  - output-layer weights
%   train_accuracy - training accuracy
%   model          - trained model

n_samples = size(X, 1);
n_classes = length(unique(Y));

% Convert the labels to one-hot encoding
Y_onehot = zeros(n_samples, n_classes);
for i = 1:n_samples
    Y_onehot(i, Y(i)) = 1;
end

% Train a regression ELM on the one-hot targets
[output_weight, ~, model] = elm_train_regression(X, Y_onehot, hidden_neurons, activation_func);

% Predict on the training set
Y_pred_onehot = elm_predict(model, X);
[~, Y_pred] = max(Y_pred_onehot, [], 2);

% Compute the classification accuracy
train_accuracy = sum(Y_pred == Y) / n_samples;
model.n_classes = n_classes;

fprintf('ELM classification training complete!\n');
fprintf('  Classes: %d, training accuracy: %.4f\n', n_classes, train_accuracy);
end
```
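As an aside, the one-hot loop above can be replaced by a single vectorized expression; a minimal equivalent (relies on implicit expansion, available since MATLAB R2016b):

```matlab
% Vectorized one-hot encoding, equivalent to the loop in elm_train_classification
Y_onehot = double(Y(:) == 1:n_classes);  % samples x classes, one 1 per row
```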
3. Prediction Functions
```matlab
function Y_pred = elm_predict(model, X)
% ELM prediction
% Inputs:
%   model - trained ELM model
%   X     - test data
% Output:
%   Y_pred - predictions

% Compute the hidden-layer output
H = calculate_hidden_output(X, model.input_weights, model.bias, model.activation_func);

% Compute the network output
Y_pred = H * model.output_weight;

% For classification this returns the raw outputs (usable as class scores);
% to obtain class predictions, use max(Y_pred, [], 2)
end

function Y_pred_class = elm_predict_class(model, X)
% ELM classification prediction (returns class labels)
Y_pred = elm_predict(model, X);
[~, Y_pred_class] = max(Y_pred, [], 2);
end
```
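The raw outputs returned by `elm_predict` are not calibrated probabilities. If approximate class scores in [0, 1] are useful, one common heuristic (our addition, not part of standard ELM) is a softmax over the raw outputs:

```matlab
% Heuristic: map raw ELM outputs to softmax scores (not calibrated probabilities)
scores = elm_predict(model, X_test);           % raw outputs, samples x classes
scores = scores - max(scores, [], 2);          % shift rows for numerical stability
probs = exp(scores) ./ sum(exp(scores), 2);    % each row sums to 1
```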
Enhanced ELM Implementations
1. Regularized ELM (guards against overfitting)
```matlab
function [output_weight, model] = elm_train_regularized(X, Y, hidden_neurons, activation_func, C)
% Regularized ELM training
% Input:
%   C - regularization parameter
% Solves: beta = (H'H + I/C)^(-1) H'Y

[n_samples, n_features] = size(X);

% Randomly generate the input weights and biases
input_weights = randn(n_features, hidden_neurons);
bias = randn(1, hidden_neurons);

% Compute the hidden-layer output
H = calculate_hidden_output(X, input_weights, bias, activation_func);

% Ridge-regularized solve for the output weights; pick whichever
% Gram matrix (L x L or N x N) is smaller
if n_samples >= hidden_neurons
    output_weight = (H' * H + eye(hidden_neurons) / C) \ (H' * Y);
else
    output_weight = H' * ((H * H' + eye(n_samples) / C) \ Y);
end

% Save the model
model.input_weights = input_weights;
model.bias = bias;
model.output_weight = output_weight;
model.activation_func = activation_func;
model.hidden_neurons = hidden_neurons;
model.C = C;
end
```
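To pick C, a quick sweep on held-out data is usually enough. A minimal sketch, assuming `X_train`/`Y_train`/`X_test`/`Y_test` exist as in the examples below; the grid is arbitrary:

```matlab
% Sweep the regularization parameter C on a held-out set (illustrative grid)
for C = [1e-2, 1, 1e2, 1e4]
    [~, reg_model] = elm_train_regularized(X_train, Y_train, 100, 'sigmoid', C);
    Y_pred = elm_predict(reg_model, X_test);   % model fields match elm_predict
    acc = calculate_regression_accuracy(Y_test, Y_pred);
    fprintf('C = %-8g test RMSE: %.6f  R^2: %.4f\n', C, acc.rmse, acc.r2);
end
```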
2. Incremental ELM (online learning)
```matlab
function model = elm_incremental_learning(model, X_new, Y_new)
% Incremental ELM learning
% Inputs:
%   model        - existing ELM model (must carry last_X/last_Y from the previous batch)
%   X_new, Y_new - new data

% Compute the hidden-layer output for the new data
H_new = calculate_hidden_output(X_new, model.input_weights, model.bias, model.activation_func);

% Update the output weights. This simple version refits the pseudoinverse on
% the stored previous batch plus the new batch; in practice a recursive
% least-squares update (OS-ELM) is more efficient -- see the sketch below.
H_old = calculate_hidden_output(model.last_X, model.input_weights, model.bias, model.activation_func);
H_combined = [H_old; H_new];
Y_combined = [model.last_Y; Y_new];

model.output_weight = pinv(H_combined) * Y_combined;
model.last_X = X_new;
model.last_Y = Y_new;

fprintf('Incremental update complete with %d new samples\n', size(X_new, 1));
end
```
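For a true constant-memory online update, OS-ELM maintains P = (HᵀH)⁻¹ and updates it with recursive least squares instead of refitting. A minimal sketch of that update (our addition; it assumes the model carries a field `P` initialized as `inv(H0' * H0)` from an initial batch whose hidden-output matrix `H0` has full column rank, alongside the output weights solved on that batch):

```matlab
function model = elm_os_update(model, X_new, Y_new)
% OS-ELM recursive least-squares update (sketch; assumes model.P exists,
% initialized as inv(H0' * H0) from an initial full-column-rank batch)
H = calculate_hidden_output(X_new, model.input_weights, model.bias, model.activation_func);
K = size(X_new, 1);

% P <- P - P H' (I + H P H')^(-1) H P
PHt = model.P * H';
model.P = model.P - PHt / (eye(K) + H * PHt) * (H * model.P);

% beta <- beta + P H' (Y - H beta)
model.output_weight = model.output_weight + ...
    model.P * H' * (Y_new - H * model.output_weight);
end
```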
Applications
Example 1: Function Fitting
```matlab
% Generate data from a nonlinear function
x = linspace(-10, 10, 1000)';
y = sin(x) + 0.5 * cos(2*x) + 0.3 * sin(3*x) + 0.1 * randn(size(x));

% Split into training and test sets (shuffled, so the test points fall
% inside the training range rather than forcing the ELM to extrapolate)
rng(42);
idx = randperm(length(x));
train_ratio = 0.7;
n_train = floor(train_ratio * length(x));
X_train = x(idx(1:n_train));
Y_train = y(idx(1:n_train));
X_test = x(idx(n_train+1:end));
Y_test = y(idx(n_train+1:end));

% Sort the test set by x so the line plots render cleanly
[X_test, order] = sort(X_test);
Y_test = Y_test(order);

% ELM parameters
hidden_neurons = 50;
activation_func = 'sigmoid';

% Train the ELM
[output_weight, train_accuracy, model] = elm_train_regression(...
    X_train, Y_train, hidden_neurons, activation_func);

% Predict on the test set
Y_pred = elm_predict(model, X_test);
test_accuracy = calculate_regression_accuracy(Y_test, Y_pred);

% Visualize the results
figure;
subplot(2,1,1);
plot(X_train, Y_train, 'b.', 'MarkerSize', 8); hold on;
plot(X_test, Y_pred, 'r-', 'LineWidth', 2);
legend('Training data', 'ELM prediction', 'Location', 'best');
title('ELM function fitting');
xlabel('x'); ylabel('y');
grid on;

subplot(2,1,2);
plot(X_test, Y_test - Y_pred, 'g-', 'LineWidth', 1);
title('Prediction error');
xlabel('x'); ylabel('error');
grid on;

fprintf('Test-set performance:\n');
fprintf('  RMSE: %.6f, R^2: %.4f, MAE: %.6f\n', ...
    test_accuracy.rmse, test_accuracy.r2, test_accuracy.mae);
```
Example 2: Classification
```matlab
% Use MATLAB's built-in Fisher iris data set
load fisheriris;
X = meas;
[Y, class_names] = grp2idx(species);  % numeric labels plus class names

% Standardize the data
X = zscore(X);

% Split into training and test sets
rng(42);  % reproducibility
cv = cvpartition(Y, 'HoldOut', 0.3);
X_train = X(training(cv), :);
Y_train = Y(training(cv));
X_test = X(test(cv), :);
Y_test = Y(test(cv));

% ELM classification
hidden_neurons = 30;
activation_func = 'sigmoid';
[output_weight, train_accuracy, model] = elm_train_classification(...
    X_train, Y_train, hidden_neurons, activation_func);

% Predict on the test set
Y_pred = elm_predict_class(model, X_test);
test_accuracy = sum(Y_pred == Y_test) / length(Y_test);

% Confusion matrix
C = confusionmat(Y_test, Y_pred);
figure;
confusionchart(C, class_names);
title(sprintf('ELM confusion matrix (accuracy: %.2f%%)', test_accuracy*100));

fprintf('Classification performance:\n');
fprintf('  Training accuracy: %.4f\n', train_accuracy);
fprintf('  Test accuracy: %.4f\n', test_accuracy);
```
Example 3: Parameter Sensitivity Analysis
```matlab
function analyze_elm_parameters(X, Y, problem_type)
% Analyze how ELM parameters affect performance.
% Note: this reports *training* performance, so larger hidden layers tend to
% look better; use the cross-validation helper below for model selection.
if strcmp(problem_type, 'regression')
    train_func = @elm_train_regression;
else
    train_func = @elm_train_classification;
end

% Test different hidden-layer sizes and activation functions
hidden_neurons_list = [10, 20, 50, 100, 200, 500];
activation_funcs = {'sigmoid', 'tanh', 'relu', 'sin'};

results = struct();
figure;
colors = lines(length(activation_funcs));

for a = 1:length(activation_funcs)
    activation = activation_funcs{a};
    performance = zeros(size(hidden_neurons_list));

    for h = 1:length(hidden_neurons_list)
        hidden_neurons = hidden_neurons_list(h);
        try
            [~, accuracy, ~] = train_func(X, Y, hidden_neurons, activation);
            if strcmp(problem_type, 'regression')
                performance(h) = accuracy.r2;  % use R^2 as the metric
            else
                performance(h) = accuracy;     % classification accuracy
            end
        catch
            performance(h) = 0;
        end
    end

    % Plot the performance curve
    plot(hidden_neurons_list, performance, 'o-', ...
        'Color', colors(a,:), 'LineWidth', 2, 'MarkerSize', 6);
    hold on;
    results.(activation) = performance;
end

xlabel('Number of hidden neurons');
if strcmp(problem_type, 'regression')
    ylabel('R^2');
    title('ELM regression performance vs. hidden-layer size');
else
    ylabel('Accuracy');
    title('ELM classification performance vs. hidden-layer size');
end
legend(activation_funcs, 'Location', 'best');
grid on;
set(gca, 'XScale', 'log');
end

% Usage example:
% analyze_elm_parameters(X_train, Y_train, 'classification');
```
Utility Functions
1. Cross-Validated ELM
```matlab
function [best_model, cv_results] = elm_cross_validation(X, Y, hidden_neurons_list, activation_funcs, k_folds, problem_type)
% Cross-validated grid search over ELM hyperparameters
if nargin < 6
    problem_type = 'regression';
end

cv = cvpartition(length(Y), 'KFold', k_folds);
results = struct();
best_accuracy = -inf;
best_params = struct();

for a = 1:length(activation_funcs)
    activation = activation_funcs{a};
    for h = 1:length(hidden_neurons_list)
        hidden_neurons = hidden_neurons_list(h);
        fold_accuracies = zeros(k_folds, 1);

        for fold = 1:k_folds
            train_idx = training(cv, fold);
            test_idx = test(cv, fold);
            X_train = X(train_idx, :);
            Y_train = Y(train_idx);
            X_test = X(test_idx, :);
            Y_test = Y(test_idx);

            try
                if strcmp(problem_type, 'regression')
                    [~, ~, model] = elm_train_regression(X_train, Y_train, hidden_neurons, activation);
                    Y_pred = elm_predict(model, X_test);
                    accuracy = calculate_regression_accuracy(Y_test, Y_pred);
                    fold_accuracies(fold) = accuracy.r2;
                else
                    [~, ~, model] = elm_train_classification(X_train, Y_train, hidden_neurons, activation);
                    Y_pred = elm_predict_class(model, X_test);
                    fold_accuracies(fold) = sum(Y_pred == Y_test) / length(Y_test);
                end
            catch
                fold_accuracies(fold) = 0;
            end
        end

        mean_accuracy = mean(fold_accuracies);
        std_accuracy = std(fold_accuracies);

        % Record the results
        param_name = sprintf('h%d_%s', hidden_neurons, activation);
        results.(param_name) = struct(...
            'mean_accuracy', mean_accuracy, ...
            'std_accuracy', std_accuracy, ...
            'fold_accuracies', fold_accuracies);

        % Track the best parameters
        if mean_accuracy > best_accuracy
            best_accuracy = mean_accuracy;
            best_params.hidden_neurons = hidden_neurons;
            best_params.activation = activation;
        end
    end
end

% Retrain the final model on all data with the best parameters
if strcmp(problem_type, 'regression')
    [~, ~, best_model] = elm_train_regression(X, Y, best_params.hidden_neurons, best_params.activation);
else
    [~, ~, best_model] = elm_train_classification(X, Y, best_params.hidden_neurons, best_params.activation);
end

cv_results = results;
fprintf('Cross-validation complete! Best parameters:\n');
fprintf('  Hidden neurons: %d, activation: %s, mean performance: %.4f\n', ...
    best_params.hidden_neurons, best_params.activation, best_accuracy);
end
```
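A hypothetical call, reusing the standardized iris data from Example 2 (grid values are illustrative):

```matlab
% Grid-search ELM hyperparameters with 5-fold cross-validation
[best_model, cv_results] = elm_cross_validation(X, Y, ...
    [10, 30, 50, 100], {'sigmoid', 'tanh'}, 5, 'classification');
```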
Performance Comparison
```matlab
% Quick comparison with other regression methods
function compare_methods(X_train, Y_train, X_test, Y_test)
% Compare ELM against a few standard regression methods
methods = {'ELM', 'Decision tree', 'SVM', 'Feedforward NN'};
performances = zeros(length(methods), 1);
training_times = zeros(length(methods), 1);

% ELM
tic;
[~, ~, elm_model] = elm_train_regression(X_train, Y_train, 50, 'sigmoid');
training_times(1) = toc;
Y_pred = elm_predict(elm_model, X_test);
acc = calculate_regression_accuracy(Y_test, Y_pred);
performances(1) = acc.r2;

% Decision tree
tic;
tree = fitrtree(X_train, Y_train);
training_times(2) = toc;
Y_pred = predict(tree, X_test);
acc = calculate_regression_accuracy(Y_test, Y_pred);
performances(2) = acc.r2;

% SVM regression (Statistics and Machine Learning Toolbox)
tic;
svm = fitrsvm(X_train, Y_train);
training_times(3) = toc;
Y_pred = predict(svm, X_test);
acc = calculate_regression_accuracy(Y_test, Y_pred);
performances(3) = acc.r2;

% Feedforward (BP-style) network via fitrnet (requires R2021a or later)
tic;
net = fitrnet(X_train, Y_train);
training_times(4) = toc;
Y_pred = predict(net, X_test);
acc = calculate_regression_accuracy(Y_test, Y_pred);
performances(4) = acc.r2;

% Display the results
figure;
subplot(1,2,1);
bar(performances);
set(gca, 'XTickLabel', methods);
ylabel('R^2');
title('Accuracy comparison');
grid on;

subplot(1,2,2);
bar(training_times);
set(gca, 'XTickLabel', methods);
ylabel('Training time (s)');
title('Training-time comparison');
grid on;
end
```
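A hypothetical call, reusing the train/test split from Example 1:

```matlab
compare_methods(X_train, Y_train, X_test, Y_test);
```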
Practical Tips
- Hidden-layer size: a typical starting range is 50-500 neurons; increase it for larger data sets
- Activation function: 'sigmoid' and 'tanh' usually work well, and trying several is cheap
- Regularization parameter: if the model overfits, strengthen the penalty; note that in the β = (HᵀH + I/C)⁻¹HᵀY parameterization used here, a smaller C means stronger regularization
- Data standardization: standardize the inputs, fitting the statistics on the training set only (see the sketch after this list)
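A common pitfall is standardizing the training and test sets separately; a minimal sketch of the correct order:

```matlab
% Standardize with training-set statistics only, then apply them to the test set
[X_train, mu, sigma] = zscore(X_train);
X_test = (X_test - mu) ./ sigma;   % implicit expansion (R2016b+)
```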
ELM is a genuinely practical and efficient algorithm, particularly well suited to scenarios that call for fast modeling.