A MATLAB implementation of entropy-based feature selection. Information entropy is a powerful tool for measuring feature importance, and it is particularly well suited to dimensionality reduction of high-dimensional data.
1. Theoretical Foundations of Information Entropy
1.1 Basic Concepts
Information entropy:
H(X) = -Σ_i P(x_i) log₂ P(x_i)
Conditional entropy:
H(Y|X) = -Σ_i P(x_i) Σ_j P(y_j|x_i) log₂ P(y_j|x_i)
Information gain:
IG(Y,X) = H(Y) - H(Y|X)
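To make these definitions concrete, a small worked example. Take a binary target Y with P(Y=0) = P(Y=1) = 0.5, so H(Y) = -(0.5 log₂ 0.5 + 0.5 log₂ 0.5) = 1 bit. Suppose a binary feature X splits the samples so that P(Y=1|X=1) = 0.9 and P(Y=1|X=0) = 0.1, with P(X=0) = P(X=1) = 0.5. Each conditional distribution has entropy -(0.9 log₂ 0.9 + 0.1 log₂ 0.1) ≈ 0.469 bits, so H(Y|X) ≈ 0.469 and IG(Y,X) ≈ 1 - 0.469 = 0.531 bits: knowing X removes roughly half of the uncertainty in Y.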
2. Entropy-Based Feature Selection
2.1 Main Framework Class
classdef EntropyFeatureSelector < handle
    % Entropy-based feature selector
    properties
        FeatureScores     % per-feature scores
        SelectedFeatures  % indices of the selected features
        FeatureNames      % feature names
        Method            % selection criterion
        NumFeatures       % number of features to keep
        DiscretizeMethod  % discretization method
        NumBins           % number of discretization bins
    end
    methods
        function obj = EntropyFeatureSelector(method, num_features)
            % Constructor
            if nargin < 1
                obj.Method = 'information_gain';
            else
                obj.Method = method;
            end
            if nargin < 2
                obj.NumFeatures = 10;
            else
                obj.NumFeatures = num_features;
            end
            obj.DiscretizeMethod = 'equal_width';
            obj.NumBins = 10;
        end

        function fit(obj, X, y, feature_names)
            % Fit the feature selector
            % X: feature matrix (n_samples x n_features)
            % y: target variable
            % feature_names: feature names (optional)
            [~, n_features] = size(X);
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end

            % Discretize continuous features and the target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);

            % Score each feature
            obj.FeatureScores = zeros(1, n_features);
            for i = 1:n_features
                switch obj.Method
                    case 'information_gain'
                        obj.FeatureScores(i) = obj.information_gain(X_disc(:, i), y_disc);
                    case 'gain_ratio'
                        obj.FeatureScores(i) = obj.gain_ratio(X_disc(:, i), y_disc);
                    case 'symmetrical_uncertainty'
                        obj.FeatureScores(i) = obj.symmetrical_uncertainty(X_disc(:, i), y_disc);
                    case 'mutual_information'
                        % Needed by the subclass in Section 3.1; for a
                        % discrete target this equals information gain
                        obj.FeatureScores(i) = mutual_information(X_disc(:, i), y_disc);
                    case 'joint_entropy'
                        obj.FeatureScores(i) = obj.joint_entropy_feature(X_disc(:, i), y_disc);
                    otherwise
                        error('Unknown feature selection method: %s', obj.Method);
                end
            end

            % Keep the top-scoring features
            obj.select_features();
        end

        function X_selected = transform(obj, X)
            % Keep only the selected feature columns
            X_selected = X(:, obj.SelectedFeatures);
        end

        function X_selected = fit_transform(obj, X, y, feature_names)
            % Fit, then transform
            if nargin < 4
                obj.fit(X, y);
            else
                obj.fit(X, y, feature_names);
            end
            X_selected = obj.transform(X);
        end
    end
end
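Before moving on, a minimal usage sketch. The data below is synthetic and purely illustrative; it assumes the class (including the methods block from Section 2.3) is saved as EntropyFeatureSelector.m on the MATLAB path:

% Illustrative data only; any feature matrix X and label vector y will do
rng(7);
X = randn(200, 20);
y = double(X(:, 3) + X(:, 7) > 0);          % target driven by features 3 and 7
selector = EntropyFeatureSelector('information_gain', 5);
X_reduced = selector.fit_transform(X, y);   % fit, then keep the top 5 columns
disp(selector.SelectedFeatures);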
2.2 Core Entropy Computation Functions
The following standalone helpers (each saved in its own .m file) are shared by all the selectors. Entropies are computed in bits (log base 2) to match the definitions in Section 1.
function entropy_val = entropy(data)
    % Shannon entropy of a discrete data vector, in bits (log base 2).
    % Note: this local function shadows same-named toolbox functions
    % (e.g. the Image Processing Toolbox entropy).
    unique_vals = unique(data);
    probabilities = zeros(1, length(unique_vals));
    for i = 1:length(unique_vals)
        probabilities(i) = sum(data == unique_vals(i)) / length(data);
    end
    % eps guards against log2(0); probabilities of observed values are
    % always positive, so this is purely defensive
    entropy_val = -sum(probabilities .* log2(probabilities + eps));
end

function cond_entropy = conditional_entropy(X, y)
    % Conditional entropy H(Y|X)
    unique_x = unique(X);
    cond_entropy = 0;
    for i = 1:length(unique_x)
        x_val = unique_x(i);
        y_given_x = y(X == x_val);
        if ~isempty(y_given_x)
            prob_x = sum(X == x_val) / length(X);
            entropy_y_given_x = entropy(y_given_x);
            cond_entropy = cond_entropy + prob_x * entropy_y_given_x;
        end
    end
end

function je = joint_entropy(data)
    % Joint entropy H(X1,...,Xk) of the column variables of data.
    % Each row is one joint observation; map every distinct row to an
    % integer label, then reuse the scalar entropy function. (Calling
    % entropy on the raw matrix would treat it element-wise, which is wrong.)
    [~, ~, row_labels] = unique(data, 'rows');
    je = entropy(row_labels);
end

function mi = mutual_information(X, y)
    % Mutual information I(X;Y) = H(X) + H(Y) - H(X,Y)
    entropy_x = entropy(X);
    entropy_y = entropy(y);
    joint_entropy_xy = joint_entropy([X, y]);
    mi = entropy_x + entropy_y - joint_entropy_xy;
end
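A quick self-check that these helpers agree with each other, assuming each function above is saved in its own .m file on the path. The toy vectors are arbitrary; only the identity being tested matters:

X = [1 1 2 2 3 3]';
Y = [1 1 1 2 2 2]';
ig = entropy(Y) - conditional_entropy(X, Y);   % H(Y) - H(Y|X)
mi = mutual_information(X, Y);                 % H(X) + H(Y) - H(X,Y)
fprintf('IG = %.4f, MI = %.4f (should match)\n', ig, mi);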
2.3 Feature Selection Criteria
These criterion and helper methods form a second methods block inside EntropyFeatureSelector. They are declared protected (rather than private) so that the subclasses in Section 3 can call discretize_features and discretize_target.
methods (Access = protected)
    function ig = information_gain(obj, X, y)
        % Information gain: IG(Y,X) = H(Y) - H(Y|X)
        entropy_y = entropy(y);
        cond_entropy_y_given_x = conditional_entropy(X, y);
        ig = entropy_y - cond_entropy_y_given_x;
    end

    function gr = gain_ratio(obj, X, y)
        % Gain ratio: GR(Y,X) = IG(Y,X) / H(X)
        ig = obj.information_gain(X, y);
        entropy_x = entropy(X);
        if entropy_x == 0
            gr = 0;
        else
            gr = ig / entropy_x;
        end
    end

    function su = symmetrical_uncertainty(obj, X, y)
        % Symmetrical uncertainty: SU(Y,X) = 2 * IG(Y,X) / (H(Y) + H(X))
        ig = obj.information_gain(X, y);
        entropy_y = entropy(y);
        entropy_x = entropy(X);
        if (entropy_y + entropy_x) == 0
            su = 0;
        else
            su = 2 * ig / (entropy_y + entropy_x);
        end
    end

    function je_score = joint_entropy_feature(obj, X, y)
        % Joint-entropy-based score: the smaller H(X,Y), the stronger
        % the association between the feature and the target
        joint_entropy_val = joint_entropy([X, y]);
        % Invert so that smaller joint entropy yields a higher score
        je_score = 1 / (joint_entropy_val + eps);
    end

    function discretized_data = discretize_features(obj, data)
        % Discretize continuous features column by column
        [n_samples, n_features] = size(data);
        discretized_data = zeros(size(data));
        for i = 1:n_features
            feature_data = data(:, i);
            switch obj.DiscretizeMethod
                case 'equal_width'
                    % Equal-width binning
                    min_val = min(feature_data);
                    max_val = max(feature_data);
                    if min_val == max_val
                        % Constant feature: a single bin
                        discretized_data(:, i) = 1;
                        continue;
                    end
                    bin_edges = linspace(min_val, max_val, obj.NumBins + 1);
                case 'equal_frequency'
                    % Equal-frequency binning
                    sorted_data = sort(feature_data);
                    bin_edges = zeros(1, obj.NumBins + 1);
                    bin_edges(1) = min(feature_data);
                    bin_edges(end) = max(feature_data);
                    for bin = 2:obj.NumBins
                        idx = round(bin * n_samples / obj.NumBins);
                        bin_edges(bin) = sorted_data(max(1, min(idx, n_samples)));
                    end
                    % Ties can produce duplicate edges; histcounts
                    % requires strictly increasing edges
                    bin_edges = unique(bin_edges);
                    if numel(bin_edges) < 2
                        discretized_data(:, i) = 1;
                        continue;
                    end
                case 'kmeans'
                    % K-means binning: cut at midpoints between sorted centers
                    [~, bin_centers] = kmeans(feature_data, obj.NumBins);
                    sorted_centers = sort(bin_centers);
                    midpoints = (sorted_centers(1:end-1) + sorted_centers(2:end)) / 2;
                    bin_edges = [-inf; midpoints; inf];
                otherwise
                    error('Unknown discretization method: %s', obj.DiscretizeMethod);
            end
            % Assign discrete bin labels
            [~, ~, discretized_data(:, i)] = histcounts(feature_data, bin_edges);
        end
    end

    function discretized_target = discretize_target(obj, y)
        % Discretize the target variable (for classification problems)
        if iscategorical(y) || iscellstr(y) || isstring(y)
            % Class labels (e.g. species names): map each class to an integer
            discretized_target = grp2idx(y);
        elseif isinteger(y) || islogical(y)
            % Already discrete
            discretized_target = double(y);
        else
            % A continuous target must be discretized
            discretized_target = obj.discretize_features(y(:));
        end
    end

    function select_features(obj)
        % Rank features by score and keep the top k
        [sorted_scores, sorted_indices] = sort(obj.FeatureScores, 'descend');
        k = min(obj.NumFeatures, length(sorted_scores));
        obj.SelectedFeatures = sorted_indices(1:k);
        fprintf('Selected %d features:\n', k);
        for i = 1:k
            idx = sorted_indices(i);
            fprintf('  %d. %s: %.4f\n', i, obj.FeatureNames{idx}, sorted_scores(i));
        end
    end
end
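To see why the choice of DiscretizeMethod matters, here is a standalone sketch, independent of the class, contrasting equal-width and equal-frequency binning on a skewed sample (exprnd and quantile are from the Statistics and Machine Learning Toolbox):

rng(1);
x = exprnd(1, 1000, 1);                   % right-skewed sample
ew_edges = linspace(min(x), max(x), 11);  % 10 equal-width bins
ef_edges = quantile(x, 0:0.1:1);          % 10 equal-frequency bins
n_ew = histcounts(x, ew_edges);
n_ef = histcounts(x, unique(ef_edges));
fprintf('equal-width counts:     %s\n', mat2str(n_ew));
fprintf('equal-frequency counts: %s\n', mat2str(n_ef));
% Equal-width packs most samples into the first few bins on skewed data,
% which degrades the entropy estimate; equal-frequency keeps counts balanced.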
3. Advanced Feature Selection Algorithms
3.1 Mutual-Information-Based Feature Selection
classdef MutualInformationFeatureSelector < EntropyFeatureSelector
    % Mutual-information-based feature selector
    methods
        function obj = MutualInformationFeatureSelector(num_features)
            % Constructor
            if nargin < 1
                num_features = 10;
            end
            obj@EntropyFeatureSelector('mutual_information', num_features);
        end

        function mi = mutual_information_feature(obj, X, y)
            % Mutual information I(X;Y)
            mi = mutual_information(X, y);
        end
    end
end
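Note that this criterion ranks features identically to information gain. Using the identities from Section 2.2: I(X;Y) = H(X) + H(Y) - H(X,Y) = H(Y) - (H(X,Y) - H(X)) = H(Y) - H(Y|X) = IG(Y,X), so the two criteria produce the same feature ranking on the same discretized data.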
3.2 mRMR (Maximum Relevance Minimum Redundancy)
classdef MRMRFeatureSelector < EntropyFeatureSelector
    % mRMR (Maximum Relevance Minimum Redundancy) feature selector
    properties
        SelectedFeatureSet % the selected feature set
        RelevanceScores    % relevance score at each selection step
        RedundancyScores   % average redundancy at each selection step
    end
    methods
        function obj = MRMRFeatureSelector(num_features)
            % Constructor
            if nargin < 1
                num_features = 10;
            end
            obj@EntropyFeatureSelector('mrmr', num_features);
        end

        function fit(obj, X, y, feature_names)
            % mRMR feature selection
            [~, n_features] = size(X);
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end

            % Discretize features and target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);

            % Relevance of each feature to the target
            relevance = zeros(1, n_features);
            for i = 1:n_features
                relevance(i) = mutual_information(X_disc(:, i), y_disc);
            end
            % Expose per-feature relevance so generic plotting code
            % (e.g. the demo in Section 4.1) can rank all features
            obj.FeatureScores = relevance;

            % Greedy forward selection
            selected = [];
            candidate_features = 1:n_features;

            % First feature: highest relevance
            [~, first_feature] = max(relevance);
            selected = [selected, first_feature];
            candidate_features(first_feature) = [];

            obj.RelevanceScores = zeros(1, obj.NumFeatures);
            obj.RedundancyScores = zeros(1, obj.NumFeatures);
            obj.RelevanceScores(1) = relevance(first_feature);
            obj.RedundancyScores(1) = 0;

            fprintf('mRMR selection steps:\n');
            fprintf('1. Selected feature %d (%s), relevance: %.4f\n', ...
                first_feature, obj.FeatureNames{first_feature}, relevance(first_feature));

            % Select the remaining features
            for k = 2:min(obj.NumFeatures, n_features)
                best_score = -inf;
                best_feature = 0;
                avg_redundancy = 0;
                for i = 1:length(candidate_features)
                    feature_idx = candidate_features(i);
                    % Average redundancy with the already selected features
                    redundancy = 0;
                    for j = 1:length(selected)
                        redundancy = redundancy + mutual_information(...
                            X_disc(:, feature_idx), X_disc(:, selected(j)));
                    end
                    redundancy = redundancy / length(selected);
                    % mRMR criterion: relevance - redundancy
                    mrmr_score = relevance(feature_idx) - redundancy;
                    if mrmr_score > best_score
                        best_score = mrmr_score;
                        best_feature = feature_idx;
                        avg_redundancy = redundancy;
                    end
                end
                if best_feature > 0
                    selected = [selected, best_feature]; %#ok<AGROW>
                    candidate_features(candidate_features == best_feature) = [];
                    obj.RelevanceScores(k) = relevance(best_feature);
                    obj.RedundancyScores(k) = avg_redundancy;
                    fprintf('%d. Selected feature %d (%s), mRMR score: %.4f\n', ...
                        k, best_feature, obj.FeatureNames{best_feature}, best_score);
                else
                    break;
                end
            end

            obj.SelectedFeatures = selected;
            obj.SelectedFeatureSet = selected;
        end

        function plot_mrmr_process(obj)
            % Plot the relevance/redundancy trace of the selection process
            if isempty(obj.RelevanceScores)
                error('Run fit first');
            end
            figure('Position', [100, 100, 1200, 500]);

            subplot(1, 2, 1);
            k = 1:length(obj.RelevanceScores);
            plot(k, obj.RelevanceScores, 'bo-', 'LineWidth', 2, 'MarkerSize', 8);
            hold on;
            plot(k, obj.RedundancyScores, 'rs-', 'LineWidth', 2, 'MarkerSize', 8);
            xlabel('Selection step');
            ylabel('Score');
            title('mRMR selection process');
            legend('Relevance', 'Average redundancy', 'Location', 'best');
            grid on;

            subplot(1, 2, 2);
            mrmr_scores = obj.RelevanceScores - obj.RedundancyScores;
            bar(mrmr_scores, 'FaceColor', [0.2, 0.6, 0.8]);
            xlabel('Selection step');
            ylabel('mRMR score');
            title('mRMR scores');
            grid on;
        end
    end
end
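A minimal mRMR run on synthetic data, assuming the class files from Sections 2 and 3.2 are on the path (the data and variable names are illustrative only):

rng(0);
X = randn(500, 30);
y = double(X(:, 2) - X(:, 9) + 0.2 * randn(500, 1) > 0);
selector = MRMRFeatureSelector(5);
selector.fit(X, y);             % prints one line per selection step
selector.plot_mrmr_process();   % relevance vs. average redundancy per step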
3.3 JMI (Joint Mutual Information)
classdef JMIFeatureSelector < EntropyFeatureSelector
    % Joint-mutual-information-based feature selector
    methods
        function obj = JMIFeatureSelector(num_features)
            % Constructor
            if nargin < 1
                num_features = 10;
            end
            obj@EntropyFeatureSelector('jmi', num_features);
        end

        function fit(obj, X, y, feature_names)
            % JMI feature selection
            [~, n_features] = size(X);
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end

            % Discretize features and target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);

            % Greedy forward selection
            selected = [];
            candidate_features = 1:n_features;

            % First feature: largest mutual information with the target
            mi_scores = zeros(1, n_features);
            for i = 1:n_features
                mi_scores(i) = mutual_information(X_disc(:, i), y_disc);
            end
            % Expose per-feature MI so generic plotting code can use it
            obj.FeatureScores = mi_scores;
            [~, first_feature] = max(mi_scores);
            selected = [selected, first_feature];
            candidate_features(first_feature) = [];

            fprintf('JMI selection steps:\n');
            fprintf('1. Selected feature %d (%s), MI: %.4f\n', ...
                first_feature, obj.FeatureNames{first_feature}, mi_scores(first_feature));

            % Select the remaining features
            for k = 2:min(obj.NumFeatures, n_features)
                best_score = -inf;
                best_feature = 0;
                for i = 1:length(candidate_features)
                    feature_idx = candidate_features(i);
                    % JMI score: sum of conditional mutual informations with
                    % the target, conditioned on each selected feature
                    jmi_score = 0;
                    for j = 1:length(selected)
                        % I(feature; target | selected_feature)
                        cond_mi = obj.conditional_mutual_information(...
                            X_disc(:, feature_idx), y_disc, X_disc(:, selected(j)));
                        jmi_score = jmi_score + cond_mi;
                    end
                    if jmi_score > best_score
                        best_score = jmi_score;
                        best_feature = feature_idx;
                    end
                end
                if best_feature > 0
                    selected = [selected, best_feature]; %#ok<AGROW>
                    candidate_features(candidate_features == best_feature) = [];
                    fprintf('%d. Selected feature %d (%s), JMI score: %.4f\n', ...
                        k, best_feature, obj.FeatureNames{best_feature}, best_score);
                else
                    break;
                end
            end

            obj.SelectedFeatures = selected;
        end

        function cmi = conditional_mutual_information(obj, X, Y, Z)
            % Conditional mutual information I(X;Y|Z)
            % I(X;Y|Z) = H(X|Z) - H(X|Y,Z), or via joint entropies:
            % I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z)
            entropy_xz = joint_entropy([X, Z]);
            entropy_yz = joint_entropy([Y, Z]);
            entropy_z = entropy(Z);
            entropy_xyz = joint_entropy([X, Y, Z]);
            cmi = entropy_xz + entropy_yz - entropy_z - entropy_xyz;
        end
    end
end
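A note on the scoring loop: the standard JMI criterion ranks a candidate X by Σ_j I(X, Z_j; Y) over the already selected features Z_j. By the chain rule, I(X, Z_j; Y) = I(Z_j; Y) + I(X; Y|Z_j), and Σ_j I(Z_j; Y) is identical for every candidate, so ranking by the sum of conditional mutual informations, as the code above does, selects the same feature at every step.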
4. Complete Examples and Applications
4.1 Main Demo Program
function main_entropy_feature_selection()
    % Main demo for entropy-based feature selection
    clear; clc; close all;

    %% Generate example data
    fprintf('Generating example data...\n');
    [X, y, feature_names] = generate_example_data();
    fprintf('Data size: %d samples x %d features\n', size(X, 1), size(X, 2));
    fprintf('Number of target classes: %d\n', length(unique(y)));

    %% Compare feature selection methods
    methods = {
        'information_gain',        'Information gain';
        'gain_ratio',              'Gain ratio';
        'symmetrical_uncertainty', 'Symmetrical uncertainty';
        'mrmr',                    'mRMR';
        'jmi',                     'Joint mutual information'};
    num_selected = 15;
    results = cell(size(methods, 1), 1);

    figure('Position', [100, 100, 1400, 800]);
    for i = 1:size(methods, 1)
        method = methods{i, 1};
        method_name = methods{i, 2};
        fprintf('\n=== %s ===\n', method_name);

        % Build the appropriate selector
        switch method
            case 'mrmr'
                selector = MRMRFeatureSelector(num_selected);
            case 'jmi'
                selector = JMIFeatureSelector(num_selected);
            otherwise
                selector = EntropyFeatureSelector(method, num_selected);
        end

        % Run feature selection
        tic;
        selector.fit(X, y, feature_names);
        time_elapsed = toc;

        % Store the results
        results{i} = struct();
        results{i}.method = method_name;
        results{i}.selected_features = selector.SelectedFeatures;
        results{i}.feature_scores = selector.FeatureScores;
        results{i}.time = time_elapsed;

        % Plot the feature scores
        subplot(2, 3, i);
        [sorted_scores, sorted_idx] = sort(selector.FeatureScores, 'descend');
        top_features = min(20, length(sorted_scores));
        barh(sorted_scores(1:top_features), 'FaceColor', [0.3, 0.6, 0.9]);
        set(gca, 'YTick', 1:top_features, ...
            'YTickLabel', feature_names(sorted_idx(1:top_features)));
        ylabel('Feature');
        xlabel('Score');
        title(sprintf('%s\n(elapsed: %.2fs)', method_name, time_elapsed));
        grid on;

        fprintf('Top 5 selected features:\n');
        for j = 1:min(5, length(selector.SelectedFeatures))
            feat_idx = selector.SelectedFeatures(j);
            fprintf('  %d. %s: %.4f\n', j, feature_names{feat_idx}, ...
                selector.FeatureScores(feat_idx));
        end
    end

    %% Performance comparison
    compare_feature_selection_performance(X, y, results);

    %% Detailed mRMR analysis
    analyze_mrmr_performance(X, y, feature_names);
end

function [X, y, feature_names] = generate_example_data()
    % Generate synthetic example data
    n_samples = 1000;
    n_features = 50;

    % Random features, seeded for reproducibility
    rng(42);
    X = randn(n_samples, n_features);

    % Readable feature names
    feature_names = cell(1, n_features);
    for i = 1:n_features
        feature_names{i} = sprintf('Feature_%02d', i);
    end

    % Target variable driven by a subset of the features
    relevant_features = [1, 5, 10, 15, 20, 25, 30];
    noise_level = 0.3;

    % Linear combination plus noise
    weights = randn(length(relevant_features), 1);
    linear_combination = X(:, relevant_features) * weights;

    % Add a nonlinear component
    nonlinear_effect = sin(X(:, 1)) .* exp(X(:, 5)) + X(:, 10).^2;

    % Combine the effects
    total_effect = linear_combination + 0.5 * nonlinear_effect + ...
        noise_level * randn(n_samples, 1);

    % Threshold at the median to get a binary classification target
    y = double(total_effect > median(total_effect));

    fprintf('Data generation complete:\n');
    fprintf('  - samples:  %d\n', n_samples);
    fprintf('  - features: %d\n', n_features);
    fprintf('  - relevant features: %s\n', mat2str(relevant_features));
end

function compare_feature_selection_performance(X, y, results)
    % Compare classification performance across selection methods
    fprintf('\n=== Performance comparison of selection methods ===\n');

    % Use cross-validated classification accuracy as the metric
    cv = cvpartition(y, 'KFold', 5);
    classifier = @(X_train, y_train, X_test) ...
        predict(fitcsvm(X_train, y_train), X_test);

    accuracies = zeros(length(results), 1);
    for i = 1:length(results)
        method_result = results{i};
        selected_features = method_result.selected_features;
        if length(selected_features) < 2
            accuracies(i) = 0;
            continue;
        end
        X_selected = X(:, selected_features);
        % Cross-validated misclassification rate ('mcr'), converted to accuracy
        err = crossval('mcr', X_selected, y, 'Predfun', classifier, 'Partition', cv);
        accuracies(i) = 1 - err;
        fprintf('%s: accuracy = %.4f, elapsed = %.2fs\n', ...
            method_result.method, accuracies(i), method_result.time);
    end

    % Plot the comparison
    figure('Position', [100, 100, 1000, 600]);

    subplot(1, 2, 1);
    methods = cellfun(@(x) x.method, results, 'UniformOutput', false);
    bar(accuracies, 'FaceColor', [0.4, 0.7, 0.4]);
    set(gca, 'XTick', 1:length(methods), 'XTickLabel', methods, ...
        'XTickLabelRotation', 45);
    ylabel('Classification accuracy');
    title('Accuracy by feature selection method');
    grid on;

    subplot(1, 2, 2);
    times = cellfun(@(x) x.time, results);
    bar(times, 'FaceColor', [0.8, 0.4, 0.4]);
    set(gca, 'XTick', 1:length(methods), 'XTickLabel', methods, ...
        'XTickLabelRotation', 45);
    ylabel('Run time (s)');
    title('Computation time');
    grid on;
end

function analyze_mrmr_performance(X, y, feature_names)
    % Detailed analysis of the mRMR algorithm
    fprintf('\n=== Detailed mRMR analysis ===\n');
    mrmr_selector = MRMRFeatureSelector(20);
    mrmr_selector.fit(X, y, feature_names);

    % Plot the selection process
    mrmr_selector.plot_mrmr_process();

    % Analyze correlations among the selected features
    analyze_feature_correlations(X, y, mrmr_selector.SelectedFeatures, feature_names);
end

function analyze_feature_correlations(X, y, selected_features, feature_names)
    % Analyze feature-target and feature-feature correlations
    figure('Position', [100, 100, 1200, 500]);

    % Feature-target association
    subplot(1, 2, 1);
    correlations = zeros(1, length(selected_features));
    for i = 1:length(selected_features)
        if iscategorical(y) || isinteger(y)
            % For categorical targets, use the one-way ANOVA F statistic
            [~, tbl] = anova1(X(:, selected_features(i)), y, 'off');
            correlations(i) = tbl{2, 5}; % F statistic
        else
            % Otherwise, use the absolute Pearson correlation
            correlations(i) = abs(corr(X(:, selected_features(i)), y));
        end
    end
    bar(correlations, 'FaceColor', [0.2, 0.5, 0.8]);
    set(gca, 'XTick', 1:length(selected_features), ...
        'XTickLabel', feature_names(selected_features), ...
        'XTickLabelRotation', 45);
    ylabel('Feature-target association');
    title('Association of selected features with the target');
    grid on;

    % Feature-feature correlation matrix
    subplot(1, 2, 2);
    selected_X = X(:, selected_features);
    correlation_matrix = corr(selected_X);
    imagesc(correlation_matrix);
    colorbar;
    set(gca, 'XTick', 1:length(selected_features), ...
        'XTickLabel', feature_names(selected_features), ...
        'XTickLabelRotation', 45);
    set(gca, 'YTick', 1:length(selected_features), ...
        'YTickLabel', feature_names(selected_features));
    title('Correlation matrix of the selected features');
end
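With the functions above saved on the MATLAB path (one file per top-level function, plus the class files from Sections 2 and 3), the whole demo runs with a single call:

main_entropy_feature_selection();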
4.2 Application to a Real Dataset
function real_world_application()
    % Feature selection on a real dataset
    fprintf('=== Feature selection on a real dataset ===\n');

    % Load a built-in MATLAB dataset as the example
    load fisheriris;
    X = meas;     % feature matrix
    y = species;  % class labels (cell array of strings)
    feature_names = {'Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'};

    fprintf('Dataset: Fisher Iris\n');
    fprintf('Features: %d\n', size(X, 2));
    fprintf('Samples:  %d\n', size(X, 1));
    fprintf('Classes:  %s\n', strjoin(unique(y), ', '));

    % Select the two most informative features with mRMR
    mrmr_selector = MRMRFeatureSelector(2);
    mrmr_selector.fit(X, y, feature_names);

    % Visualize the result
    visualize_iris_results(X, y, mrmr_selector, feature_names);
end

function visualize_iris_results(X, y, selector, feature_names)
    % Visualize the feature selection result on the Iris dataset
    selected_features = selector.SelectedFeatures;

    figure('Position', [100, 100, 1500, 600]);

    % Original feature space
    subplot(1, 3, 1);
    gscatter(X(:, 1), X(:, 2), y);
    xlabel(feature_names{1});
    ylabel(feature_names{2});
    title('Original feature space (first two features)');
    grid on;
    legend('Location', 'best');

    % Selected feature space
    subplot(1, 3, 2);
    if length(selected_features) >= 2
        feat1 = selected_features(1);
        feat2 = selected_features(2);
        gscatter(X(:, feat1), X(:, feat2), y);
        xlabel(feature_names{feat1});
        ylabel(feature_names{feat2});
        title('Feature space selected by mRMR');
        grid on;
        legend('Location', 'best');
    end

    % Feature importance
    subplot(1, 3, 3);
    scores = selector.FeatureScores;
    [sorted_scores, sorted_idx] = sort(scores, 'descend');
    barh(sorted_scores, 'FaceColor', [0.3, 0.6, 0.3]);
    set(gca, 'YTick', 1:length(feature_names), ...
        'YTickLabel', feature_names(sorted_idx));
    xlabel('Feature importance score');
    title('Feature importance ranking');
    grid on;

    fprintf('\nFeature importance ranking:\n');
    for i = 1:length(feature_names)
        fprintf('  %d. %s: %.4f\n', i, feature_names{sorted_idx(i)}, sorted_scores(i));
    end
end
5. Advanced Features and Optimization
5.1 Parallel Computing Optimization
classdef ParallelEntropyFeatureSelector < EntropyFeatureSelector
    % Feature selector with parallelized scoring
    methods
        function fit_parallel(obj, X, y, feature_names)
            % Parallel version of fit
            [~, n_features] = size(X);
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end

            % Discretize features and target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);

            % Score features in parallel. parfor cannot slice an object
            % property, so accumulate into a local array and assign once.
            scores = zeros(1, n_features);
            method = obj.Method; % plain local copy, broadcast to workers
            parfor i = 1:n_features
                switch method
                    case 'information_gain'
                        scores(i) = obj.information_gain(X_disc(:, i), y_disc);
                    case 'gain_ratio'
                        scores(i) = obj.gain_ratio(X_disc(:, i), y_disc);
                    case 'symmetrical_uncertainty'
                        scores(i) = obj.symmetrical_uncertainty(X_disc(:, i), y_disc);
                    otherwise
                        scores(i) = 0;
                end
            end
            obj.FeatureScores = scores;

            % Keep the top-scoring features
            obj.select_features();
        end
    end
end
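Running the parallel selector requires the Parallel Computing Toolbox; a sketch, where the pool size and the variables X and y are illustrative:

if isempty(gcp('nocreate'))
    parpool;   % start a worker pool with the default size
end
selector = ParallelEntropyFeatureSelector('information_gain', 10);
selector.fit_parallel(X, y);   % same scores as fit, computed in parallel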
5.2 Stability Analysis
function stability_analysis(X, y, feature_names)
    % Stability analysis for feature selection
    fprintf('=== Feature selection stability analysis ===\n');
    num_runs = 10;
    num_selected = 10;

    % Selected feature indices from each bootstrap run
    selection_results = zeros(num_runs, num_selected);

    for run = 1:num_runs
        % Bootstrap resampling
        n_samples = size(X, 1);
        bootstrap_indices = randsample(n_samples, n_samples, true);
        X_bootstrap = X(bootstrap_indices, :);
        y_bootstrap = y(bootstrap_indices);

        % Feature selection on the bootstrap sample
        selector = MRMRFeatureSelector(num_selected);
        selector.fit(X_bootstrap, y_bootstrap, feature_names);
        selection_results(run, :) = selector.SelectedFeatures(1:num_selected);
    end

    % Stability score
    stability = compute_selection_stability(selection_results);
    fprintf('Feature selection stability: %.4f\n', stability);

    % Visualize the stability results
    plot_stability_analysis(selection_results, feature_names, stability);
end

function stability = compute_selection_stability(selection_results)
    % Average pairwise Jaccard similarity between selected feature sets
    num_runs = size(selection_results, 1);

    similarities = zeros(num_runs * (num_runs - 1) / 2, 1);
    idx = 1;
    for i = 1:num_runs
        for j = i+1:num_runs
            set_i = selection_results(i, :);
            set_j = selection_results(j, :);
            % Jaccard similarity
            intersection = length(intersect(set_i, set_j));
            union_size = length(union(set_i, set_j));
            similarities(idx) = intersection / union_size;
            idx = idx + 1;
        end
    end
    stability = mean(similarities);
end

function plot_stability_analysis(selection_results, feature_names, stability)
    % Plot the stability analysis results
    num_runs = size(selection_results, 1);

    figure('Position', [100, 100, 1200, 600]);

    % Selection frequency per feature
    subplot(1, 2, 1);
    all_selected_features = selection_results(:);
    unique_features = unique(all_selected_features);
    selection_frequency = zeros(1, length(unique_features));
    for i = 1:length(unique_features)
        selection_frequency(i) = sum(all_selected_features == unique_features(i)) / num_runs;
    end
    [sorted_freq, sorted_idx] = sort(selection_frequency, 'descend');
    bar(sorted_freq, 'FaceColor', [0.7, 0.3, 0.3]);
    feature_labels = cell(1, length(unique_features));
    for i = 1:length(unique_features)
        feat_idx = unique_features(sorted_idx(i));
        feature_labels{i} = feature_names{feat_idx};
    end
    set(gca, 'XTick', 1:length(feature_labels), ...
        'XTickLabel', feature_labels, 'XTickLabelRotation', 45);
    ylabel('Selection frequency');
    title(sprintf('Feature selection frequency (stability: %.4f)', stability));
    grid on;

    % Heatmap of selections per run
    subplot(1, 2, 2);
    selection_matrix = zeros(num_runs, max(unique_features));
    for i = 1:num_runs
        selection_matrix(i, selection_results(i, :)) = 1;
    end
    imagesc(selection_matrix);
    colorbar;
    xlabel('Feature index');
    ylabel('Bootstrap run');
    title('Heatmap of feature selection results');
end
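Invoking the stability analysis on the synthetic demo data (generate_example_data is defined in Section 4.1):

[X, y, feature_names] = generate_example_data();
stability_analysis(X, y, feature_names);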
6. Summary
Entropy-based feature selection algorithms offer the following strengths:
- Solid theoretical footing: grounded in information theory, with a clear mathematical interpretation
- No distributional assumptions: no particular requirements on how the data is distributed
- Ability to detect nonlinear relationships: because scores are computed from probability distributions, nonlinear associations are captured
- Applicability to many data types: discretization makes both continuous and discrete features tractable
Comparison of the main algorithms:
- Information gain: simple and effective, but biased toward many-valued features
- Gain ratio: corrects the bias of information gain
- Symmetrical uncertainty: normalized, so scores are comparable across features
- mRMR: balances relevance against redundancy, with strong empirical results
- JMI: accounts for interactions among features, giving a more complete picture
These algorithms are particularly well suited to:
- Dimensionality reduction of high-dimensional data
- Gene selection in bioinformatics
- Feature selection for text classification
- Any setting that calls for interpretable feature importance
With sensible parameter choices and appropriate preprocessing, entropy-based feature selection can markedly improve model performance while keeping the results interpretable.