A bearing fault diagnosis system based on PCA whitening and K-means clustering. The approach combines dimensionality reduction, decorrelation, and cluster analysis, and can effectively separate different fault modes. The MATLAB implementation is organized into the four parts below.
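Condensed to its core calls, the workflow looks like the sketch below (variable names such as gen and Xw are illustrative only; the full main_bearing_fault_clustering program in Section 4 adds visualization, performance comparison, and reporting):

% Minimal pipeline sketch using the classes defined in Sections 1-3
fs = 10000; duration = 1;                                   % 10 kHz sampling, 1 s per record
gen = BearingFaultData(fs, duration);
[signals, labels] = gen.generate_fault_data(50);            % 50 samples per fault type
features = BearingFaultData.extract_features(signals, fs);  % 18 time/frequency features
[Xw, pca_model] = PCAWhitening.apply_pca_whitening(features, 'variance_retained', 0.95);
k = KMeansClustering.find_optimal_k(Xw, 8, labels);         % search k = 2..8
[cluster_idx, centroids, model] = KMeansClustering.perform_clustering(Xw, labels, 'k', k);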
1. Bearing Fault Data Generation and Feature Extraction
classdef BearingFaultData
  % Bearing fault data generation and feature extraction

  properties
    sampling_freq   % sampling frequency (Hz)
    duration        % signal duration (s)
    fault_types     % fault type labels
    bearing_params  % bearing geometry parameters
  end

  methods
    function obj = BearingFaultData(fs, duration)
      % Constructor
      obj.sampling_freq = fs;
      obj.duration = duration;

      % Fault types
      obj.fault_types = {'Normal', 'InnerRace', 'OuterRace', 'BallFault'};

      % Bearing parameters (6205 bearing)
      obj.bearing_params.d = 7.94;           % ball diameter (mm)
      obj.bearing_params.D = 39.04;          % pitch diameter (mm)
      obj.bearing_params.n = 9;              % number of rolling elements
      obj.bearing_params.contact_angle = 0;  % contact angle (deg)
    end

    function [signals, labels] = generate_fault_data(obj, num_samples_per_class)
      % Generate bearing fault data
      fprintf('Generating bearing fault data...\n');

      total_samples = length(obj.fault_types) * num_samples_per_class;
      signal_length = obj.sampling_freq * obj.duration;

      signals = zeros(total_samples, signal_length);
      labels = cell(total_samples, 1);

      sample_count = 1;
      for i = 1:length(obj.fault_types)
        fault_type = obj.fault_types{i};
        fprintf('Generating %s fault data...\n', fault_type);

        for j = 1:num_samples_per_class
          % Generate a single fault signal
          signal = obj.generate_single_signal(fault_type);
          signals(sample_count, :) = signal;
          labels{sample_count} = fault_type;
          sample_count = sample_count + 1;
        end
      end

      fprintf('Data generation complete: %d samples, %d fault types\n', ...
        total_samples, length(obj.fault_types));
    end

    function signal = generate_single_signal(obj, fault_type)
      % Generate a single fault signal
      t = 0:1/obj.sampling_freq:obj.duration-1/obj.sampling_freq;
      N = length(t);

      % Baseline vibration (normal machine operation)
      base_vibration = 0.5 * sin(2*pi*30*t) + 0.3 * sin(2*pi*60*t);

      switch fault_type
        case 'Normal'
          % Healthy condition: baseline vibration plus noise only
          fault_component = zeros(1, N);
          noise_level = 0.1;
        case 'InnerRace'
          % Inner-race fault: impact signature
          fault_freq = obj.calculate_inner_race_frequency(1800); % 1800 RPM
          fault_component = obj.generate_impact_signal(t, fault_freq, 0.8);
          noise_level = 0.15;
        case 'OuterRace'
          % Outer-race fault: impact signature
          fault_freq = obj.calculate_outer_race_frequency(1800);
          fault_component = obj.generate_impact_signal(t, fault_freq, 0.7);
          noise_level = 0.12;
        case 'BallFault'
          % Ball fault: impact signature
          fault_freq = obj.calculate_ball_fault_frequency(1800);
          fault_component = obj.generate_impact_signal(t, fault_freq, 0.6);
          noise_level = 0.13;
        otherwise
          fault_component = zeros(1, N);
          noise_level = 0.1;
      end

      % Combine the signal components
      signal = base_vibration + fault_component + ...
        noise_level * randn(1, N);
    end

    function impact_signal = generate_impact_signal(obj, t, fault_freq, amplitude)
      % Generate an impact train (fault signature)
      N = length(t);
      impact_signal = zeros(1, N);

      % Interval between impacts (in samples)
      impact_interval = round(obj.sampling_freq / fault_freq);
      num_impacts = floor(N / impact_interval);

      for i = 1:num_impacts
        impact_pos = (i-1) * impact_interval + 1;
        if impact_pos + 100 <= N
          % Exponentially decaying oscillation
          tau = 0.002;      % decay time constant (s)
          osc_freq = 3000;  % resonance frequency (Hz)
          time_window = 0:1/obj.sampling_freq:0.01;
          impact = amplitude * exp(-time_window/tau) .* ...
            sin(2*pi*osc_freq*time_window);

          end_pos = min(impact_pos + length(impact) - 1, N);
          actual_length = end_pos - impact_pos + 1;
          impact_signal(impact_pos:end_pos) = ...
            impact_signal(impact_pos:end_pos) + impact(1:actual_length);
        end
      end
    end

    function fi = calculate_inner_race_frequency(obj, rpm)
      % Ball pass frequency of the inner race (BPFI)
      fr = rpm / 60;  % shaft rotation frequency
      fi = 0.5 * obj.bearing_params.n * fr * ...
        (1 + obj.bearing_params.d/obj.bearing_params.D * cosd(obj.bearing_params.contact_angle));
    end

    function fo = calculate_outer_race_frequency(obj, rpm)
      % Ball pass frequency of the outer race (BPFO)
      fr = rpm / 60;
      fo = 0.5 * obj.bearing_params.n * fr * ...
        (1 - obj.bearing_params.d/obj.bearing_params.D * cosd(obj.bearing_params.contact_angle));
    end

    function fb = calculate_ball_fault_frequency(obj, rpm)
      % Ball spin frequency (BSF)
      fr = rpm / 60;
      fb = 0.5 * fr * obj.bearing_params.D/obj.bearing_params.d * ...
        (1 - (obj.bearing_params.d/obj.bearing_params.D * cosd(obj.bearing_params.contact_angle))^2);
    end
  end

  methods (Static)
    function features = extract_features(signals, fs)
      % Extract time-domain and frequency-domain features
      fprintf('Extracting signal features...\n');

      [num_samples, signal_length] = size(signals);

      % Feature list
      feature_names = {'Mean', 'Std', 'RMS', 'Skewness', 'Kurtosis', ...
        'Peak2Peak', 'CrestFactor', 'ImpulseFactor', ...
        'MarginFactor', 'ShapeFactor', ...
        'FrequencyRMS', 'FrequencyMean', 'FrequencyStd', ...
        'SpectralCentroid', 'SpectralSpread', 'SpectralSkewness', ...
        'SpectralKurtosis', 'SpectralRolloff'};
      num_features = length(feature_names);

      features = zeros(num_samples, num_features);

      for i = 1:num_samples
        signal = signals(i, :);

        % Time-domain features
        features(i, 1) = mean(signal);       % mean
        features(i, 2) = std(signal);        % standard deviation
        features(i, 3) = rms(signal);        % root mean square
        features(i, 4) = skewness(signal);   % skewness
        features(i, 5) = kurtosis(signal);   % kurtosis
        features(i, 6) = peak2peak(signal);  % peak-to-peak value

        % Dimensionless indicators
        rms_val = rms(signal);
        peak_val = max(abs(signal));
        features(i, 7) = peak_val / rms_val;                      % crest factor
        features(i, 8) = peak_val / mean(abs(signal));            % impulse factor
        features(i, 9) = peak_val / (mean(sqrt(abs(signal)))^2);  % margin factor
        features(i, 10) = rms_val / mean(abs(signal));            % shape factor

        % Frequency-domain features
        [freq_features, ~] = BearingFaultData.extract_frequency_features(signal, fs);
        features(i, 11:18) = freq_features;
      end

      fprintf('Feature extraction complete: %d samples, %d features\n', num_samples, num_features);
    end

    function [freq_features, f] = extract_frequency_features(signal, fs)
      % Extract frequency-domain features
      N = length(signal);
      f = (0:N-1) * fs / N;

      % Single-sided amplitude spectrum
      Y = fft(signal);
      P2 = abs(Y/N);
      P1 = P2(1:floor(N/2)+1);
      P1(2:end-1) = 2*P1(2:end-1);
      f = f(1:floor(N/2)+1);

      % Spectral statistics
      freq_features = zeros(1, 8);
      freq_features(1) = rms(P1);   % spectral RMS
      freq_features(2) = mean(P1);  % spectral mean
      freq_features(3) = std(P1);   % spectral standard deviation

      % Spectral centroid
      freq_features(4) = sum(f .* P1) / sum(P1);

      % Spectral spread
      centroid = freq_features(4);
      freq_features(5) = sqrt(sum(((f - centroid).^2) .* P1) / sum(P1));

      % Spectral skewness
      freq_features(6) = sum(((f - centroid).^3) .* P1) / (sum(P1) * freq_features(5)^3);

      % Spectral kurtosis
      freq_features(7) = sum(((f - centroid).^4) .* P1) / (sum(P1) * freq_features(5)^4);

      % Spectral rolloff (85%)
      total_power = sum(P1);
      cumulative_power = cumsum(P1);
      rolloff_idx = find(cumulative_power >= 0.85 * total_power, 1);
      freq_features(8) = f(rolloff_idx);
    end

    function plot_sample_signals(signals, labels, fs, duration)
      % Plot sample signals for each fault type
      figure('Position', [100, 100, 1400, 1000]);

      unique_labels = unique(labels);
      num_types = length(unique_labels);

      for i = 1:num_types
        % First sample of this class
        idx = find(strcmp(labels, unique_labels{i}), 1);
        signal = signals(idx, :);
        t = 0:1/fs:duration-1/fs;

        % Time-domain signal
        subplot(3, num_types, i);
        plot(t, signal, 'b-', 'LineWidth', 1);
        title(sprintf('%s - time domain', unique_labels{i}));
        xlabel('Time (s)'); ylabel('Amplitude');
        grid on;

        % Amplitude spectrum
        % (fix: plot the single-sided spectrum itself; the original plotted the
        % 8-element feature vector against f, whose lengths do not match)
        subplot(3, num_types, i + num_types);
        N = length(signal);
        Y = fft(signal);
        P2 = abs(Y/N);
        P1 = P2(1:floor(N/2)+1);
        P1(2:end-1) = 2*P1(2:end-1);
        f = (0:floor(N/2)) * fs / N;
        plot(f, P1, 'r-', 'LineWidth', 1);
        title(sprintf('%s - spectrum', unique_labels{i}));
        xlabel('Frequency (Hz)'); ylabel('Amplitude');
        grid on; xlim([0, 1000]);

        % Envelope spectrum
        subplot(3, num_types, i + 2*num_types);
        [env_spectrum, env_f] = BearingFaultData.compute_envelope_spectrum(signal, fs);
        plot(env_f, env_spectrum, 'g-', 'LineWidth', 1);
        title(sprintf('%s - envelope spectrum', unique_labels{i}));
        xlabel('Frequency (Hz)'); ylabel('Amplitude');
        grid on; xlim([0, 500]);
      end

      sgtitle('Bearing fault signal samples');
    end

    function [env_spectrum, f] = compute_envelope_spectrum(signal, fs)
      % Compute the envelope spectrum
      % Envelope via the Hilbert transform
      analytic_signal = hilbert(signal);
      envelope_signal = abs(analytic_signal);

      % Spectrum of the envelope
      N = length(envelope_signal);
      f = (0:N-1) * fs / N;
      env_spectrum = abs(fft(envelope_signal)) / N;
      env_spectrum = env_spectrum(1:floor(N/2)+1);
      env_spectrum(2:end-1) = 2 * env_spectrum(2:end-1);
      f = f(1:floor(N/2)+1);
    end
  end
end
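For reference, the characteristic defect frequencies implemented by calculate_inner_race_frequency, calculate_outer_race_frequency, and calculate_ball_fault_frequency are the standard bearing kinematic formulas, for n rolling elements of diameter d, pitch diameter D, contact angle phi, and shaft frequency f_r = RPM/60:

$$f_{\mathrm{BPFI}} = \frac{n}{2} f_r \left(1 + \frac{d}{D}\cos\varphi\right), \qquad f_{\mathrm{BPFO}} = \frac{n}{2} f_r \left(1 - \frac{d}{D}\cos\varphi\right), \qquad f_{\mathrm{BSF}} = \frac{D}{2d} f_r \left(1 - \left(\frac{d}{D}\cos\varphi\right)^{2}\right)$$

For the 6205 parameters above at 1800 RPM (f_r = 30 Hz) these work out to roughly BPFI ≈ 162.5 Hz, BPFO ≈ 107.5 Hz, and BSF ≈ 70.7 Hz, which is where the corresponding fault peaks should appear in the envelope spectra produced by plot_sample_signals.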
2. PCA Whitening
classdef PCAWhitening
  % PCA whitening
  methods (Static)
    function [features_whitened, pca_model] = apply_pca_whitening(features, varargin)
      % Apply PCA whitening
      % Input:  features           - raw feature matrix (samples x features)
      % Output: features_whitened  - whitened features
      %         pca_model          - PCA model parameters
      fprintf('Applying PCA whitening...\n');

      p = inputParser;
      addParameter(p, 'variance_retained', 0.95, @(x) x>0 && x<=1);
      addParameter(p, 'epsilon', 1e-5, @(x) x>0);
      parse(p, varargin{:});

      variance_retained = p.Results.variance_retained;
      epsilon = p.Results.epsilon;

      [num_samples, num_features] = size(features);

      % 1. Standardize the data (zero mean, unit variance)
      features_standardized = zscore(features);

      % 2. Covariance matrix
      covariance_matrix = cov(features_standardized);

      % 3. Eigendecomposition
      [eigenvectors, eigenvalues] = eig(covariance_matrix);
      eigenvalues = diag(eigenvalues);

      % Sort eigenvalues in descending order
      [eigenvalues, idx] = sort(eigenvalues, 'descend');
      eigenvectors = eigenvectors(:, idx);

      % 4. Number of principal components to retain
      explained_variance = cumsum(eigenvalues) / sum(eigenvalues);
      num_components = find(explained_variance >= variance_retained, 1);

      fprintf('Original features: %d, retained components: %d (variance retained: %.1f%%)\n', ...
        num_features, num_components, variance_retained*100);

      % 5. Select the principal components
      eigenvectors_reduced = eigenvectors(:, 1:num_components);
      eigenvalues_reduced = eigenvalues(1:num_components);

      % 6. Whitening transform
      whitening_transform = eigenvectors_reduced * diag(1 ./ sqrt(eigenvalues_reduced + epsilon));
      features_whitened = features_standardized * whitening_transform;

      % Save the PCA model
      pca_model.eigenvectors = eigenvectors;
      pca_model.eigenvalues = eigenvalues;
      pca_model.explained_variance = explained_variance;
      pca_model.num_components = num_components;
      pca_model.whitening_transform = whitening_transform;
      pca_model.feature_mean = mean(features);
      pca_model.feature_std = std(features);

      fprintf('PCA whitening complete\n');
    end

    function visualize_pca_results(features, features_whitened, labels, pca_model)
      % Visualize the PCA whitening results
      figure('Position', [100, 100, 1600, 1200]);

      % Subplot 1: correlation matrix of the original features
      subplot(2, 3, 1);
      correlation_original = corr(features);
      imagesc(correlation_original); colorbar;
      title('Correlation matrix of original features');
      xlabel('Feature index'); ylabel('Feature index');
      axis equal tight;

      % Subplot 2: correlation matrix of the whitened features
      subplot(2, 3, 2);
      correlation_whitened = corr(features_whitened);
      imagesc(correlation_whitened); colorbar;
      title('Correlation matrix of whitened features');
      xlabel('Feature index'); ylabel('Feature index');
      axis equal tight;

      % Subplot 3: explained variance
      subplot(2, 3, 3);
      plot(pca_model.explained_variance, 'bo-', 'LineWidth', 2, 'MarkerSize', 6);
      hold on;
      plot(pca_model.num_components, pca_model.explained_variance(pca_model.num_components), ...
        'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
      xlabel('Number of components'); ylabel('Cumulative explained variance');
      title('PCA explained variance');
      grid on;
      legend('Cumulative variance', 'Selected point', 'Location', 'southeast');

      % Subplot 4: first two principal components of the original features
      subplot(2, 3, 4);
      [~, scores_original] = pca(features);
      PCAWhitening.visualize_clustering(scores_original(:, 1:2), labels, 'PCA of original features');

      % Subplot 5: first two whitened components
      subplot(2, 3, 5);
      PCAWhitening.visualize_clustering(features_whitened(:, 1:2), labels, 'Whitened features');

      % Subplot 6: eigenvalue distribution
      subplot(2, 3, 6);
      semilogy(pca_model.eigenvalues, 'bo-', 'LineWidth', 2, 'MarkerSize', 6);
      xlabel('Component index'); ylabel('Eigenvalue (log scale)');
      title('PCA eigenvalue spectrum');
      grid on;

      sgtitle('PCA whitening analysis');
    end

    function visualize_clustering(data, labels, title_str)
      % Scatter plot of the first two components, colored by class
      % (moved out of visualize_pca_results as a static method: classdef
      % methods cannot contain nested functions)
      unique_labels = unique(labels);
      colors = lines(length(unique_labels));
      markers = 'osd^v><ph';
      hold on;
      for i = 1:length(unique_labels)
        idx = strcmp(labels, unique_labels{i});
        scatter(data(idx, 1), data(idx, 2), 50, colors(i, :), ...
          markers(mod(i-1, length(markers))+1), 'filled', ...
          'DisplayName', unique_labels{i});
      end
      title(title_str);
      xlabel('Principal component 1'); ylabel('Principal component 2');
      legend('Location', 'best');
      grid on;
    end

    function analyze_whitening_effect(features, features_whitened)
      % Analyze the effect of whitening
      fprintf('\n=== PCA whitening effect analysis ===\n');

      % Statistics of the original features
      original_cov = cov(features);
      original_corr = corr(features);

      % Statistics of the whitened features
      whitened_cov = cov(features_whitened);
      whitened_corr = corr(features_whitened);

      fprintf('Condition number of original covariance matrix: %.2e\n', cond(original_cov));
      fprintf('Condition number of whitened covariance matrix: %.2f\n', cond(whitened_cov));
      fprintf('Mean absolute correlation (original): %.4f\n', mean(abs(original_corr(original_corr < 1))));
      fprintf('Mean absolute correlation (whitened): %.4f\n', mean(abs(whitened_corr(whitened_corr < 1))));

      % Eigenvalue distribution comparison
      original_eigvals = eig(original_cov);
      whitened_eigvals = eig(whitened_cov);

      fprintf('Original eigenvalue range: [%.2e, %.2e]\n', min(original_eigvals), max(original_eigvals));
      fprintf('Whitened eigenvalue range: [%.2f, %.2f]\n', min(whitened_eigvals), max(whitened_eigvals));

      % Visual comparison
      figure('Position', [100, 100, 1200, 500]);

      subplot(1, 2, 1);
      semilogy(sort(original_eigvals, 'descend'), 'bo-', 'LineWidth', 2, 'DisplayName', 'Original features');
      hold on;
      semilogy(sort(whitened_eigvals, 'descend'), 'rs-', 'LineWidth', 2, 'DisplayName', 'Whitened features');
      xlabel('Eigenvalue index'); ylabel('Eigenvalue (log scale)');
      title('Eigenvalue distribution comparison');
      legend; grid on;

      subplot(1, 2, 2);
      boxplot([mean(abs(original_corr(original_corr < 1))), ...
        mean(abs(whitened_corr(whitened_corr < 1)))], ...
        {'Original features', 'Whitened features'});
      ylabel('Mean absolute correlation');
      title('Feature correlation comparison');
      grid on;
    end
  end
end
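apply_pca_whitening learns the whitening transform from the training set and stores everything needed to reuse it in pca_model, but the class does not include a method for projecting new data. The helper below is a minimal sketch of that step (the name whiten_new_samples is illustrative, not part of the original code); it relies only on the feature_mean, feature_std, and whitening_transform fields saved above.

function Xw_new = whiten_new_samples(X_new, pca_model)
  % Project new, unseen feature vectors into the whitened space learned
  % by PCAWhitening.apply_pca_whitening: standardize with the training-set
  % statistics, then apply the stored whitening transform.
  X_std = (X_new - pca_model.feature_mean) ./ pca_model.feature_std;
  Xw_new = X_std * pca_model.whitening_transform;
end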
3. K-Means Clustering Analysis
classdef KMeansClustering
  % K-means clustering analysis
  methods (Static)
    function [cluster_labels, centroid_positions, clustering_model] = ...
        perform_clustering(features, true_labels, varargin)
      % Run K-means clustering
      fprintf('Running K-means clustering...\n');

      p = inputParser;
      addParameter(p, 'k', 4, @(x) x>0);  % number of clusters
      addParameter(p, 'max_iterations', 100, @(x) x>0);
      addParameter(p, 'replicates', 10, @(x) x>0);
      addParameter(p, 'distance_metric', 'sqeuclidean', @ischar);
      parse(p, varargin{:});

      k = p.Results.k;
      max_iterations = p.Results.max_iterations;
      replicates = p.Results.replicates;
      distance_metric = p.Results.distance_metric;

      % Run K-means
      [cluster_labels, centroid_positions, sumd, distances] = ...
        kmeans(features, k, ...
        'MaxIter', max_iterations, ...
        'Replicates', replicates, ...
        'Distance', distance_metric, ...
        'Display', 'final');

      % Clustering quality metrics
      clustering_quality = KMeansClustering.evaluate_clustering_quality(...
        features, cluster_labels, centroid_positions, true_labels);

      % Save the clustering model
      clustering_model.cluster_labels = cluster_labels;
      clustering_model.centroid_positions = centroid_positions;
      clustering_model.sumd = sumd;
      clustering_model.distances = distances;
      clustering_model.quality = clustering_quality;
      clustering_model.parameters = p.Results;

      fprintf('K-means clustering complete: %d clusters\n', k);
    end

    function quality = evaluate_clustering_quality(features, cluster_labels, centroids, true_labels)
      % Evaluate clustering quality
      [num_samples, num_features] = size(features);
      k = size(centroids, 1);

      % Internal (unsupervised) metrics
      % 1. Silhouette coefficient
      silhouette_vals = silhouette(features, cluster_labels);
      quality.silhouette_mean = mean(silhouette_vals);
      quality.silhouette_std = std(silhouette_vals);

      % 2. Davies-Bouldin index
      quality.db_index = KMeansClustering.calculate_db_index(features, cluster_labels, centroids);

      % 3. Calinski-Harabasz index
      quality.ch_index = KMeansClustering.calculate_ch_index(features, cluster_labels, centroids);

      % External (supervised) metrics, if ground-truth labels are available
      if ~isempty(true_labels)
        % 4. Adjusted Rand index
        quality.adj_rand_index = KMeansClustering.calculate_adjusted_rand_index(...
          true_labels, cluster_labels);

        % 5. Normalized mutual information
        quality.normalized_mutual_info = KMeansClustering.calculate_nmi(...
          true_labels, cluster_labels);

        % 6. Clustering accuracy
        quality.clustering_accuracy = KMeansClustering.calculate_clustering_accuracy(...
          true_labels, cluster_labels);
      else
        quality.adj_rand_index = NaN;
        quality.normalized_mutual_info = NaN;
        quality.clustering_accuracy = NaN;
      end

      % Within-cluster and between-cluster distances
      [quality.within_cluster_distance, quality.between_cluster_distance] = ...
        KMeansClustering.calculate_distance_metrics(features, cluster_labels, centroids);

      fprintf('Clustering quality metrics:\n');
      fprintf('  Silhouette coefficient: %.4f\n', quality.silhouette_mean);
      fprintf('  Davies-Bouldin index: %.4f\n', quality.db_index);
      fprintf('  Calinski-Harabasz index: %.4f\n', quality.ch_index);
      if ~isempty(true_labels)
        fprintf('  Adjusted Rand index: %.4f\n', quality.adj_rand_index);
        fprintf('  Normalized mutual information: %.4f\n', quality.normalized_mutual_info);
        fprintf('  Clustering accuracy: %.4f\n', quality.clustering_accuracy);
      end
    end

    function db_index = calculate_db_index(features, labels, centroids)
      % Davies-Bouldin index (lower is better)
      k = size(centroids, 1);
      cluster_dispersion = zeros(1, k);
      cluster_sizes = zeros(1, k);

      % Dispersion of each cluster
      for i = 1:k
        cluster_points = features(labels == i, :);
        cluster_sizes(i) = size(cluster_points, 1);
        if cluster_sizes(i) > 0
          cluster_dispersion(i) = mean(sqrt(sum((cluster_points - centroids(i, :)).^2, 2)));
        end
      end

      % DB index
      R = zeros(k);
      for i = 1:k
        for j = i+1:k
          if cluster_sizes(i) > 0 && cluster_sizes(j) > 0
            d_ij = norm(centroids(i, :) - centroids(j, :));
            R(i, j) = (cluster_dispersion(i) + cluster_dispersion(j)) / d_ij;
          end
        end
      end

      max_R = max(R, [], 2);
      db_index = mean(max_R(max_R > 0));
    end

    function ch_index = calculate_ch_index(features, labels, centroids)
      % Calinski-Harabasz index (higher is better)
      [num_samples, num_features] = size(features);
      k = size(centroids, 1);
      overall_centroid = mean(features, 1);

      % Between-cluster scatter
      between_ss = 0;
      cluster_sizes = zeros(1, k);
      for i = 1:k
        cluster_points = features(labels == i, :);
        cluster_sizes(i) = size(cluster_points, 1);
        between_ss = between_ss + cluster_sizes(i) * norm(centroids(i, :) - overall_centroid)^2;
      end

      % Within-cluster scatter
      within_ss = 0;
      for i = 1:k
        cluster_points = features(labels == i, :);
        within_ss = within_ss + sum(sum((cluster_points - centroids(i, :)).^2, 2));
      end

      ch_index = (between_ss / (k-1)) / (within_ss / (num_samples - k));
    end

    function adj_rand_index = calculate_adjusted_rand_index(true_labels, pred_labels)
      % Adjusted Rand index
      % Convert labels to numeric codes
      [~, ~, true_numeric] = unique(true_labels);
      [~, ~, pred_numeric] = unique(pred_labels);

      % Contingency matrix
      contingency_matrix = crosstab(true_numeric, pred_numeric);

      n = sum(contingency_matrix(:));
      nij = contingency_matrix;
      ai = sum(contingency_matrix, 2);
      bj = sum(contingency_matrix, 1);

      % Rand index components
      n_choose_2 = n*(n-1)/2;
      sum_nij_choose_2 = sum(nij(:).*(nij(:)-1)/2);
      sum_ai_choose_2 = sum(ai.*(ai-1)/2);
      sum_bj_choose_2 = sum(bj.*(bj-1)/2);

      expected_index = sum_ai_choose_2 * sum_bj_choose_2 / n_choose_2;
      max_index = 0.5 * (sum_ai_choose_2 + sum_bj_choose_2);

      adj_rand_index = (sum_nij_choose_2 - expected_index) / (max_index - expected_index);
    end

    function nmi = calculate_nmi(true_labels, pred_labels)
      % Normalized mutual information
      % Convert labels to numeric codes
      [~, ~, true_numeric] = unique(true_labels);
      [~, ~, pred_numeric] = unique(pred_labels);

      % Mutual information
      contingency_matrix = crosstab(true_numeric, pred_numeric) + eps;
      joint_prob = contingency_matrix / sum(contingency_matrix(:));
      marginal_true = sum(joint_prob, 2);
      marginal_pred = sum(joint_prob, 1);

      mutual_info = 0;
      for i = 1:size(joint_prob, 1)
        for j = 1:size(joint_prob, 2)
          mutual_info = mutual_info + joint_prob(i,j) * ...
            log(joint_prob(i,j) / (marginal_true(i) * marginal_pred(j)));
        end
      end

      % Entropies
      entropy_true = -sum(marginal_true .* log(marginal_true));
      entropy_pred = -sum(marginal_pred .* log(marginal_pred));

      nmi = 2 * mutual_info / (entropy_true + entropy_pred);
    end

    function accuracy = calculate_clustering_accuracy(true_labels, pred_labels)
      % Clustering accuracy under the best label-to-cluster mapping
      unique_true = unique(true_labels);
      unique_pred = unique(pred_labels);

      % Contingency matrix
      % (fix: pred_labels is the numeric vector returned by kmeans, so it is
      % compared with ==, not strcmp)
      contingency_matrix = zeros(length(unique_true), length(unique_pred));
      for i = 1:length(unique_true)
        for j = 1:length(unique_pred)
          contingency_matrix(i, j) = sum(strcmp(true_labels, unique_true{i}) & ...
            pred_labels == unique_pred(j));
        end
      end

      % Best matching via the Hungarian algorithm
      % (munkres is a third-party File Exchange function; matchpairs, available
      % since R2019a, can be used instead)
      [assignment, ~] = munkres(-contingency_matrix);

      correct_count = 0;
      total_count = length(true_labels);
      for i = 1:length(assignment)
        if assignment(i) > 0
          true_idx = strcmp(true_labels, unique_true{i});
          pred_idx = (pred_labels == unique_pred(assignment(i)));
          correct_count = correct_count + sum(true_idx & pred_idx);
        end
      end

      accuracy = correct_count / total_count;
    end

    function [within_dist, between_dist] = calculate_distance_metrics(features, labels, centroids)
      % Within-cluster and between-cluster distances
      k = size(centroids, 1);

      % Within-cluster distance
      within_dist = 0;
      cluster_counts = zeros(1, k);
      for i = 1:k
        cluster_points = features(labels == i, :);
        if size(cluster_points, 1) > 0
          distances = sqrt(sum((cluster_points - centroids(i, :)).^2, 2));
          within_dist = within_dist + sum(distances);
          cluster_counts(i) = size(cluster_points, 1);
        end
      end
      within_dist = within_dist / sum(cluster_counts);

      % Between-cluster distance
      between_dist = 0;
      count = 0;
      for i = 1:k
        for j = i+1:k
          if cluster_counts(i) > 0 && cluster_counts(j) > 0
            between_dist = between_dist + norm(centroids(i, :) - centroids(j, :));
            count = count + 1;
          end
        end
      end
      between_dist = between_dist / max(count, 1);
    end

    function optimal_k = find_optimal_k(features, max_k, true_labels)
      % Find the optimal number of clusters
      fprintf('Searching for the optimal number of clusters (k = 2 to %d)...\n', max_k);

      k_values = 2:max_k;
      num_metrics = 5;
      metrics = zeros(length(k_values), num_metrics);

      figure('Position', [100, 100, 1200, 800]);

      for i = 1:length(k_values)
        k = k_values(i);
        fprintf('Testing k = %d...\n', k);

        % Clustering
        [cluster_labels, centroids, ~] = kmeans(features, k, ...
          'Replicates', 10, 'Display', 'off');

        % Quality metrics
        quality = KMeansClustering.evaluate_clustering_quality(...
          features, cluster_labels, centroids, true_labels);

        metrics(i, 1) = quality.silhouette_mean;  % higher is better
        metrics(i, 2) = quality.db_index;         % lower is better
        metrics(i, 3) = quality.ch_index;         % higher is better
        if ~isempty(true_labels)
          metrics(i, 4) = quality.adj_rand_index;       % higher is better
          metrics(i, 5) = quality.clustering_accuracy;  % higher is better
        else
          metrics(i, 4:5) = NaN;
        end
      end

      % Normalize the metrics (for a composite score)
      normalized_metrics = zeros(size(metrics));
      for j = 1:num_metrics
        if j == 2  % DB index: lower is better
          if all(metrics(:, j) > 0)
            normalized_metrics(:, j) = 1 - (metrics(:, j) - min(metrics(:, j))) / ...
              (max(metrics(:, j)) - min(metrics(:, j)));
          end
        else       % the other metrics: higher is better
          if all(~isnan(metrics(:, j)))
            normalized_metrics(:, j) = (metrics(:, j) - min(metrics(:, j))) / ...
              (max(metrics(:, j)) - min(metrics(:, j)));
          end
        end
      end

      % Composite score (ignoring NaN values)
      composite_scores = mean(normalized_metrics, 2, 'omitnan');

      % Optimal k
      [~, optimal_idx] = max(composite_scores);
      optimal_k = k_values(optimal_idx);

      fprintf('Recommended number of clusters: k = %d\n', optimal_k);

      % Metric curves
      subplot(2, 3, 1);
      plot(k_values, metrics(:, 1), 'bo-', 'LineWidth', 2, 'MarkerSize', 8);
      hold on;
      plot(optimal_k, metrics(optimal_idx, 1), 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
      xlabel('Number of clusters k'); ylabel('Silhouette coefficient');
      title('Silhouette coefficient vs k');
      grid on;
      legend('Silhouette', 'Optimal k', 'Location', 'best');

      subplot(2, 3, 2);
      plot(k_values, metrics(:, 2), 'rs-', 'LineWidth', 2, 'MarkerSize', 8);
      hold on;
      plot(optimal_k, metrics(optimal_idx, 2), 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
      xlabel('Number of clusters k'); ylabel('DB index');
      title('DB index vs k');
      grid on;
      legend('DB index', 'Optimal k', 'Location', 'best');

      subplot(2, 3, 3);
      plot(k_values, metrics(:, 3), 'g^-', 'LineWidth', 2, 'MarkerSize', 8);
      hold on;
      plot(optimal_k, metrics(optimal_idx, 3), 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
      xlabel('Number of clusters k'); ylabel('CH index');
      title('CH index vs k');
      grid on;
      legend('CH index', 'Optimal k', 'Location', 'best');

      if ~isempty(true_labels)
        subplot(2, 3, 4);
        plot(k_values, metrics(:, 4), 'md-', 'LineWidth', 2, 'MarkerSize', 8);
        hold on;
        plot(optimal_k, metrics(optimal_idx, 4), 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
        xlabel('Number of clusters k'); ylabel('Adjusted Rand index');
        title('Adjusted Rand index vs k');
        grid on;
        legend('ARI', 'Optimal k', 'Location', 'best');

        subplot(2, 3, 5);
        plot(k_values, metrics(:, 5), 'ch-', 'LineWidth', 2, 'MarkerSize', 8);
        hold on;
        plot(optimal_k, metrics(optimal_idx, 5), 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
        xlabel('Number of clusters k'); ylabel('Clustering accuracy');
        title('Clustering accuracy vs k');
        grid on;
        legend('Accuracy', 'Optimal k', 'Location', 'best');
      end

      subplot(2, 3, 6);
      plot(k_values, composite_scores, 'ko-', 'LineWidth', 2, 'MarkerSize', 8);
      hold on;
      plot(optimal_k, composite_scores(optimal_idx), 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'red');
      xlabel('Number of clusters k'); ylabel('Composite score');
      title('Composite score vs k');
      grid on;
      legend('Composite score', 'Optimal k', 'Location', 'best');

      sgtitle('Selection of the optimal number of clusters');
    end
  end
end
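For online diagnosis, a new signal would go through the same feature extraction and whitening steps and then be assigned to the nearest centroid returned by perform_clustering. A minimal sketch of that last step (the helper name diagnose_new_sample is illustrative, not part of the class), using squared Euclidean distance to match the default 'sqeuclidean' metric:

function [cluster_idx, dist2] = diagnose_new_sample(x_whitened, centroid_positions)
  % Assign one whitened feature vector (1 x p) to the nearest of the k
  % centroids (k x p) learned by KMeansClustering.perform_clustering.
  d2 = sum((centroid_positions - x_whitened).^2, 2);  % squared distance to each centroid
  [dist2, cluster_idx] = min(d2);
end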
4. Complete Main Program and Result Analysis
function main_bearing_fault_clustering()
  % Main program: bearing fault clustering analysis
  fprintf('=== Bearing fault clustering with PCA whitening and K-means ===\n\n');

  % 1. Generate bearing fault data
  fprintf('Step 1: Generating bearing fault data...\n');
  fs = 10000;                  % sampling frequency: 10 kHz
  duration = 1;                % signal duration: 1 s
  num_samples_per_class = 50;  % samples per class

  bearing_data = BearingFaultData(fs, duration);
  [signals, true_labels] = bearing_data.generate_fault_data(num_samples_per_class);

  % Visualize sample signals
  BearingFaultData.plot_sample_signals(signals, true_labels, fs, duration);

  % 2. Feature extraction
  fprintf('\nStep 2: Feature extraction...\n');
  features = BearingFaultData.extract_features(signals, fs);

  % 3. PCA whitening
  fprintf('\nStep 3: PCA whitening...\n');
  [features_whitened, pca_model] = PCAWhitening.apply_pca_whitening(...
    features, 'variance_retained', 0.95);

  % Visualize the PCA whitening results
  PCAWhitening.visualize_pca_results(features, features_whitened, true_labels, pca_model);

  % Analyze the whitening effect
  PCAWhitening.analyze_whitening_effect(features, features_whitened);

  % 4. Find the optimal number of clusters
  fprintf('\nStep 4: Finding the optimal number of clusters...\n');
  optimal_k = KMeansClustering.find_optimal_k(features_whitened, 8, true_labels);

  % 5. K-means clustering
  fprintf('\nStep 5: K-means clustering...\n');
  [cluster_labels, centroids, clustering_model] = ...
    KMeansClustering.perform_clustering(features_whitened, true_labels, 'k', optimal_k);

  % 6. Visualize and analyze the results
  fprintf('\nStep 6: Visualizing and analyzing results...\n');
  visualize_clustering_results(features_whitened, true_labels, cluster_labels, ...
    centroids, clustering_model, pca_model);

  % 7. Performance comparison: original vs whitened features
  fprintf('\nStep 7: Performance comparison...\n');
  compare_clustering_performance(features, features_whitened, true_labels, optimal_k);

  % 8. Fault diagnosis report
  fprintf('\nStep 8: Generating the diagnosis report...\n');
  generate_diagnosis_report(true_labels, cluster_labels, clustering_model);

  fprintf('\n=== Bearing fault clustering analysis complete ===\n');
end

function visualize_clustering_results(features, true_labels, cluster_labels, ...
    centroids, clustering_model, pca_model)
  % Visualize the clustering results
  figure('Position', [100, 100, 1600, 1200]);

  % Unique labels
  unique_true_labels = unique(true_labels);
  unique_cluster_labels = unique(cluster_labels);

  % Subplot 1: ground-truth label distribution (first two whitened components)
  subplot(2, 3, 1);
  gscatter(features(:, 1), features(:, 2), true_labels, [], 'os^d', 10);
  title('Ground-truth labels (whitened features)');
  xlabel('Principal component 1'); ylabel('Principal component 2');
  grid on;

  % Subplot 2: clustering result
  subplot(2, 3, 2);
  gscatter(features(:, 1), features(:, 2), cluster_labels, [], 'os^d', 10);
  hold on;
  plot(centroids(:, 1), centroids(:, 2), 'kx', 'MarkerSize', 15, ...
    'LineWidth', 3, 'DisplayName', 'Cluster centers');
  title('K-means clustering result');
  xlabel('Principal component 1'); ylabel('Principal component 2');
  grid on;
  legend('Location', 'best');

  % Subplot 3: confusion matrix
  subplot(2, 3, 3);
  plot_confusion_matrix(true_labels, cluster_labels, unique_true_labels);
  title('Clustering confusion matrix');

  % Subplot 4: silhouette plot
  subplot(2, 3, 4);
  silhouette(features, cluster_labels);
  title('Silhouette analysis');
  xlabel('Silhouette value'); ylabel('Cluster label');

  % Subplot 5: clustering quality radar chart
  subplot(2, 3, 5);
  plot_clustering_quality_radar(clustering_model.quality);
  title('Clustering quality radar chart');

  % Subplot 6: feature importance
  subplot(2, 3, 6);
  plot_feature_importance(pca_model);
  title('PCA feature importance');

  sgtitle('Bearing fault clustering results');
end

function plot_confusion_matrix(true_labels, pred_labels, class_names)
  % Plot the confusion matrix
  % Numeric codes for the labels
  [~, ~, true_numeric] = unique(true_labels);
  [~, ~, pred_numeric] = unique(pred_labels);

  % Confusion matrix
  cm = confusionmat(true_numeric, pred_numeric);

  % Row-normalize
  cm_normalized = cm ./ sum(cm, 2);

  imagesc(cm_normalized);
  colorbar;

  % Axis labels
  set(gca, 'XTick', 1:length(class_names), 'XTickLabel', class_names);
  set(gca, 'YTick', 1:length(class_names), 'YTickLabel', class_names);
  xlabel('Predicted label');
  ylabel('True label');

  % Cell annotations
  [num_classes, ~] = size(cm);
  for i = 1:num_classes
    for j = 1:num_classes
      text(j, i, sprintf('%.2f\n(%d)', cm_normalized(i,j), cm(i,j)), ...
        'HorizontalAlignment', 'center', 'VerticalAlignment', 'middle', ...
        'Color', 'white', 'FontWeight', 'bold');
    end
  end
end

function plot_clustering_quality_radar(quality)
  % Radar chart of the clustering quality metrics
  metrics = {'Silhouette', 'DB index', 'CH index', 'Adjusted Rand', 'Accuracy'};
  values = [quality.silhouette_mean, 1/quality.db_index, ...  % reciprocal of the DB index
    quality.ch_index/1000, quality.adj_rand_index, quality.clustering_accuracy];

  % Normalize to [0, 1]
  normalized_values = values / max(values);

  % Radar chart
  angles = linspace(0, 2*pi, length(metrics)+1);
  angles = angles(1:end-1);

  polarplot([angles, angles(1)], [normalized_values, normalized_values(1)], 'ro-', 'LineWidth', 2);
  thetaticks(rad2deg(angles));
  thetaticklabels(metrics);
  rlim([0, 1]);
  title('Clustering quality metrics');
end

function plot_feature_importance(pca_model)
  % Plot feature importance (based on PCA)
  % Note: pca_model.explained_variance already stores the cumulative ratio, so
  % the per-component ratio is recomputed from the eigenvalues (the original
  % code took a second cumsum of the cumulative curve).
  per_component_variance = pca_model.eigenvalues / sum(pca_model.eigenvalues);
  cumulative_variance = pca_model.explained_variance;

  bar(per_component_variance(1:10), 'b', 'FaceAlpha', 0.7);
  hold on;
  plot(cumulative_variance(1:10), 'ro-', 'LineWidth', 2, 'MarkerSize', 6);
  xlabel('Principal component index');
  ylabel('Explained variance ratio');
  title('Explained variance of the first 10 principal components');
  legend('Individual component', 'Cumulative ratio', 'Location', 'southeast');
  grid on;
end

function compare_clustering_performance(features_original, features_whitened, true_labels, k)
  % Compare clustering performance on original vs whitened features
  fprintf('Comparing clustering performance on original vs whitened features...\n');

  % Clustering on the original features
  [labels_original, centroids_original, model_original] = ...
    KMeansClustering.perform_clustering(features_original, true_labels, 'k', k);

  % Clustering on the whitened features
  [labels_whitened, centroids_whitened, model_whitened] = ...
    KMeansClustering.perform_clustering(features_whitened, true_labels, 'k', k);

  % Visual comparison
  figure('Position', [100, 100, 1200, 800]);

  metrics = {'Silhouette', 'DB index', 'CH index', 'Adjusted Rand index', 'Accuracy'};
  values_original = [model_original.quality.silhouette_mean, ...
    1/model_original.quality.db_index, ...  % reciprocal of the DB index for comparability
    model_original.quality.ch_index/1000, ...
    model_original.quality.adj_rand_index, ...
    model_original.quality.clustering_accuracy];
  values_whitened = [model_whitened.quality.silhouette_mean, ...
    1/model_whitened.quality.db_index, ...
    model_whitened.quality.ch_index/1000, ...
    model_whitened.quality.adj_rand_index, ...
    model_whitened.quality.clustering_accuracy];

  % Grouped bar chart
  subplot(2, 2, 1);
  bar_data = [values_original; values_whitened]';
  bar(bar_data);
  set(gca, 'XTickLabel', metrics);
  ylabel('Metric value');
  title('Clustering performance metrics');
  legend('Original features', 'PCA-whitened features', 'Location', 'best');
  grid on;
  xtickangle(45);  % replaces the third-party rotateXLabels helper

  % Relative improvement
  subplot(2, 2, 2);
  improvement = (values_whitened - values_original) ./ abs(values_original) * 100;
  bar(improvement, 'FaceColor', [0.2, 0.6, 0.2], 'FaceAlpha', 0.7);
  set(gca, 'XTickLabel', metrics);
  ylabel('Improvement (%)');
  title('Performance gain from PCA whitening');
  grid on;
  xtickangle(45);

  % Clustering result scatter plots
  subplot(2, 2, 3);
  [~, scores_original] = pca(features_original);
  gscatter(scores_original(:, 1), scores_original(:, 2), labels_original);
  title('Clustering on original features');
  xlabel('Principal component 1'); ylabel('Principal component 2');
  grid on;

  subplot(2, 2, 4);
  gscatter(features_whitened(:, 1), features_whitened(:, 2), labels_whitened);
  title('Clustering on whitened features');
  xlabel('Principal component 1'); ylabel('Principal component 2');
  grid on;

  sgtitle('Original features vs PCA-whitened features');

  % Print the comparison
  fprintf('\n=== Performance comparison ===\n');
  fprintf('Metric\t\t\tOriginal\tWhitened\tChange\n');
  fprintf('Silhouette\t\t%.4f\t\t%.4f\t\t%+.1f%%\n', ...
    model_original.quality.silhouette_mean, model_whitened.quality.silhouette_mean, ...
    improvement(1));
  fprintf('DB index\t\t%.4f\t\t%.4f\t\t%+.1f%%\n', ...
    model_original.quality.db_index, model_whitened.quality.db_index, ...
    improvement(2));
  fprintf('CH index\t\t%.1f\t\t%.1f\t\t%+.1f%%\n', ...
    model_original.quality.ch_index, model_whitened.quality.ch_index, ...
    improvement(3));
  fprintf('Adjusted Rand index\t%.4f\t\t%.4f\t\t%+.1f%%\n', ...
    model_original.quality.adj_rand_index, model_whitened.quality.adj_rand_index, ...
    improvement(4));
  fprintf('Accuracy\t\t%.4f\t\t%.4f\t\t%+.1f%%\n', ...
    model_original.quality.clustering_accuracy, model_whitened.quality.clustering_accuracy, ...
    improvement(5));
end

function generate_diagnosis_report(true_labels, cluster_labels, clustering_model)
  % Generate the fault diagnosis report
  fprintf('\n=== Bearing fault diagnosis report ===\n\n');

  % Basic statistics
  unique_true = unique(true_labels);
  unique_cluster = unique(cluster_labels);

  fprintf('Diagnosis statistics:\n');
  fprintf('  Total samples: %d\n', length(true_labels));
  fprintf('  True fault types: %d\n', length(unique_true));
  fprintf('  Identified clusters: %d\n', length(unique_cluster));
  fprintf('  Silhouette coefficient: %.4f\n', clustering_model.quality.silhouette_mean);
  fprintf('  Clustering accuracy: %.2f%%\n', clustering_model.quality.clustering_accuracy * 100);

  % Mapping between clusters and true fault types
  fprintf('\nCluster-to-fault-type mapping:\n');
  contingency = crosstab(true_labels, cluster_labels);
  for i = 1:size(contingency, 1)
    [~, dominant_cluster] = max(contingency(i, :));
    fprintf('  %s -> cluster %d (%.1f%%)\n', ...
      unique_true{i}, dominant_cluster, ...
      contingency(i, dominant_cluster) / sum(contingency(i, :)) * 100);
  end

  % Diagnostic recommendations
  fprintf('\nRecommendations:\n');
  if clustering_model.quality.silhouette_mean > 0.7
    fprintf('  ✅ Excellent clustering quality; fault classes are clearly separated\n');
  elseif clustering_model.quality.silhouette_mean > 0.5
    fprintf('  ⚠️ Good clustering quality; some fault classes overlap\n');
  else
    fprintf('  ❗ Poor clustering quality; review feature extraction or tune the parameters\n');
  end

  if clustering_model.quality.clustering_accuracy > 0.9
    fprintf('  ✅ High fault identification accuracy; diagnosis is reliable\n');
  elseif clustering_model.quality.clustering_accuracy > 0.7
    fprintf('  ⚠️ Moderate identification accuracy; consider adding more samples\n');
  else
    fprintf('  ❗ Low identification accuracy; the method needs further tuning\n');
  end

  % Save the report
  timestamp = datestr(now, 'yyyymmdd_HHMMSS');
  report_filename = sprintf('bearing_diagnosis_report_%s.txt', timestamp);

  fid = fopen(report_filename, 'w');
  fprintf(fid, 'Bearing fault diagnosis report\n');
  fprintf(fid, 'Generated: %s\n\n', datestr(now));
  fprintf(fid, 'Diagnosis statistics:\n');
  fprintf(fid, '  Total samples: %d\n', length(true_labels));
  fprintf(fid, '  Silhouette coefficient: %.4f\n', clustering_model.quality.silhouette_mean);
  fprintf(fid, '  Clustering accuracy: %.2f%%\n', clustering_model.quality.clustering_accuracy * 100);
  fclose(fid);

  fprintf('\nDiagnosis report saved to: %s\n', report_filename);
end

% Run the main program:
main_bearing_fault_clustering();
Reference code: bearing fault clustering analysis with PCA whitening and K-means, www.youwenfan.com/contentcnl/80087.html
Summary
Highlights:
- Realistic data simulation: synthetic vibration signals generated from bearing fault mechanisms
- Comprehensive feature extraction: 18 time-domain and frequency-domain statistical features
- PCA whitening: dimensionality reduction, decorrelation, and feature enhancement
- Automated clustering analysis: the optimal number of clusters is selected automatically
- Multi-faceted evaluation: internal and external clustering quality metrics
Technical strengths:
- Physical realism: built on actual bearing defect frequencies and impact signatures
- Mathematical rigor: complete PCA whitening derivation and the K-means algorithm
- Engineering practicality: readily adaptable to practical bearing fault diagnosis
- Visual analysis: rich graphical presentation of the results
Application value:
- Provides an intelligent solution for rotating machinery fault diagnosis
- Supports unsupervised fault pattern recognition
- Suitable for condition monitoring and predictive maintenance
- Offers technical support for Industry 4.0 and smart manufacturing