NSGA2算法特征选择MATLAB实现（多目标）

利用NSGA2进行特征选择的主要思想是：将子集的选择看作一个搜索寻优问题（wrapper方法），生成不同的特征组合，对每个组合进行评价，再与其他组合进行比较。这样就把子集的选择转化为一个优化问题。

需要优化的两个目标为特征数和精度。

nsga2是一个多目标优化算法。

具体的nsga2通用算法请看：https://omegaxyz.com/2018/01/22/new_nsga2/

具体的特征选择代码在上述代码的基础上改了两处：①主函数、②评价函数，并增加了一个将数据划分为训练集和测试集的函数：

function divide_datasets()
%DIVIDE_DATASETS Min-max normalise the Parkinson data and save a train/test split.
%   Reads the variables Parkinson_f (feature matrix, one row per sample) and
%   Parkinson_label from Parkinson.mat, rescales every feature column to
%   [0, 1], draws a random 10-fold partition with crossvalind and writes
%   train_F.mat, train_L.mat, test_F.mat and test_L.mat to the current folder.
%
%   Raises divide_datasets:missingVariable when Parkinson.mat does not
%   contain both expected variables.

% Load into a struct so a wrongly named variable is reported clearly instead
% of failing later with "undefined function or variable".
data = load('Parkinson.mat');
if ~isfield(data, 'Parkinson_f') || ~isfield(data, 'Parkinson_label')
    error('divide_datasets:missingVariable', ...
        'Parkinson.mat must contain the variables Parkinson_f and Parkinson_label.');
end
dataMat = data.Parkinson_f;
Parkinson_label = data.Parkinson_label;
len = size(dataMat,1);

% Min-max normalisation, column by column (dimension given explicitly so a
% single-row matrix is still treated per column).
minV = min(dataMat, [], 1);
maxV = max(dataMat, [], 1);
span = maxV - minV;
span(span == 0) = 1;   % constant column: avoid 0/0 = NaN, the column becomes all zeros
newdataMat = (dataMat - repmat(minV,[len,1])) ./ repmat(span,[len,1]);

% Random 10-fold assignment; folds 1-3 form the training set, folds 4-10 the
% test set.
% NOTE(review): this trains on roughly 30% and tests on roughly 70% of the
% samples - confirm that the split is not meant to be the other way round.
Indices = crossvalind('Kfold', length(Parkinson_label), 10);
isTrain = ismember(Indices, [1 2 3]);

train_F = newdataMat(isTrain,:);
train_L = Parkinson_label(isTrain);
test_F = newdataMat(~isTrain,:);
test_L = Parkinson_label(~isTrain);

% One MAT-file per variable; evaluate_objective loads them by these names.
save('train_F.mat', 'train_F');
save('train_L.mat', 'train_L');
save('test_F.mat', 'test_F');
save('test_L.mat', 'test_L');
end
%what doesn't kill you makes you stronger, stand a little taller, doesn't mean I'm over 'cause you're gone.

function divide_datasets()
%DIVIDE_DATASETS Min-max normalise the Parkinson data and save a train/test split.
%   Reads the variables Parkinson_f (feature matrix, one row per sample) and
%   Parkinson_label from Parkinson.mat, rescales every feature column to
%   [0, 1], draws a random 10-fold partition with crossvalind and writes
%   train_F.mat, train_L.mat, test_F.mat and test_L.mat to the current folder.
%
%   Raises divide_datasets:missingVariable when Parkinson.mat does not
%   contain both expected variables.

% Load into a struct so a wrongly named variable is reported clearly instead
% of failing later with "undefined function or variable".
data = load('Parkinson.mat');
if ~isfield(data, 'Parkinson_f') || ~isfield(data, 'Parkinson_label')
    error('divide_datasets:missingVariable', ...
        'Parkinson.mat must contain the variables Parkinson_f and Parkinson_label.');
end
dataMat = data.Parkinson_f;
Parkinson_label = data.Parkinson_label;
len = size(dataMat,1);

% Min-max normalisation, column by column (dimension given explicitly so a
% single-row matrix is still treated per column).
minV = min(dataMat, [], 1);
maxV = max(dataMat, [], 1);
span = maxV - minV;
span(span == 0) = 1;   % constant column: avoid 0/0 = NaN, the column becomes all zeros
newdataMat = (dataMat - repmat(minV,[len,1])) ./ repmat(span,[len,1]);

% Random 10-fold assignment; folds 1-3 form the training set, folds 4-10 the
% test set.
% NOTE(review): this trains on roughly 30% and tests on roughly 70% of the
% samples - confirm that the split is not meant to be the other way round.
Indices = crossvalind('Kfold', length(Parkinson_label), 10);
isTrain = ismember(Indices, [1 2 3]);

train_F = newdataMat(isTrain,:);
train_L = Parkinson_label(isTrain);
test_F = newdataMat(~isTrain,:);
test_L = Parkinson_label(~isTrain);

% One MAT-file per variable; evaluate_objective loads them by these names.
save('train_F.mat', 'train_F');
save('train_L.mat', 'train_L');
save('test_F.mat', 'test_F');
save('test_L.mat', 'test_L');
end

%what doesn't kill you makes you stronger, stand a little taller, doesn't mean I'm over 'cause you're gone.

MATLAB代码主函数:

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NSGA-II driver script for two-objective feature selection.
% The settings in this section may be changed.
% More machine-learning material: omegaxyz.com
clc;
clear;
pop = 500; % population size
gen = 100; % number of generations
M = 2; % number of objectives
V = 22; % number of decision variables (one gene per candidate feature)
min_range = zeros(1, V); % lower bound of every gene
max_range = ones(1,V); % upper bound of every gene
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Feature-selection set-up
divide_datasets();   % writes the train/test MAT-files read by evaluate_objective
global answer
% NOTE(review): evaluate_objective writes row i (its 4th argument) of this
% cell array; rows beyond M are added on demand by the assignment.
answer=cell(M,3);
global choice     % threshold: evaluate_objective selects a feature when its gene exceeds this value
choice=0.8;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
chromosome = initialize_variables(pop, M, V, min_range, max_range);
chromosome = non_domination_sort_mod(chromosome, M, V);

% Operator settings do not change between generations, so set them once.
pool = round(pop/2);   % passed to tournament_selection
tour = 2;              % passed to tournament_selection
mu = 20;               % passed to genetic_operator
mum = 20;              % passed to genetic_operator

for i = 1 : gen
    parent_chromosome = tournament_selection(chromosome, pool, tour);
    offspring_chromosome = genetic_operator(parent_chromosome,M, V, mu, mum, min_range, max_range);
    [main_pop,~] = size(chromosome);
    [offspring_pop,~] = size(offspring_chromosome);
    % Build the merged population in a fresh buffer every generation. The
    % original `clear temp` named a variable that never exists, so the buffer
    % from the previous generation was reused; if the number of offspring
    % shrinks between generations, stale rows would otherwise be kept.
    intermediate_chromosome = zeros(main_pop + offspring_pop, max(size(chromosome,2), M+V));
    intermediate_chromosome(1:main_pop,1:size(chromosome,2)) = chromosome;
    % Offspring supply only the first M+V columns; the remaining columns stay
    % zero, exactly as in the first generation of the original script.
    intermediate_chromosome(main_pop + 1 : main_pop + offspring_pop,1 : M+V) = offspring_chromosome;
    intermediate_chromosome = non_domination_sort_mod(intermediate_chromosome, M, V);
    chromosome = replace_chromosome(intermediate_chromosome, M, V, pop);
    if ~mod(i,100)
        clc;
        fprintf('%d generations completed\n',i);
    end
end

% Plot the objective columns (V+1 ... V+M) of the final population.
if M == 2
    plot(chromosome(:,V + 1),chromosome(:,V + 2),'*');
    xlabel('f_1'); ylabel('f_2');
    title('Pareto Optimal Front');
elseif M == 3
    plot3(chromosome(:,V + 1),chromosome(:,V + 2),chromosome(:,V + 3),'*');
    xlabel('f_1'); ylabel('f_2'); zlabel('f_3');
    title('Pareto Optimal Surface');
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NSGA-II driver script for two-objective feature selection.
% The settings in this section may be changed.
% More machine-learning material: omegaxyz.com
clc;
clear;
pop = 500; % population size
gen = 100; % number of generations
M = 2; % number of objectives
V = 22; % number of decision variables (one gene per candidate feature)
min_range = zeros(1, V); % lower bound of every gene
max_range = ones(1,V); % upper bound of every gene
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Feature-selection set-up
divide_datasets();   % writes the train/test MAT-files read by evaluate_objective
global answer
% NOTE(review): evaluate_objective writes row i (its 4th argument) of this
% cell array; rows beyond M are added on demand by the assignment.
answer=cell(M,3);
global choice     % threshold: evaluate_objective selects a feature when its gene exceeds this value
choice=0.8;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
chromosome = initialize_variables(pop, M, V, min_range, max_range);
chromosome = non_domination_sort_mod(chromosome, M, V);

% Operator settings do not change between generations, so set them once.
pool = round(pop/2);   % passed to tournament_selection
tour = 2;              % passed to tournament_selection
mu = 20;               % passed to genetic_operator
mum = 20;              % passed to genetic_operator

for i = 1 : gen
    parent_chromosome = tournament_selection(chromosome, pool, tour);
    offspring_chromosome = genetic_operator(parent_chromosome,M, V, mu, mum, min_range, max_range);
    [main_pop,~] = size(chromosome);
    [offspring_pop,~] = size(offspring_chromosome);
    % Build the merged population in a fresh buffer every generation. The
    % original `clear temp` named a variable that never exists, so the buffer
    % from the previous generation was reused; if the number of offspring
    % shrinks between generations, stale rows would otherwise be kept.
    intermediate_chromosome = zeros(main_pop + offspring_pop, max(size(chromosome,2), M+V));
    intermediate_chromosome(1:main_pop,1:size(chromosome,2)) = chromosome;
    % Offspring supply only the first M+V columns; the remaining columns stay
    % zero, exactly as in the first generation of the original script.
    intermediate_chromosome(main_pop + 1 : main_pop + offspring_pop,1 : M+V) = offspring_chromosome;
    intermediate_chromosome = non_domination_sort_mod(intermediate_chromosome, M, V);
    chromosome = replace_chromosome(intermediate_chromosome, M, V, pop);
    if ~mod(i,100)
        clc;
        fprintf('%d generations completed\n',i);
    end
end   % closes the generation loop (this `end` was missing from the listing)

% Plot the objective columns (V+1 ... V+M) of the final population.
if M == 2
    plot(chromosome(:,V + 1),chromosome(:,V + 2),'*');
    xlabel('f_1'); ylabel('f_2');
    title('Pareto Optimal Front');
elseif M == 3
    plot3(chromosome(:,V + 1),chromosome(:,V + 2),chromosome(:,V + 3),'*');
    xlabel('f_1'); ylabel('f_2'); zlabel('f_3');
    title('Pareto Optimal Surface');
end

评价函数（利用林智仁的LIBSVM进行训练）：

function f = evaluate_objective(x, M, V, i)
%EVALUATE_OBJECTIVE Objectives for wrapper feature selection: subset size and SVM test error.
%   f = evaluate_objective(x, M, V)
%   f = evaluate_objective(x, M, V, i)
%
%   x - individual; its first V entries are the decision variables.
%   M - number of objectives (not used in the body; kept for the callers'
%       signature).
%   V - number of decision variables, one per candidate feature.
%   i - optional row of the global cell array `answer` in which to record
%       {feature count, error rate, selection mask}. When omitted or empty
%       nothing is recorded, so callers that pass only (x, M, V) keep working
%       instead of failing with "not enough input arguments".
%
%   f - 1x2 row vector: f(1) is the number of selected features, f(2) the
%       misclassification rate on the saved test split.
%
%   Reads the global threshold `choice` and the files train_F.mat,
%   train_L.mat, test_F.mat and test_L.mat from the current folder.
global answer
global choice

recordResult = nargin >= 4 && ~isempty(i);

% A feature is selected when its gene exceeds the threshold.
genes = x(1:V);
inmodel = genes(:).' > choice;

f = zeros(1, 2);
f(1) = sum(inmodel);

if ~any(inmodel)
    % No feature selected: no classifier can be trained on a zero-column
    % matrix, so report the worst possible error rate instead of calling
    % the SVM.
    f(2) = 1;
else
    % Struct-form load keeps the workspace explicit.
    s = load('train_F.mat'); train_F = s.train_F;
    s = load('train_L.mat'); train_L = s.train_L;
    s = load('test_F.mat');  test_F = s.test_F;
    s = load('test_L.mat');  test_L = s.test_L;

    model = libsvmtrain(train_L,train_F(:,inmodel), '-s 0 -t 2 -c 1.2 -g 2.8');
    [predict_label, ~, ~] = libsvmpredict(test_L,test_F(:,inmodel),model,'-q');

    % Fraction of misclassified test samples.
    f(2) = mean(predict_label(:) ~= test_L(:));
end

if recordResult
    answer(i,1)={f(1)};
    answer(i,2)={f(2)};
    answer(i,3)={inmodel};
end
end

function f = evaluate_objective(x, M, V, i)
%EVALUATE_OBJECTIVE Objectives for wrapper feature selection: subset size and SVM test error.
%   f = evaluate_objective(x, M, V)
%   f = evaluate_objective(x, M, V, i)
%
%   x - individual; its first V entries are the decision variables.
%   M - number of objectives (not used in the body; kept for the callers'
%       signature).
%   V - number of decision variables, one per candidate feature.
%   i - optional row of the global cell array `answer` in which to record
%       {feature count, error rate, selection mask}. When omitted or empty
%       nothing is recorded, so callers that pass only (x, M, V) keep working
%       instead of failing with "not enough input arguments".
%
%   f - 1x2 row vector: f(1) is the number of selected features, f(2) the
%       misclassification rate on the saved test split.
%
%   Reads the global threshold `choice` and the files train_F.mat,
%   train_L.mat, test_F.mat and test_L.mat from the current folder.
global answer
global choice

recordResult = nargin >= 4 && ~isempty(i);

% A feature is selected when its gene exceeds the threshold.
genes = x(1:V);
inmodel = genes(:).' > choice;

f = zeros(1, 2);
f(1) = sum(inmodel);

if ~any(inmodel)
    % No feature selected: no classifier can be trained on a zero-column
    % matrix, so report the worst possible error rate instead of calling
    % the SVM.
    f(2) = 1;
else
    % Struct-form load keeps the workspace explicit.
    s = load('train_F.mat'); train_F = s.train_F;
    s = load('train_L.mat'); train_L = s.train_L;
    s = load('test_F.mat');  test_F = s.test_F;
    s = load('test_L.mat');  test_L = s.test_L;

    model = libsvmtrain(train_L,train_F(:,inmodel), '-s 0 -t 2 -c 1.2 -g 2.8');
    [predict_label, ~, ~] = libsvmpredict(test_L,test_F(:,inmodel),model,'-q');

    % Fraction of misclassified test samples. Computed in one vectorised
    % step; the listing's counting loop had lost its closing `end`, which
    % put the division by length(test_L) inside the loop.
    f(2) = mean(predict_label(:) ~= test_L(:));
end

if recordResult
    answer(i,1)={f(1)};
    answer(i,2)={f(2)};
    answer(i,3)={inmodel};
end
end

选用的数据集请从UCI上下载。

结果：

①pareto面

最后粒子的数据（选出的特征数和精确度）

PSO单目标特征选择请见：https://omegaxyz.com/2018/01/21/psofs/

21 评论

wang

2023-10-26 / 16:10 回复

求.mat文件
Star

2022-04-21 / 21:18 回复

特征项为0的时候，错误率不应该是100%吗，为什么是25%
- xyjisaw
  
  2022-04-22 / 15:17 回复
  
  瞎猜有时候也能蒙对
Ban

2021-03-12 / 18:47 回复

运行主函数时出现这个情况：
输入参数的数目不足。

出错 evaluate_objective (line 12)
answer(i,1)={f(1)};

出错 initialize_variables (line 9)
f(i,V + 1:K) = evaluate_objective(f(i,:), M, V);

出错 nsga_2_optimization (line 18)
chromosome = initialize_variables(pop, M, V, min_range, max_range);
请问怎么解决，刚刚开始接触请大佬帮我解答一下！
- Alphen.
  
  2021-11-19 / 16:32 回复
  
  我也遇到了同样的问题，希望大佬能帮忙解答一下，十分感谢
  - wang
    
    2023-10-26 / 18:05 回复
    
    请问你解决了吗
James

2021-01-14 / 19:03 回复

请问添加了数据集后，运行出现这个错误：
未定义函数或变量 ‘Parkinson_f’。
出错 divide_datasets (line 3)
dataMat=Parkinson_f;

Parkinson_f没有定义，怎么修改
- wang
  
  2023-10-26 / 16:09 回复
  
  同问，你解决了吗？
杨

2020-09-24 / 15:43 回复

请问initialize_variables函数没有更改吗？如果没有更改，f会出现问题。
- xyjisaw
  
  2020-09-24 / 23:26 回复
  
  可以试试随机初始化
李路

2020-07-03 / 12:09 回复

大神，answer=cell(M,3);为啥要设为两行三列呢，3是什么含义呢，temp_x = x(1:V);这句话代表啥意思呢，f(1) = sum(inmodel(1,:));这个是特征的目标函数吗，为啥要求和呢，希望大神给予指导，我看好久了，实在不知道这几句为啥是这样的，还有就是把svmtrain和svmpredict换为其他网络时，如BP神经网络，子文件BPtrain的第一句应该怎么写呢，怎么调用都说输入参数不足。非常感谢大神能给予指导
- xyjisaw
  
  2020-07-03 / 17:50 回复
  
  你好，这个是多目标特征选择问题，有两个目标，其中一个是特征数，因此有一个是要求特征总和，如果要换成其他网络，请查看输入的接口，输入进分类器也就是你的BP是种群或者个体的编码也就是X值。
diseir

2020-07-02 / 09:55 回复

大佬，上面给的程序选择特征的个数是随机的，如果想要人为给定特征选择的个数范围，比如说在2到5之间，但最终得到的pareto前沿图只出现了个数为2或3的点，并不能显示出所有的点（如个数为4，5的点就缺失了），您知道为什么么？
- xyjisaw
  
  2020-07-02 / 10:42 回复
  
  抱歉，我不太清楚。
无

2020-07-01 / 22:42 回复

麻烦大佬给一下这个例子的所有代码可以么？（虽说是只有主函数和评价函数做了大的修改，其他nsga通用算法也会多少有一些变化）好人一生平安！好人一生平安！好人一生平安！
匿名

2019-08-23 / 11:34 回复

请问可以给一下Parkinson.mat文件吗？用自己下载的mat文件各种出错，感激不尽
董佳豪

2019-03-11 / 20:24 回复

评价函数中function f = evaluate_objective(x, M, V, i)的是输入的什么啊？因为在上个NSGA2代码里面，评价函数是function f = evaluate_objective(x, M, V），这里没有i，所以我有点不明白，还望老师指导一下，谢谢！！
- xyjisaw
  
  2019-03-11 / 23:15 回复
  
  x是当前个体，M是目标数量，V是维度，i是个体的标号，有时i是我作为控制变量的，有的时候可以不用。
  - 匿名
    
    2019-03-13 / 09:12 回复
    
    明白了非常感谢
匿名

2018-05-03 / 16:56 回复

能麻烦大神给一下你的Parkinson.mat文件吗？用自己下载的mat文件各种出错，感激不尽

NSGA2算法特征选择MATLAB实现（多目标）

大模型AlpacaFarm分析

NLG文本评估任务或许并不需要真值或参考文本

大模型中的RepE表征工程

大模型也是一种优化器（LLM as Optimizer）

全栈开发与快速部署Demo

学术idea自动发现与生成

自回归语言模型（language model）Python实现

粉丝期待的三体电影宇宙（近四十部电影与电视剧集）

基于历史对比学习的时序知识图谱推理

泰拉瑞亚Terriaria快速部署Linux服务器

21 评论

留下评论取消回复

相关文章

21 评论

留下评论取消回复