# Function to load and preprocess the data
def get_data(data_path='./高光谱数据集/KSC.mat',
             label_path='./高光谱数据集/KSC_gt.mat'):
    """Load the KSC hyperspectral data and keep only the labelled pixels.

    Args:
        data_path: Path of the ``.mat`` file that stores the data under the
            key ``'KSC'``.
        label_path: Path of the ``.mat`` file that stores the ground truth
            under the key ``'KSC_gt'``.

    Returns:
        A ``(data, label)`` tuple of NumPy arrays. ``data`` has one row of
        176 values per kept pixel and ``label`` holds the matching non-zero
        labels, in the original pixel order.
    """
    # Imported locally: in the visible part of the file ``loadmat`` is only
    # imported after the ``__main__`` block that calls this function.
    from scipy.io import loadmat

    # Load the hyperspectral dataset
    dat = loadmat(data_path)['KSC']
    lab = loadmat(label_path)['KSC_gt']

    # Flatten to one row per pixel (176 values each) and one label per pixel.
    dat = dat.reshape((-1, 176))
    lab = lab.reshape(-1)
    print(dat.shape)  # Print the shape of the data

    # Keep only pixels whose label is not 0. A boolean mask replaces the
    # per-pixel Python loop and keeps the (0, 176) shape when nothing matches.
    labelled = lab != 0
    return dat[labelled], lab[labelled]
from sklearn.preprocessing import StandardScaler from sklearn.feature_selection import VarianceThreshold import numpy as np
# Function to process the data
def process(data):
    """Standardize the features and drop the low-variance ones.

    The data is standardized with ``StandardScaler``, filtered with a
    default ``VarianceThreshold`` (threshold 0), and filtered again using
    the median of the remaining per-feature variances as threshold. The
    shape is printed after each step.

    Args:
        data: 2-D array-like with one row per sample and one column per
            feature.

    Returns:
        The transformed array containing only the features that passed
        both variance filters.
    """
    # Standardize the data
    data = StandardScaler().fit_transform(data)
    print('shape={}'.format(data.shape))

    # Apply VarianceThreshold (default threshold is 0)
    selector = VarianceThreshold()
    data = selector.fit_transform(data)
    print(data.shape)

    # Use the median per-feature variance as the second threshold.
    # NOTE(review): the data was standardized above, so the remaining
    # variances are presumably all ~1 and this cut may be decided by
    # floating-point noise - confirm that filtering after scaling is intended.
    median_num = np.median(np.var(data, axis=0))
    data = VarianceThreshold(median_num).fit_transform(data)
    print(data.shape)

    # The caller rebinds ``data`` to this function's result, so the processed
    # array has to be returned (the original fell through and returned None).
    return data
import pandas as pd from sklearn.feature_selection import SelectKBest, f_classif
# Function to select k best features
def select_k(data, label, k):
    """Select the ``k`` best features with ``SelectKBest`` and ``f_classif``.

    Prints the fitted selector, a per-feature table (score, p-value and
    whether the feature was selected) sorted by descending score, and the
    selected column indices.

    Args:
        data: 2-D array with one row per sample and one column per feature.
        label: 1-D array of class labels, one per row of ``data``.
        k: Number of features to keep.

    Returns:
        The column indices of the selected features, as returned by
        ``get_support(indices=True)``.
    """
    # Apply SelectKBest with f_classif scoring
    results = SelectKBest(f_classif, k=k).fit(data, label)
    print(results)

    # Feature scores, p-values and selection status, best score first
    features = pd.DataFrame({
        "score": results.scores_,
        "pvalue": results.pvalues_,
        "select": results.get_support(),
    })
    features = features.sort_values("score", ascending=False)
    print(features)

    # Indices of the selected features
    index = results.get_support(indices=True)
    print(index)
    return index
from sklearn.feature_selection import RFE from sklearn.linear_model import LogisticRegression
# Function to perform Recursive Feature Elimination (RFE)
def rfe(data, label, n):
    """Select ``n`` features by recursive feature elimination.

    Uses ``RFE`` with a default ``LogisticRegression`` estimator. Prints the
    (unfitted) selector and the selected column indices.

    Args:
        data: 2-D array with one row per sample and one column per feature.
        label: 1-D array of class labels, one per row of ``data``.
        n: Number of features to select.

    Returns:
        The column indices of the selected features, as returned by
        ``get_support(indices=True)``.
    """
    # RFE with LogisticRegression as the estimator, keeping 'n' features
    results = RFE(estimator=LogisticRegression(), n_features_to_select=n)
    print(results)

    # Fit RFE to the data and labels
    results.fit(data, label)

    # Indices of the selected features
    index = results.get_support(indices=True)
    print(index)
    return index
import numpy as np from sklearn.model_selection import train_test_split
# Function to select specific features based on the provided indices and split the data
def select_index_data(index, data, label):
    """Keep the columns of ``data`` listed in ``index`` and split train/test.

    Prints the shape of the reduced data and of the labels before splitting.

    Args:
        index: Integer column indices of the features to keep.
        data: 2-D NumPy array with one row per sample.
        label: 1-D NumPy array of labels, one per row of ``data``.

    Returns:
        The result of ``train_test_split`` with ``test_size=0.3`` and
        ``random_state=1``: train data, test data, train labels, test labels.
    """
    # One fancy-indexing step replaces appending each column and transposing;
    # the columns keep the order given by ``index``.
    data_after = data[:, np.asarray(index, dtype=np.intp)]
    print(data_after.shape)  # Print the shape of the selected data
    print(label.shape)  # Print the shape of the labels

    # Split the selected data and labels into training and test sets
    return train_test_split(data_after, label, test_size=0.3, random_state=1)
from sklearn.svm import SVC from sklearn import metrics
# Function to measure the performance of the SVM classifier
def measure_feature(train_data, test_data, train_label, test_label, gamma, c):
    """Train a polynomial-kernel SVM and score it on ``test_data``.

    Args:
        train_data: Feature matrix used to fit the classifier.
        test_data: Feature matrix the fitted classifier predicts on.
        train_label: Labels for ``train_data``.
        test_label: True labels for ``test_data``.
        gamma: ``gamma`` parameter passed to ``SVC``.
        c: ``C`` parameter passed to ``SVC``.

    Returns:
        A tuple ``(acc, f1, recall, precision)``; F1, recall and precision
        use micro averaging.
    """
    # Initialize and train the SVM classifier with a polynomial kernel
    clf = SVC(kernel='poly', gamma=gamma, C=c)
    clf.fit(train_data, train_label)

    # Predict the labels for the test data
    predict = clf.predict(test_data)

    # Calculate performance metrics
    acc = metrics.accuracy_score(test_label, predict)
    f1 = metrics.f1_score(test_label, predict, average='micro')
    recall = metrics.recall_score(test_label, predict, average='micro')
    precision = metrics.precision_score(test_label, predict, average='micro')
    return acc, f1, recall, precision
if __name__ == '__main__':
    # Load and preprocess the data
    data, label = get_data()
    data = process(data)

    # Select features using SelectKBest with k=50.
    # Alternative selector: index = rfe(data, label, n=30)
    index = select_k(data, label, k=50)

    # Select train and test data based on selected features
    train_data, test_data, train_label, test_label = select_index_data(
        index, data, label
    )

    # Print shapes of train and test sets
    print(train_data.shape, test_data.shape, train_label.shape, test_label.shape)

    # Define hyperparameters for the SVM classifier
    gamma, c = 0.125, 60

    # Measure performance on the training data
    train_acc, train_f1, train_recall, train_precision = measure_feature(
        train_data, train_data, train_label, train_label, gamma, c
    )

    # Measure performance on the test data
    test_acc, test_f1, test_recall, test_precision = measure_feature(
        train_data, test_data, train_label, test_label, gamma, c
    )

    # Print training and test accuracy
    print(f"Training Accuracy: {train_acc}, Test Accuracy: {test_acc}")

    # Print other performance metrics
    print(f"Training F1 Score: {train_f1}, Test F1 Score: {test_f1}")
    print(f"Training Recall: {train_recall}, Test Recall: {test_recall}")
    print(f"Training Precision: {train_precision}, Test Precision: {test_precision}")
import pandas as pd import numpy as np from scipy.io import loadmat import sklearn from sklearn import preprocessing from sklearn.ensemble import RandomForestClassifier as RFC from sklearn.feature_selection import SelectFromModel from sklearn.feature_selection import VarianceThreshold from sklearn.feature_selection import SelectKBest, f_classif from sklearn.linear_model import LogisticRegression from sklearn.feature_selection import RFE from sklearn.model_selection import train_test_split from sklearn.metrics import confusion_matrix from sklearn import metrics
def get_data(data_path='./高光谱数据集/KSC.mat',
             label_path='./高光谱数据集/KSC_gt.mat'):
    """Load the KSC hyperspectral data and keep only the labelled pixels.

    Args:
        data_path: Path of the ``.mat`` file that stores the data under the
            key ``'KSC'``.
        label_path: Path of the ``.mat`` file that stores the ground truth
            under the key ``'KSC_gt'``.

    Returns:
        A ``(data, label)`` tuple of NumPy arrays. ``data`` has one row of
        176 values per kept pixel and ``label`` holds the matching non-zero
        labels, in the original pixel order.
    """
    dat = loadmat(data_path)['KSC']
    lab = loadmat(label_path)['KSC_gt']

    # Flatten to one row per pixel (176 values each) and one label per pixel.
    dat = dat.reshape(-1, 176)
    lab = lab.reshape(-1)
    print(dat.shape)

    # Keep only pixels whose label is not 0. The mask replaces the per-pixel
    # loop and its ``lab[i].all() != 0`` test on a scalar, and keeps the
    # (0, 176) shape when nothing matches.
    labelled = lab != 0
    return dat[labelled], lab[labelled]
def process(data):
    """Standardize the features and drop the low-variance ones.

    The data is standardized with ``StandardScaler``, filtered with a
    default ``VarianceThreshold`` (threshold 0), and filtered again using
    the median of the remaining per-feature variances as threshold. The
    shape is printed after each step.

    Args:
        data: 2-D array-like with one row per sample and one column per
            feature.

    Returns:
        The transformed array containing only the features that passed
        both variance filters.
    """
    data = preprocessing.StandardScaler().fit_transform(data)
    print('shape={}'.format(data.shape))

    # Instantiate; with no argument the variance threshold defaults to 0.
    selector = VarianceThreshold()
    data = selector.fit_transform(data)
    print(data.shape)

    # The threshold must be a variance: take the median of the per-feature
    # variances. The median of the raw standardized values (as before) is
    # not a variance and can be negative, which is not a valid threshold.
    # NOTE(review): after standardization the variances are presumably all
    # ~1, so this cut may hinge on floating-point noise - confirm intent.
    median_num = np.median(np.var(data, axis=0))
    data = VarianceThreshold(median_num).fit_transform(data)
    print(data.shape)
    return data