"""Fit a logistic-regression classifier on the WDBC data and print its accuracy.

The script downloads ``wdbc.data`` from the UCI repository, uses the ten
``*_mean`` columns as features and the ``diagnosis`` column as the label,
trains on 80% of the rows, and prints the accuracy measured on the
remaining 20%.
"""

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the data. The remote file is read with the column names supplied
# below, so none of its rows is interpreted as a header.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
names = [
    'id', 'diagnosis',
    'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
    'smoothness_mean', 'compactness_mean', 'concavity_mean',
    'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
    'radius_se', 'texture_se', 'perimeter_se', 'area_se',
    'smoothness_se', 'compactness_se', 'concavity_se',
    'concave points_se', 'symmetry_se', 'fractal_dimension_se',
    'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst',
    'smoothness_worst', 'compactness_worst', 'concavity_worst',
    'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst',
]
data = pd.read_csv(url, names=names)

# The first 10 features (columns 2-11, i.e. the "*_mean" columns) are used
# as the gene / gene-variant inputs.
X = data.iloc[:, 2:12]

# The diagnosis is the label: 'M' -> 1, 'B' -> 0.
y = data['diagnosis'].map({'M': 1, 'B': 0})

# Split the data set into training and test sets (20% held out; the fixed
# random_state keeps the split reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Train the logistic-regression model.
# NOTE(review): the features are not scaled and the estimator uses its
# default settings; the solver may emit a convergence warning here --
# confirm, and consider scaling the features or raising max_iter.
lr = LogisticRegression()
lr.fit(X_train, y_train)

# Predict the labels of the test set.
y_pred = lr.predict(X_test)

# Accuracy is the fraction of test rows whose prediction equals the label.
accuracy = np.mean(y_pred == y_test)
print("Accuracy: ", accuracy)