|
@@ -1,34 +0,0 @@
|
|
-import pandas as pd
|
|
|
|
-import numpy as np
|
|
|
|
-from sklearn.linear_model import LogisticRegression
|
|
|
|
-from sklearn.model_selection import train_test_split
|
|
|
|
-
|
|
|
|
-# Load the data from the URL below, supplying the column names explicitly via `names`
|
|
|
|
-url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
|
|
|
|
-names = ['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
|
|
|
|
- 'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean',
|
|
|
|
- 'symmetry_mean', 'fractal_dimension_mean', 'radius_se', 'texture_se', 'perimeter_se',
|
|
|
|
- 'area_se', 'smoothness_se', 'compactness_se', 'concavity_se', 'concave points_se',
|
|
|
|
- 'symmetry_se', 'fractal_dimension_se', 'radius_worst', 'texture_worst',
|
|
|
|
- 'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst',
|
|
|
|
- 'concavity_worst', 'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst']
|
|
|
|
-data = pd.read_csv(url, names=names)
|
|
|
|
-
|
|
|
|
-# Use columns 2-11 (the ten *_mean features) as inputs; the original note treats them as stand-ins for genes and gene variants (NOTE(review): confirm intent)
|
|
|
|
-X = data.iloc[:, 2:12]
|
|
|
|
-# Use the diagnosis column as the label, mapped as M -> 1 and B -> 0
|
|
|
|
-y = data['diagnosis'].map({'M': 1, 'B': 0})
|
|
|
|
-
|
|
|
|
-# Split the dataset into training and test sets (test_size=0.2, fixed random_state=0)
|
|
|
|
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
|
|
|
|
-
|
|
|
|
-# Train the logistic regression model (default constructor arguments) on the training split
|
|
|
|
-lr = LogisticRegression()
|
|
|
|
-lr.fit(X_train, y_train)
|
|
|
|
-
|
|
|
|
-# Predict labels for the test set
|
|
|
|
-y_pred = lr.predict(X_test)
|
|
|
|
-
|
|
|
|
-# Compute the accuracy as the mean of element-wise matches between predictions and true labels
|
|
|
|
-accuracy = np.mean(y_pred == y_test)
|
|
|
|
-print("Accuracy: ", accuracy)
|
|
|