main.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334
  # Train and evaluate a logistic-regression classifier on the WDBC
  # (breast-cancer-wisconsin) data file and print its test-set accuracy.
  import numpy as np
  import pandas as pd
  from sklearn.linear_model import LogisticRegression
  from sklearn.model_selection import train_test_split
  from sklearn.preprocessing import StandardScaler

  # Load the data. Column names are supplied here via `names`;
  # header=None states explicitly that the first row is read as data.
  url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
  names = ['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
           'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean',
           'symmetry_mean', 'fractal_dimension_mean', 'radius_se', 'texture_se', 'perimeter_se',
           'area_se', 'smoothness_se', 'compactness_se', 'concavity_se', 'concave points_se',
           'symmetry_se', 'fractal_dimension_se', 'radius_worst', 'texture_worst',
           'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst',
           'concavity_worst', 'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst']
  data = pd.read_csv(url, header=None, names=names)

  # Features: columns 2..11, i.e. the ten `*_mean` columns
  # (radius_mean .. fractal_dimension_mean). The original comment described
  # these as "genes and gene variants".
  X = data.iloc[:, 2:12]

  # Label: diagnosis encoded as 1 for 'M' and 0 for 'B'.
  y = data['diagnosis'].map({'M': 1, 'B': 0})

  # Split into training (80%) and test (20%) sets with a fixed seed.
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

  # Standardise the features before fitting. LogisticRegression uses an
  # iterative, regularised solver; on raw unscaled columns it can stop at the
  # iteration limit (ConvergenceWarning) and penalise coefficients unevenly.
  # The scaler is fitted on the training split only, so no test-set
  # statistics leak into the model.
  scaler = StandardScaler()
  X_train_scaled = scaler.fit_transform(X_train)
  X_test_scaled = scaler.transform(X_test)

  # Train the logistic-regression model.
  lr = LogisticRegression()
  lr.fit(X_train_scaled, y_train)

  # Predict on the test set (scaled with the training-set statistics).
  y_pred = lr.predict(X_test_scaled)

  # Accuracy: fraction of test rows whose prediction equals the label.
  accuracy = np.mean(y_pred == y_test)
  print("Accuracy: ", accuracy)