123456789101112131415161718192021222324252627 |
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Step 1: load the dataset. Every column except the last one is used as a
# feature; the last column is used as the target passed to the classifier.
snp_table = pd.read_csv('snp_data.csv')
target = snp_table.iloc[:, -1]
features = snp_table.iloc[:, :-1]

# Step 2: hold out 20% of the rows for testing. The fixed random_state keeps
# the split reproducible between runs.
split_parts = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)
train_features, test_features, train_target, test_target = split_parts

# Step 3: choose the algorithm -- a random forest with 100 estimators and a
# fixed seed.
forest = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 4: fit the model on the training portion only.
forest.fit(train_features, train_target)

# Step 5: score the model on the held-out rows and report accuracy with two
# decimal places.
accuracy = accuracy_score(test_target, forest.predict(test_features))
print(f"Accuracy: {accuracy:.2f}")

# Step 6: persist the fitted model to disk so it can be reloaded later.
joblib.dump(forest, 'snp_model.pkl')
|