预测正确的标签应该是什么

我有下面这段可以正常运行的代码,使用训练/测试数据能够得到93%的准确率。数据集中的每条数据都被标记为“1”或“2”。我想做的是向模型提供一个计数向量,让模型判断它应该被赋予哪个标签。

The dataset used can be found here: https://mega.nz/file/MdhmRI6A#WLfjK7e4moaCUb5ff4HchY8Xyavx-UpINKp27QH3N_g

from pandas import read_csv
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import os, json, tkinter, sys, csv
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from tkinter import filedialog
from tkinter import messagebox
from tkinter import *
from PIL import ImageTk, Image
import numpy as np
import matplotlib.pyplot as plt


# Train a decision tree on the CSV dataset and report its accuracy on a
# held-out 20% test split.
filename = 'Dataset/Dataset1.csv'

# A single read is enough: the header row already supplies the column names,
# so there is no need to parse the file a second time with names=/skiprows=.
dataframe = read_csv(filename)
featureList = list(dataframe.columns)
array = dataframe.values

# Index of the last column. The feature slice below stops just before it,
# so the remaining (last) column is treated as the label.
# NOTE(review): assumes the label is stored in the last CSV column -- confirm.
column_size = dataframe.shape[1] - 1

X = array[:, 0:column_size]
# The label must be selected by *column* index. The previous code used the
# row count here, which picks an unrelated column or raises IndexError.
Y = array[:, column_size]

test_size = 0.20
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=0
)

clf = DecisionTreeClassifier()
clf = clf.fit(X_train, Y_train)

print("\n\n\n\n")
Y_pred = clf.predict(X_test)

# accuracy_score returns a fraction in [0, 1]; scale it so that the value
# printed next to the "%" sign really is a percentage.
accuracy = metrics.accuracy_score(Y_test, Y_pred)
print("Accuracy of test: %.2f" % (accuracy * 100) + "%")
评论