commit 17a50c0ec90ac37ae7fd0d407cb8f2062fcfd4de Author: Davud Topalovic Date: Thu Dec 22 11:10:56 2022 +0100 Initial commit diff --git a/Decision Tree Classification in Python.pdf b/Decision Tree Classification in Python.pdf new file mode 100644 index 0000000..e7636aa Binary files /dev/null and b/Decision Tree Classification in Python.pdf differ diff --git a/README.md b/README.md new file mode 100644 index 0000000..3a89074 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# Decision-Tree-Algorithm + +Here we explain the concept and math behind the Decision Tree algorithm, code it completely from scratch using only two libraries (numpy and pandas), and apply it to two datasets. + +In this repository you can find the following files: +1. PDF file: Decision Tree Classification in Python.pdf, a seminar paper (work in progress) that explains the concept and math behind the Decision Tree algorithm +2. Jupyter notebook file: Decision_Tree_Classifier.ipynb, which contains the code of our Decision Tree algorithm as well as its two applications (on two different datasets) +3. First dataset: iris_data.csv +4. Second dataset: iphone_purchase_records.csv diff --git a/decision tree classification.ipynb b/decision tree classification.ipynb new file mode 100644 index 0000000..3ffdabe --- /dev/null +++ b/decision tree classification.ipynb @@ -0,0 +1,470 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import tools" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthtype
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
55.43.91.70.40
64.63.41.40.30
75.03.41.50.20
84.42.91.40.20
94.93.11.50.10
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width type\n", + "0 5.1 3.5 1.4 0.2 0\n", + "1 4.9 3.0 1.4 0.2 0\n", + "2 4.7 3.2 1.3 0.2 0\n", + "3 4.6 3.1 1.5 0.2 0\n", + "4 5.0 3.6 1.4 0.2 0\n", + "5 5.4 3.9 1.7 0.4 0\n", + "6 4.6 3.4 1.4 0.3 0\n", + "7 5.0 3.4 1.5 0.2 0\n", + "8 4.4 2.9 1.4 0.2 0\n", + "9 4.9 3.1 1.5 0.1 0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']\n", + "data = pd.read_csv(\"iris.csv\", skiprows=1, header=None, names=col_names)\n", + "data.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Node class" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class Node():\n", + " def __init__(self, feature_index=None, threshold=None, left=None, right=None, info_gain=None, value=None):\n", + " ''' constructor ''' \n", + " \n", + " # for decision node\n", + " self.feature_index = feature_index\n", + " self.threshold = threshold\n", + " self.left = left\n", + " self.right = right\n", + " self.info_gain = info_gain\n", + " \n", + " # for leaf node\n", + " self.value = value" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tree class" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class DecisionTreeClassifier():\n", + " def __init__(self, min_samples_split=2, max_depth=2):\n", + " ''' constructor '''\n", + " \n", + " # initialize the root of the tree \n", + " self.root = None\n", + " \n", + " # stopping conditions\n", + " self.min_samples_split = min_samples_split\n", + " self.max_depth = max_depth\n", + " \n", + " def build_tree(self, dataset, curr_depth=0):\n", + " ''' recursive function to build the tree ''' \n", + " \n", + " X, Y = dataset[:,:-1], dataset[:,-1]\n", + " num_samples, 
num_features = np.shape(X)\n", + " \n", + " # split until stopping conditions are met\n", + " if num_samples>=self.min_samples_split and curr_depth<=self.max_depth:\n", + " # find the best split\n", + " best_split = self.get_best_split(dataset, num_samples, num_features)\n", + " # check if information gain is positive\n", + " if best_split[\"info_gain\"]>0:\n", + " # recur left\n", + " left_subtree = self.build_tree(best_split[\"dataset_left\"], curr_depth+1)\n", + " # recur right\n", + " right_subtree = self.build_tree(best_split[\"dataset_right\"], curr_depth+1)\n", + " # return decision node\n", + " return Node(best_split[\"feature_index\"], best_split[\"threshold\"], \n", + " left_subtree, right_subtree, best_split[\"info_gain\"])\n", + " \n", + " # compute leaf node\n", + " leaf_value = self.calculate_leaf_value(Y)\n", + " # return leaf node\n", + " return Node(value=leaf_value)\n", + " \n", + " def get_best_split(self, dataset, num_samples, num_features):\n", + " ''' function to find the best split '''\n", + " \n", + " # dictionary to store the best split\n", + " best_split = {}\n", + " max_info_gain = -float(\"inf\")\n", + " \n", + " # loop over all the features\n", + " for feature_index in range(num_features):\n", + " feature_values = dataset[:, feature_index]\n", + " possible_thresholds = np.unique(feature_values)\n", + " # loop over all the feature values present in the data\n", + " for threshold in possible_thresholds:\n", + " # get current split\n", + " dataset_left, dataset_right = self.split(dataset, feature_index, threshold)\n", + " # check if childs are not null\n", + " if len(dataset_left)>0 and len(dataset_right)>0:\n", + " y, left_y, right_y = dataset[:, -1], dataset_left[:, -1], dataset_right[:, -1]\n", + " # compute information gain\n", + " curr_info_gain = self.information_gain(y, left_y, right_y, \"gini\")\n", + " # update the best split if needed\n", + " if curr_info_gain>max_info_gain:\n", + " best_split[\"feature_index\"] = 
feature_index\n", + " best_split[\"threshold\"] = threshold\n", + " best_split[\"dataset_left\"] = dataset_left\n", + " best_split[\"dataset_right\"] = dataset_right\n", + " best_split[\"info_gain\"] = curr_info_gain\n", + " max_info_gain = curr_info_gain\n", + " \n", + " # return best split\n", + " return best_split\n", + " \n", + " def split(self, dataset, feature_index, threshold):\n", + " ''' function to split the data '''\n", + " \n", + " dataset_left = np.array([row for row in dataset if row[feature_index]<=threshold])\n", + " dataset_right = np.array([row for row in dataset if row[feature_index]>threshold])\n", + " return dataset_left, dataset_right\n", + " \n", + " def information_gain(self, parent, l_child, r_child, mode=\"entropy\"):\n", + " ''' function to compute information gain '''\n", + " \n", + " weight_l = len(l_child) / len(parent)\n", + " weight_r = len(r_child) / len(parent)\n", + " if mode==\"gini\":\n", + " gain = self.gini_index(parent) - (weight_l*self.gini_index(l_child) + weight_r*self.gini_index(r_child))\n", + " else:\n", + " gain = self.entropy(parent) - (weight_l*self.entropy(l_child) + weight_r*self.entropy(r_child))\n", + " return gain\n", + " \n", + " def entropy(self, y):\n", + " ''' function to compute entropy '''\n", + " \n", + " class_labels = np.unique(y)\n", + " entropy = 0\n", + " for cls in class_labels:\n", + " p_cls = len(y[y == cls]) / len(y)\n", + " entropy += -p_cls * np.log2(p_cls)\n", + " return entropy\n", + " \n", + " def gini_index(self, y):\n", + " ''' function to compute gini index '''\n", + " \n", + " class_labels = np.unique(y)\n", + " gini = 0\n", + " for cls in class_labels:\n", + " p_cls = len(y[y == cls]) / len(y)\n", + " gini += p_cls**2\n", + " return 1 - gini\n", + " \n", + " def calculate_leaf_value(self, Y):\n", + " ''' function to compute leaf node '''\n", + " \n", + " Y = list(Y)\n", + " return max(Y, key=Y.count)\n", + " \n", + " def print_tree(self, tree=None, indent=\" \"):\n", + " ''' function to 
print the tree '''\n", + " \n", + " if not tree:\n", + " tree = self.root\n", + "\n", + " if tree.value is not None:\n", + " print(tree.value)\n", + "\n", + " else:\n", + " print(\"X_\"+str(tree.feature_index), \"<=\", tree.threshold, \"?\", tree.info_gain)\n", + " print(\"%sleft:\" % (indent), end=\"\")\n", + " self.print_tree(tree.left, indent + indent)\n", + " print(\"%sright:\" % (indent), end=\"\")\n", + " self.print_tree(tree.right, indent + indent)\n", + " \n", + " def fit(self, X, Y):\n", + " ''' function to train the tree '''\n", + " \n", + " dataset = np.concatenate((X, Y), axis=1)\n", + " self.root = self.build_tree(dataset)\n", + " \n", + " def predict(self, X):\n", + " ''' function to predict new dataset '''\n", + " \n", + " preditions = [self.make_prediction(x, self.root) for x in X]\n", + " return preditions\n", + " \n", + " def make_prediction(self, x, tree):\n", + " ''' function to predict a single data point '''\n", + " \n", + " if tree.value!=None: return tree.value\n", + " feature_val = x[tree.feature_index]\n", + " if feature_val<=tree.threshold:\n", + " return self.make_prediction(x, tree.left)\n", + " else:\n", + " return self.make_prediction(x, tree.right)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train-Test split" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "X = data.iloc[:, :-1].values\n", + "Y = data.iloc[:, -1].values.reshape(-1,1)\n", + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=41)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fit the model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X_2 <= 1.9 ? 0.33741385372714494\n", + " left:0.0\n", + " right:X_3 <= 1.5 ? 
0.427106638180289\n", + " left:X_2 <= 4.9 ? 0.05124653739612173\n", + " left:1.0\n", + " right:2.0\n", + " right:X_2 <= 5.0 ? 0.019631171921475288\n", + " left:X_1 <= 2.8 ? 0.20833333333333334\n", + " left:2.0\n", + " right:1.0\n", + " right:2.0\n" + ] + } + ], + "source": [ + "classifier = DecisionTreeClassifier(min_samples_split=3, max_depth=3)\n", + "classifier.fit(X_train,Y_train)\n", + "classifier.print_tree()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test the model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9333333333333333" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Y_pred = classifier.predict(X_test) \n", + "from sklearn.metrics import accuracy_score\n", + "accuracy_score(Y_test, Y_pred)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/iphone_purchase_records.csv b/iphone_purchase_records.csv new file mode 100644 index 0000000..3ce4962 --- /dev/null +++ b/iphone_purchase_records.csv @@ -0,0 +1,401 @@ +Gender,Age,Salary,Purchase Iphone +Male,19,19000,0 +Male,35,20000,0 +Female,26,43000,0 +Female,27,57000,0 +Male,19,76000,0 +Male,27,58000,0 +Female,27,84000,0 +Female,32,150000,1 +Male,25,33000,0 +Female,35,65000,0 +Female,26,80000,0 +Female,26,52000,0 +Male,20,86000,0 +Male,32,18000,0 +Male,18,82000,0 +Male,29,80000,0 +Male,47,25000,1 +Male,45,26000,1 +Male,46,28000,1 +Female,48,29000,1 +Male,45,22000,1 +Female,47,49000,1 +Male,48,41000,1 +Female,45,22000,1 +Male,46,23000,1 
+Male,47,20000,1 +Male,49,28000,1 +Female,47,30000,1 +Male,29,43000,0 +Male,31,18000,0 +Male,31,74000,0 +Female,27,137000,1 +Female,21,16000,0 +Female,28,44000,0 +Male,27,90000,0 +Male,35,27000,0 +Female,33,28000,0 +Male,30,49000,0 +Female,26,72000,0 +Female,27,31000,0 +Female,27,17000,0 +Female,33,51000,0 +Male,35,108000,0 +Male,30,15000,0 +Female,28,84000,0 +Male,23,20000,0 +Male,25,79000,0 +Female,27,54000,0 +Male,30,135000,1 +Female,31,89000,0 +Female,24,32000,0 +Female,18,44000,0 +Female,29,83000,0 +Female,35,23000,0 +Female,27,58000,0 +Female,24,55000,0 +Female,23,48000,0 +Male,28,79000,0 +Male,22,18000,0 +Female,32,117000,0 +Male,27,20000,0 +Male,25,87000,0 +Female,23,66000,0 +Male,32,120000,1 +Female,59,83000,0 +Male,24,58000,0 +Male,24,19000,0 +Female,23,82000,0 +Female,22,63000,0 +Female,31,68000,0 +Male,25,80000,0 +Female,24,27000,0 +Female,20,23000,0 +Female,33,113000,0 +Male,32,18000,0 +Male,34,112000,1 +Male,18,52000,0 +Female,22,27000,0 +Female,28,87000,0 +Female,26,17000,0 +Male,30,80000,0 +Male,39,42000,0 +Male,20,49000,0 +Male,35,88000,0 +Female,30,62000,0 +Female,31,118000,1 +Male,24,55000,0 +Female,28,85000,0 +Male,26,81000,0 +Male,35,50000,0 +Male,22,81000,0 +Female,30,116000,0 +Male,26,15000,0 +Female,29,28000,0 +Female,29,83000,0 +Female,35,44000,0 +Female,35,25000,0 +Male,28,123000,1 +Male,35,73000,0 +Female,28,37000,0 +Male,27,88000,0 +Male,28,59000,0 +Female,32,86000,0 +Female,33,149000,1 +Female,19,21000,0 +Male,21,72000,0 +Female,26,35000,0 +Male,27,89000,0 +Male,26,86000,0 +Female,38,80000,0 +Female,39,71000,0 +Female,37,71000,0 +Male,38,61000,0 +Male,37,55000,0 +Male,42,80000,0 +Male,40,57000,0 +Male,35,75000,0 +Male,36,52000,0 +Male,40,59000,0 +Male,41,59000,0 +Female,36,75000,0 +Male,37,72000,0 +Female,40,75000,0 +Male,35,53000,0 +Female,41,51000,0 +Female,39,61000,0 +Male,42,65000,0 +Male,26,32000,0 +Male,30,17000,0 +Female,26,84000,0 +Male,31,58000,0 +Male,33,31000,0 +Male,30,87000,0 +Female,21,68000,0 +Female,28,55000,0 
+Male,23,63000,0 +Female,20,82000,0 +Male,30,107000,1 +Female,28,59000,0 +Male,19,25000,0 +Male,19,85000,0 +Female,18,68000,0 +Male,35,59000,0 +Male,30,89000,0 +Female,34,25000,0 +Female,24,89000,0 +Female,27,96000,1 +Female,41,30000,0 +Male,29,61000,0 +Male,20,74000,0 +Female,26,15000,0 +Male,41,45000,0 +Male,31,76000,0 +Female,36,50000,0 +Male,40,47000,0 +Female,31,15000,0 +Male,46,59000,0 +Male,29,75000,0 +Male,26,30000,0 +Female,32,135000,1 +Male,32,100000,1 +Male,25,90000,0 +Female,37,33000,0 +Male,35,38000,0 +Female,33,69000,0 +Female,18,86000,0 +Female,22,55000,0 +Female,35,71000,0 +Male,29,148000,1 +Female,29,47000,0 +Male,21,88000,0 +Male,34,115000,0 +Female,26,118000,0 +Female,34,43000,0 +Female,34,72000,0 +Female,23,28000,0 +Female,35,47000,0 +Male,25,22000,0 +Male,24,23000,0 +Female,31,34000,0 +Male,26,16000,0 +Female,31,71000,0 +Female,32,117000,1 +Male,33,43000,0 +Female,33,60000,0 +Male,31,66000,0 +Female,20,82000,0 +Female,33,41000,0 +Male,35,72000,0 +Male,28,32000,0 +Male,24,84000,0 +Female,19,26000,0 +Male,29,43000,0 +Male,19,70000,0 +Male,28,89000,0 +Male,34,43000,0 +Female,30,79000,0 +Female,20,36000,0 +Male,26,80000,0 +Male,35,22000,0 +Male,35,39000,0 +Male,49,74000,0 +Female,39,134000,1 +Female,41,71000,0 +Female,58,101000,1 +Female,47,47000,0 +Female,55,130000,1 +Female,52,114000,0 +Female,40,142000,1 +Female,46,22000,0 +Female,48,96000,1 +Male,52,150000,1 +Female,59,42000,0 +Male,35,58000,0 +Male,47,43000,0 +Female,60,108000,1 +Male,49,65000,0 +Male,40,78000,0 +Female,46,96000,0 +Male,59,143000,1 +Female,41,80000,0 +Male,35,91000,1 +Male,37,144000,1 +Male,60,102000,1 +Female,35,60000,0 +Male,37,53000,0 +Female,36,126000,1 +Male,56,133000,1 +Female,40,72000,0 +Female,42,80000,1 +Female,35,147000,1 +Male,39,42000,0 +Male,40,107000,1 +Male,49,86000,1 +Female,38,112000,0 +Male,46,79000,1 +Male,40,57000,0 +Female,37,80000,0 +Female,46,82000,0 +Female,53,143000,1 +Male,42,149000,1 +Male,38,59000,0 +Female,50,88000,1 +Female,56,104000,1 
+Female,41,72000,0 +Female,51,146000,1 +Female,35,50000,0 +Female,57,122000,1 +Male,41,52000,0 +Female,35,97000,1 +Female,44,39000,0 +Male,37,52000,0 +Female,48,134000,1 +Female,37,146000,1 +Female,50,44000,0 +Female,52,90000,1 +Female,41,72000,0 +Male,40,57000,0 +Female,58,95000,1 +Female,45,131000,1 +Female,35,77000,0 +Male,36,144000,1 +Female,55,125000,1 +Female,35,72000,0 +Male,48,90000,1 +Female,42,108000,1 +Male,40,75000,0 +Male,37,74000,0 +Female,47,144000,1 +Male,40,61000,0 +Female,43,133000,0 +Female,59,76000,1 +Male,60,42000,1 +Male,39,106000,1 +Female,57,26000,1 +Male,57,74000,1 +Male,38,71000,0 +Male,49,88000,1 +Female,52,38000,1 +Female,50,36000,1 +Female,59,88000,1 +Male,35,61000,0 +Male,37,70000,1 +Female,52,21000,1 +Male,48,141000,0 +Female,37,93000,1 +Female,37,62000,0 +Female,48,138000,1 +Male,41,79000,0 +Female,37,78000,1 +Male,39,134000,1 +Male,49,89000,1 +Male,55,39000,1 +Male,37,77000,0 +Female,35,57000,0 +Female,36,63000,0 +Male,42,73000,1 +Female,43,112000,1 +Male,45,79000,0 +Male,46,117000,1 +Female,58,38000,1 +Male,48,74000,1 +Female,37,137000,1 +Male,37,79000,1 +Female,40,60000,0 +Male,42,54000,0 +Female,51,134000,0 +Female,47,113000,1 +Male,36,125000,1 +Female,38,50000,0 +Female,42,70000,0 +Male,39,96000,1 +Female,38,50000,0 +Female,49,141000,1 +Female,39,79000,0 +Female,39,75000,1 +Female,54,104000,1 +Male,35,55000,0 +Male,45,32000,1 +Male,36,60000,0 +Female,52,138000,1 +Female,53,82000,1 +Male,41,52000,0 +Female,48,30000,1 +Female,48,131000,1 +Female,41,60000,0 +Male,41,72000,0 +Female,42,75000,0 +Male,36,118000,1 +Female,47,107000,1 +Male,38,51000,0 +Female,48,119000,1 +Male,42,65000,0 +Male,40,65000,0 +Male,57,60000,1 +Female,36,54000,0 +Male,58,144000,1 +Male,35,79000,0 +Female,38,55000,0 +Male,39,122000,1 +Female,53,104000,1 +Male,35,75000,0 +Female,38,65000,0 +Female,47,51000,1 +Male,47,105000,1 +Female,41,63000,0 +Male,53,72000,1 +Female,54,108000,1 +Male,39,77000,0 +Male,38,61000,0 +Female,38,113000,1 +Male,37,75000,0 
+Female,42,90000,1 +Female,37,57000,0 +Male,36,99000,1 +Male,60,34000,1 +Male,54,70000,1 +Female,41,72000,0 +Male,40,71000,1 +Male,42,54000,0 +Male,43,129000,1 +Female,53,34000,1 +Female,47,50000,1 +Female,42,79000,0 +Male,42,104000,1 +Female,59,29000,1 +Female,58,47000,1 +Male,46,88000,1 +Male,38,71000,0 +Female,54,26000,1 +Female,60,46000,1 +Male,60,83000,1 +Female,39,73000,0 +Male,59,130000,1 +Female,37,80000,0 +Female,46,32000,1 +Female,46,74000,0 +Female,42,53000,0 +Male,41,87000,1 +Female,58,23000,1 +Male,42,64000,0 +Male,48,33000,1 +Female,44,139000,1 +Male,49,28000,1 +Female,57,33000,1 +Male,56,60000,1 +Female,49,39000,1 +Male,39,71000,0 +Male,47,34000,1 +Female,48,35000,1 +Male,48,33000,1 +Male,47,23000,1 +Female,45,45000,1 +Male,60,42000,1 +Female,39,59000,0 +Female,46,41000,1 +Male,51,23000,1 +Female,50,20000,1 +Male,36,33000,0 +Female,49,36000,1 \ No newline at end of file diff --git a/iris_data.csv b/iris_data.csv new file mode 100644 index 0000000..5c4316c --- /dev/null +++ b/iris_data.csv @@ -0,0 +1,151 @@ +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa 
+5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor 
+5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica +