- pandas - scikit-learn - numpy

Car Evaluation Dataset

Decide whether or not to buy a car based on its physical characteristics.

Training of Model

Enter a value between 0 and 1.

Training Summary

Selected Model:

Not available

Test Split:

Not available

Accuracy Score:

Not available

Weighted F1 Score:

Not available

Car Parameters

import pandas as pd
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score
from pyodide.http import open_url
import numpy as np

# NOTE(review): `document` and `pyscript` are used below but never imported in
# this script; presumably they are injected by the PyScript runtime -- confirm.

CSV_URL = "https://raw.githubusercontent.com/aryan0141/PyScript.js-Tutorial/master/car.csv"

# Column names as they are used throughout this script ('luggaage' is the
# spelling the script has always used for the column, so it is kept).
FEATURE_COLUMNS = ['buying', 'maint', 'doors', 'people', 'luggaage', 'safety']
TARGET_COLUMN = 'score'

# Categorical value -> integer code, per column.
CATEGORY_CODES = {
    'buying': {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3},
    'maint': {'low': 0, 'med': 1, 'high': 2, 'vhigh': 3},
    'doors': {'2': 0, '3': 1, '4': 2, '5more': 3},
    'people': {'2': 0, '4': 1, 'more': 2},
    'luggaage': {'small': 0, 'med': 1, 'big': 2},
    'safety': {'low': 0, 'med': 1, 'high': 2},
    'score': {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3},
}

# Class codes that get upsampled to the size of class 0.
MINORITY_CLASSES = (1, 2, 3)

# Radio-button value -> (label shown in the summary, zero-argument model factory).
MODEL_CHOICES = {
    "rf": ("Random Forest Classifier",
           lambda: RandomForestClassifier(n_estimators=100)),
    "lr": ("Logistic Regression",
           lambda: LogisticRegression()),
    "gb": ("Gradient Boosting Classifier",
           lambda: GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                              max_depth=1, random_state=0)),
}
# Any other radio value falls back to the MLP classifier.
DEFAULT_MODEL_CHOICE = ("MLP Classifier", lambda: MLPClassifier())

# Predicted class code -> text shown to the user; other codes read "Very Good".
SCORE_LABELS = {0: "Unaccepted", 1: "Accepted", 2: "Good"}
DEFAULT_SCORE_LABEL = "Very Good"

# Set by trainModel(); None until a model has been trained.
trained_model = None


def upSampling(data):
    """Balance the classes by upsampling classes 1-3 to the size of class 0.

    Each minority class is resampled with replacement (random_state=42) until
    it has as many rows as class 0. The result is the concatenation of the
    upsampled minority classes followed by the untouched class-0 rows.

    Args:
        data: DataFrame with an integer-coded 'score' column.

    Returns:
        A new DataFrame containing the balanced rows.
    """
    from sklearn.utils import resample

    majority_rows = data[data[TARGET_COLUMN] == 0]
    majority_count = majority_rows.shape[0]

    balanced_parts = []
    for class_code in MINORITY_CLASSES:
        minority_rows = data[data[TARGET_COLUMN] == class_code]
        if minority_rows.empty:
            # Nothing to draw from; skip the class rather than fail in resample.
            continue
        balanced_parts.append(
            resample(minority_rows, replace=True,
                     n_samples=majority_count, random_state=42)
        )

    balanced_parts.append(majority_rows)
    return pd.concat(balanced_parts)


def datasetPreProcessing():
    """Download the car CSV, clean it, encode it numerically and balance it.

    Returns:
        The upsampled DataFrame produced by upSampling().
    """
    data = pd.read_csv(open_url(CSV_URL))
    pyscript.write("headingText", "Pre-Processing the Dataset...")  # Message shown in the HTML DOM.

    # Drop rows with missing values and duplicate rows. Both calls return new
    # frames, so the result has to be kept; .copy() makes the frame safe to
    # assign columns into.
    data = data.dropna().drop_duplicates().copy()

    # Convert categorical data into numerical data. Values without a code are
    # left untouched by replace().
    for column, codes in CATEGORY_CODES.items():
        data[column] = data[column].replace(codes)

    return upSampling(data)


def model_selection():
    """Build the classifier picked by the 'modelSelection' radio group.

    Also writes the model's display name into the 'selectedModelContentBox'
    element.

    Returns:
        A new, unfitted scikit-learn classifier.
    """
    selectedModel = document.querySelector('input[name="modelSelection"]:checked').value
    label, build_model = MODEL_CHOICES.get(selectedModel, DEFAULT_MODEL_CHOICE)
    document.getElementById("selectedModelContentBox").innerText = label
    return build_model()


def classifier(model, X_train, X_test, y_train, y_test):
    """Fit `model` on the training split and score it on the test split.

    Returns:
        (accuracy, fitted model, weighted F1 score).
    """
    # Fit once only; a second fit just repeats the training work.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc_score = accuracy_score(y_test, y_pred)
    f1Score = f1_score(y_test, y_pred, average='weighted')
    return acc_score, model, f1Score


def _enable_button(element_id):
    """Remove the 'disabled' class and flag from the element with this id."""
    button = document.getElementById(element_id)
    button.classList.remove("disabled")
    button.disabled = False


def _finish_event(e):
    """Cancel the event's default action (if an event was given); return False."""
    if e:
        e.preventDefault()
    return False


def _read_test_split():
    """Return the 'test_split' input as a float strictly between 0 and 1, else None."""
    try:
        test_split = float(document.getElementById("test_split").value)
    except ValueError:
        return None
    # train_test_split rejects a float test_size of exactly 0 or 1, so both
    # bounds are exclusive. NaN fails this comparison and is rejected too.
    if not 0 < test_split < 1:
        return None
    return test_split


def trainModel(e=None):
    """Click handler: train the selected model and publish its scores.

    Reads the test split from the page, trains the chosen classifier on the
    preprocessed dataset, stores it in the module-level `trained_model`, and
    writes accuracy and weighted F1 into the DOM.

    Args:
        e: Optional DOM event; its default action is cancelled.

    Returns:
        False.
    """
    global trained_model

    # Validate the user's input before paying for the download and preprocessing.
    test_split = _read_test_split()
    if test_split is None:
        pyscript.write("headingText", "Choose Test Split between 0 and 1 (exclusive)")
        # Presumably the page disables this button while training runs;
        # hand it back so the user can correct the value and retry.
        _enable_button("trainModelBtn")
        return _finish_event(e)

    processed_data = datasetPreProcessing()
    document.getElementById("testSplitContentBox").innerText = test_split

    X = processed_data[FEATURE_COLUMNS]
    y = processed_data[TARGET_COLUMN]

    # NOTE(review): the data was upsampled (with replacement) before this split,
    # so copies of the same minority row can land in both the training and the
    # test set, which inflates the reported scores. Consider splitting first and
    # upsampling only the training portion.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_split, random_state=42)

    # Classification model chosen by the user.
    model = model_selection()

    pyscript.write("headingText", "Model Training Started...")
    acc_score, trained_model, f1Score = classifier(model, X_train, X_test, y_train, y_test)
    pyscript.write("headingText", "Model Training Completed.")

    # Write accuracy and F1 score to the DOM.
    document.getElementById("accuracyContentBox").innerText = f"{round(acc_score*100, 2)}%"
    document.getElementById("f1ContentBox").innerText = f"{round(f1Score*100, 2)}%"

    # A model now exists: enable both the predict and the train buttons.
    _enable_button("submitBtn")
    _enable_button("trainModelBtn")

    return _finish_event(e)


def testModel(e=None):
    """Click handler: predict the car's score from the form and display it.

    Args:
        e: Optional DOM event; its default action is cancelled.

    Returns:
        False.
    """
    if trained_model is None:
        pyscript.write("resultText", "Train a model before predicting.")
        return _finish_event(e)

    # Input element ids, in the same order as FEATURE_COLUMNS.
    input_ids = ["buying_price", "maintanence_price", "doors",
                 "persons", "luggage", "safety"]
    values = [int(document.getElementById(input_id).value) for input_id in input_ids]

    # The model was fitted on a DataFrame with these column names, so predict
    # on a one-row frame of the same layout.
    features = pd.DataFrame([values], columns=FEATURE_COLUMNS)
    result = trained_model.predict(features)

    condition = SCORE_LABELS.get(int(result[0]), DEFAULT_SCORE_LABEL)
    pyscript.write("resultText", f"Predicted Value: {condition}")

    return _finish_event(e)


document.getElementById("submitBtn").onclick = testModel
document.getElementById("trainModelBtn").onclick = trainModel