I have this code to predict credit card defaults, and it works, but I'm asking whether anyone can make it more efficient or more compact. It's quite long, but please help me out.

```
# Import the necessary libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # fixed: was "as a plt" (invalid syntax)

# Extract the data from the .csv file.
# NOTE(review): hard-coded absolute path — adjust for your machine. A raw
# string is used so the backslashes are not treated as escapes.
file = r'C:\Users\alhut\OneDrive\Desktop\default credit card\creditcard_default.csv'
dataset = pd.read_csv(file, index_col='ID')
dataset.rename(columns=lambda x: x.lower(), inplace=True)

# Prepare the data with dummy columns (one-hot encoding). The baseline
# categories are: other_education, female, not_married.
dataset['grad_school'] = (dataset['education'] == 1).astype('int')
# fixed: column was misspelled 'universty'; later prediction code uses 'university'.
dataset['university'] = (dataset['education'] == 2).astype('int')
dataset['high_school'] = (dataset['education'] == 3).astype('int')
dataset.drop('education', axis=1, inplace=True)  # All information now lives in the dummy columns above.
dataset['male'] = (dataset['sex'] == 1).astype('int')
dataset.drop('sex', axis=1, inplace=True)
dataset['married'] = (dataset['marriage'] == 1).astype('int')
dataset.drop('marriage', axis=1, inplace=True)

# For the payment features, <= 0 means the payment was not delayed; clamp to 0.
pay_features = ['pay_0', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6']
for p in pay_features:
    dataset.loc[dataset[p] <= 0, p] = 0

dataset.rename(columns={'default_payment_next_month': 'default'}, inplace=True)  # Shorter target name for convenience.

# Importing objects from sklearn to help with the predictions.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, precision_recall_curve
from sklearn.preprocessing import RobustScaler

# Scale the features and create the train/test split (stratified on the target
# so both splits keep the same default rate).
target_name = 'default'
X = dataset.drop('default', axis=1)
robust_scaler = RobustScaler()
X = robust_scaler.fit_transform(X)
y = dataset[target_name]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=123, stratify=y)
# Creating a labeled confusion-matrix DataFrame (original body had lost its indentation).
def CMatrix(CM, labels=('pay', 'default')):
    """Return confusion matrix *CM* as a labeled DataFrame with totals.

    CM: 2x2 array-like confusion matrix (true classes in rows, predicted in
        columns, as produced by sklearn's confusion_matrix).
    labels: class names, in the order the matrix rows/columns use.
            (Tuple default avoids the mutable-default-argument pitfall.)
    """
    df = pd.DataFrame(data=CM, index=list(labels), columns=list(labels))
    df.index.name = 'TRUE'
    df.columns.name = 'PREDICTION'
    df.loc['TOTAL'] = df.sum()        # column sums (per predicted class)
    df['Total'] = df.sum(axis=1)      # row sums; TOTAL row gets the grand total
    return df
# DataFrame that collects the evaluation metrics for every model.
metrics = pd.DataFrame(index=['accuracy', 'precision', 'recall'],
                       columns=['NULL', 'LogisticReg', 'ClassTree', 'NaiveBayes'])

#######################
# The Null Model: always predict the majority class seen in training.
y_pred_test = np.repeat(y_train.value_counts().idxmax(), y_test.size)
scorers = (('accuracy', accuracy_score),
           ('precision', precision_score),
           ('recall', recall_score))
for row, scorer in scorers:
    metrics.loc[row, 'NULL'] = scorer(y_pred=y_pred_test, y_true=y_test)
CM = confusion_matrix(y_pred=y_pred_test, y_true=y_test)
CMatrix(CM)
# A. Logistic Regression.
# 1- Import the estimator object (model).
from sklearn.linear_model import LogisticRegression

# 2- Create an instance of the estimator.
logistic_regression = LogisticRegression(n_jobs=-1, random_state=15)

# 3- Train the estimator on the training data.
logistic_regression.fit(X_train, y_train)

# 4- Evaluate the model on the held-out test set.
y_pred_test = logistic_regression.predict(X_test)
for row, scorer in (('accuracy', accuracy_score),
                    ('precision', precision_score),
                    ('recall', recall_score)):
    metrics.loc[row, 'LogisticReg'] = scorer(y_pred=y_pred_test, y_true=y_test)

# Confusion Matrix.
CM = confusion_matrix(y_pred=y_pred_test, y_true=y_test)
CMatrix(CM)
# B. Classification Trees.
# 1- Import the estimator object (model).
from sklearn.tree import DecisionTreeClassifier

# 2- Create an instance of the estimator.
class_tree = DecisionTreeClassifier(min_samples_split=30, min_samples_leaf=10, random_state=10)

# 3- Train the estimator on the training data.
class_tree.fit(X_train, y_train)

# 4- Evaluate the model on the held-out test set.
y_pred_test = class_tree.predict(X_test)
for row, scorer in (('accuracy', accuracy_score),
                    ('precision', precision_score),
                    ('recall', recall_score)):
    metrics.loc[row, 'ClassTree'] = scorer(y_pred=y_pred_test, y_true=y_test)

# Confusion Matrix.
CM = confusion_matrix(y_pred=y_pred_test, y_true=y_test)
CMatrix(CM)
# C. Naive Bayes Classifier
# 1- Import the estimator object (model).
from sklearn.naive_bayes import GaussianNB

# 2- Create an instance of the estimator.
NBC = GaussianNB()

# 3- Train the estimator on the training data.
NBC.fit(X_train, y_train)

# 4- Evaluate the model on the held-out test set.
y_pred_test = NBC.predict(X_test)
for row, scorer in (('accuracy', accuracy_score),
                    ('precision', precision_score),
                    ('recall', recall_score)):
    metrics.loc[row, 'NaiveBayes'] = scorer(y_pred=y_pred_test, y_true=y_test)

# Confusion Matrix.
CM = confusion_matrix(y_pred=y_pred_test, y_true=y_test)
CMatrix(CM)
#######################
# Comparing the models with percentages (bare expression: displays in a
# notebook, has no effect when run as a plain script).
100 * metrics

# Comparing the models with a horizontal bar graph.
fig, ax = plt.subplots(figsize=(8, 5))
metrics.plot(kind='barh', ax=ax)
ax.grid()

# Precision-recall trade-off for the logistic regression and Naive Bayes
# models, computed from their predicted probabilities of the positive class.
precision_nb, recall_nb, thresholds_nb = precision_recall_curve(
    y_true=y_test, probas_pred=NBC.predict_proba(X_test)[:, 1])
precision_lr, recall_lr, thresholds_lr = precision_recall_curve(
    y_true=y_test, probas_pred=logistic_regression.predict_proba(X_test)[:, 1])

# Plot both precision-recall curves on one figure.
fig, ax = plt.subplots(figsize=(8, 5))
curves = ((precision_nb, recall_nb, 'NaiveBayes'),
          (precision_lr, recall_lr, 'LogisticReg'))
for prec, rec, label in curves:
    ax.plot(prec, rec, label=label)
ax.set(xlabel='Precision', ylabel='Recall', title='Precision-Recall Curve')
ax.hlines(y=0.5, xmin=0, xmax=1, color='r')
ax.legend()
ax.grid()

# Precision and recall as functions of the classification threshold for the
# logistic regression model ([1:] aligns the metric arrays with thresholds).
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(thresholds_lr, precision_lr[1:], label='Precision')
ax.plot(thresholds_lr, recall_lr[1:], label='Recall')
ax.set(xlabel='Classification Threshold', ylabel='Precision, Recall',
       title='Logistic Regression Classifier: Precision-Recall')
ax.hlines(y=0.6, xmin=0, xmax=1, color='r')
ax.legend()
ax.grid()
# Adjusting the classification threshold to 0.2: flag a customer as a
# defaulter whenever P(default) >= 0.2, trading precision for recall.
y_pred_proba = logistic_regression.predict_proba(X_test)[:, 1]
y_pred_test = (y_pred_proba >= 0.2).astype('int')  # fixed: was garbled ".type (& # 39; int & # 39;)"

# Confusion matrix and scores at the new threshold.
CM = confusion_matrix(y_pred=y_pred_test, y_true=y_test)
# fixed: "reminder_score" was a mistranslation of recall_score, and the
# string concatenation with '%' was garbled into invalid syntax.
print('Recall:', str(100 * recall_score(y_pred=y_pred_test, y_true=y_test)) + '%')
print('Precision:', str(100 * precision_score(y_pred=y_pred_test, y_true=y_test)) + '%')
CMatrix(CM)
#########################
# Define a function to make individual predictions.
def make_ind_prediction(new_data):
    """Predict whether a single customer will default.

    new_data: pandas Series of feature values, ordered like the columns of
        the training matrix X (values are passed positionally — TODO confirm
        the ordering matches before trusting the output).
    Returns 'Will default.' when P(default) >= 0.2, else 'Will pay.'.
    Uses the module-level robust_scaler and logistic_regression objects.
    """
    # fixed: original was garbled ("prob> =", "other:", "back", broken quotes)
    # and had lost its indentation.
    data = new_data.values.reshape(1, -1)
    data = robust_scaler.transform(data)
    prob = logistic_regression.predict_proba(data)[0][1]
    if prob >= 0.2:
        return 'Will default.'
    else:
        return 'Will pay.'
# Make an individual prediction using the given data.
from collections import OrderedDict  # fixed: was "from imported collections OrderedDict"

# NOTE(review): these values are fed to the model positionally, so this
# ordering must match the column order of the training matrix X — verify
# against the dataset before trusting the result.
new_customer = OrderedDict([('limit_bal', 4000), ('age', 50), ('bill_amt1', 500),
                            ('bill_amt2', 35509), ('bill_amt3', 689), ('bill_amt4', 0),
                            ('bill_amt5', 0), ('bill_amt6', 0), ('pay_amt1', 0), ('pay_amt2', 35509),
                            ('pay_amt3', 0), ('pay_amt4', 0), ('pay_amt5', 0), ('pay_amt6', 0),
                            ('male', 1), ('grad_school', 0), ('university', 1), ('high_school', 0),
                            ('married', 1), ('pay_0', -1), ('pay_2', -1), ('pay_3', -1),
                            ('pay_4', 0), ('pay_5', -1), ('pay_6', 0)])
new_customer = pd.Series(new_customer)
make_ind_prediction(new_customer)
```