In [22]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [23]:
# Toy dataset: feature1 counts up from 1 to 10, feature2 counts down from
# 10 to 1, and target is 1 only for the four middle rows.
data = {
    'feature1': list(range(1, 11)),
    'feature2': list(range(10, 0, -1)),
    'target': [0, 0, 0, 1, 1, 1, 1, 0, 0, 0],
}

df = pd.DataFrame(data)
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,feature1,feature2,target
0,1,10,0
1,2,9,0
2,3,8,0
3,4,7,1
4,5,6,1


In [24]:
# Only feature1 is used as model input; target is the label column.
feature_columns = ['feature1']
X = df[feature_columns]
y = df.loc[:, 'target']

# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [25]:
# A decision stump is a decision tree capped at depth 1 (a single split).
stump_settings = {'max_depth': 1, 'random_state': 42}
model = DecisionTreeClassifier(**stump_settings)

In [26]:
# Fit the stump on the training split only; the test split stays unseen.
model.fit(X=X_train, y=y_train)

In [27]:
# Entry 0 of the fitted tree's node arrays describes the first (root) split:
# the column index it tests and the threshold it compares against.
root_feature = model.tree_.feature[0]
root_threshold = model.tree_.threshold[0]
print(f"Critical Feature: {root_feature}")
print(f"Threshold: {root_threshold:.2f}")

Critical Feature: 0
Threshold: 3.50


In [28]:
# Predict labels for the held-out rows and show them as the cell output.
y_pred = model.predict(X=X_test)
y_pred

array([1, 0, 1])

In [29]:
# Compare the held-out predictions against the true labels:
# overall accuracy plus the per-class precision/recall/f1 table.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
print("Classification report: \n", report)

Accuracy: 0.6666666666666666
Classification report: 
               precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3



In [36]:
from sklearn.inspection import permutation_importance

# Permutation importance shuffles a feature column n_repeats times and records
# how much the model's score drops. The shuffling is random, so random_state is
# pinned (same seed as the split and the model) to make the printed impact
# identical on every run.
# NOTE(review): this is computed on the training split; pass X_test / y_test
# instead to measure importance on unseen data.
result = permutation_importance(
    model, X_train, y_train, n_repeats=10, random_state=42
)
print(f"Feature 1 Impact: {result.importances_mean[0]:.2f}")

Feature 1 Impact: 0.23


In [37]:
#Save Model
filename = 'decision_stump_model.pkl'
# The context manager closes the file as soon as the dump finishes, so the
# pickle is fully flushed to disk before any later cell tries to read it.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [12]:
#Load Model
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# pickle files that you created yourself or otherwise trust.
# The context manager guarantees the file handle is closed after reading.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
print("Loaded model prediction",loaded_model.predict(X_test))

Loaded model prediction [1 0 1]


## Hyperparameter tuning

In [14]:
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pickle

# Toy dataset: feature1 counts up from 1 to 10, feature2 counts down from
# 10 to 1, and target is 1 only for the four middle rows.
data = {
    'feature1': list(range(1, 11)),
    'feature2': list(range(10, 0, -1)),
    'target': [0, 0, 0, 1, 1, 1, 1, 0, 0, 0],
}

df = pd.DataFrame(data)

# Both feature columns are model inputs here; target is the label column.
X = df[['feature1', 'feature2']]
y = df['target']

# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Hyperparameter grid: depth stays fixed at 1 (a stump) while the split
# criterion and the splitter strategy are varied.
params = {
    'max_depth': [1],
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
}

# Grid search with 2-fold cross-validation over every hyperparameter
# combination in the grid, fitted on the training split only.
stump = DecisionTreeClassifier(random_state=42)
grid = GridSearchCV(estimator=stump, param_grid=params, cv=2)
grid.fit(X_train, y_train)

print(f"Best parameters for DecisionStump: {grid.best_params_}")
print("Best Score for Decision Stump: ", grid.best_score_)

Best parameters for DecisionStump: {'criterion': 'gini', 'max_depth': 1, 'splitter': 'best'}
Best Score for Decision Stump:  0.41666666666666663
