In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [17]:
# Toy dataset: two numeric feature columns and a binary label column.
data = dict(
    feature1=[9] * 3 + list(range(4, 11)),  # 9, 9, 9, 4, 5, ..., 10
    feature2=list(range(10, 0, -1)),        # 10, 9, ..., 1
    target=[0] * 3 + [1] * 4 + [0] * 3,     # class 1 only for rows 3-6
)

df = pd.DataFrame.from_dict(data)
# Preview the first five rows
df.head()


Unnamed: 0,feature1,feature2,target
0,9,10,0
1,9,9,0
2,9,8,0
3,4,7,1
4,5,6,1


In [18]:
# Separate the predictor columns from the label column
feature_columns = ['feature1', 'feature2']
X = df.loc[:, feature_columns]
y = df.loc[:, 'target']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [19]:
# Instantiate the Gaussian Naive Bayes classifier with its default settings
model = GaussianNB()

# Fit the classifier on the training portion only
model.fit(X=X_train, y=y_train)

In [20]:
# Predict labels for the held-out rows
y_pred = model.predict(X_test)
print("model prediction: ", y_pred)

# Score the predictions against the true held-out labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# Overall accuracy first, then the per-class precision/recall/F1 table
print(f"Accuracy: {accuracy}")
print("Classification report: \n", report)

model prediction:  [0 0 1]
Accuracy: 1.0
Classification report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [21]:
# Save the model.
# The context manager guarantees the file handle is flushed and closed, even
# if pickling raises, before the file is reopened for reading below.
filename = 'gaussian_naive_bayes_model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the model and check that it reproduces the in-memory predictions.
# NOTE: pickle.load can execute arbitrary code; only unpickle files that you
# created yourself or otherwise trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
print("Loaded model prediction:", loaded_model.predict(X_test))

Loaded model prediction: [0 0 1]


In [22]:
## Hyperparameter tuning

In [23]:
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
import pickle

# Toy dataset: two numeric feature columns and a binary label column.
data = dict(
    feature1=list(range(1, 11)),         # 1, 2, ..., 10
    feature2=list(range(10, 0, -1)),     # 10, 9, ..., 1
    target=[0] * 3 + [1] * 4 + [0] * 3,  # class 1 only for rows 3-6
)

df = pd.DataFrame.from_dict(data)

# Separate the predictor columns from the label column
feature_columns = ['feature1', 'feature2']
X = df.loc[:, feature_columns]
y = df.loc[:, 'target']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Candidate values for GaussianNB's variance-smoothing term
params = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5],
}

# Exhaustively evaluate every candidate with 2-fold cross-validation
# on the training rows
grid = GridSearchCV(estimator=GaussianNB(), param_grid=params, cv=2)
grid.fit(X_train, y_train)

# Report the winning setting and its mean cross-validated score
print(f"Best parameters for Naive Bayes: {grid.best_params_}")
print("Best Score for Naive Bayes: ", grid.best_score_)

Best parameters for Naive Bayes: {'var_smoothing': 1e-09}
Best Score for Naive Bayes:  0.5833333333333333
