In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import joblib # For saving and loading models


In [2]:
# 1. Create Dummy Data (Classification Task)
data = pd.DataFrame({
    'feature_X': [2, 3, 4, 5, 6, 2.5, 3.5, 4.5, 5.5, 6.5],
    'feature_Y': [3, 4, 5, 6, 7, 3.5, 4.5, 5.5, 6.5, 7.5],
    'target_class': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] # 0 and 1 represent two classes
})
print("Original Data:\n", data)


Original Data:
    feature_X  feature_Y  target_class
0        2.0        3.0             0
1        3.0        4.0             0
2        4.0        5.0             0
3        5.0        6.0             0
4        6.0        7.0             0
5        2.5        3.5             1
6        3.5        4.5             1
7        4.5        5.5             1
8        5.5        6.5             1
9        6.5        7.5             1


In [3]:
# 2. Split Data into Features (X) and Target (y)
X = data[['feature_X', 'feature_Y']]
y = data['target_class']

# 3. Split Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) # 30% for testing

# 4. Feature Scaling (StandardScaler)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train) # Fit on training data, then transform
X_test_scaled = scaler.transform(X_test)      # Transform test data using fitted scaler

# 5. Initialize and Train KNN Classifier
knn_classifier = KNeighborsClassifier(n_neighbors=3) # Hyperparameter: k=3
knn_classifier.fit(X_train_scaled, y_train)

# 6. Make Predictions on Test Set
y_pred = knn_classifier.predict(X_test_scaled)
print("\nPredictions on Test Set:\n", y_pred)


Predictions on Test Set:
 [0 0 0]


In [6]:
# 7. Evaluate Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy on Test Set: {accuracy:.2f}") # Output: 0.67 in this example


Accuracy on Test Set: 0.33


In [7]:
y_prob = knn_classifier.predict_proba(X_test_scaled)
print("\nPredicted Probabilities (for each class):\n", y_prob)


Predicted Probabilities (for each class):
 [[0.66666667 0.33333333]
 [0.66666667 0.33333333]
 [0.66666667 0.33333333]]


In [8]:
# --- Saving ---
joblib.dump(knn_classifier, 'knn_model.joblib') # Save KNN model
joblib.dump(scaler, 'scaler.joblib')           # Save scaler
print("\nKNN model and scaler saved to disk.")


KNN model and scaler saved to disk.


In [None]:
#--- Loading ---
loaded_knn_model = joblib.load('knn_model.joblib')
loaded_scaler = joblib.load('scaler.joblib')
print("\nKNN model and scaler loaded from disk.")

In [11]:
from sklearn.model_selection import GridSearchCV

# ... (Data preparation, splitting, and scaling as in previous example) ...

# Define hyperparameter grid to search
param_grid = {
    'n_neighbors': [1, 3, 5, 7, 9, 11, 15, 20], # Values of k to try
    'weights': ['uniform', 'distance']          # Weighting options
}

# Initialize KNN classifier
knn = KNeighborsClassifier()

# Set up GridSearchCV with cross-validation (cv=5 means 5-fold CV)
grid_search = GridSearchCV(knn, param_grid, cv=2, scoring='accuracy') # scoring='accuracy' for classification

# Perform grid search on training data
grid_search.fit(X_train_scaled, y_train)

# Best hyperparameter combination found
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# Best model from grid search (trained with best hyperparameters on entire training data)
best_knn_model = grid_search.best_estimator_

# Evaluate best model on test set
y_pred_best = best_knn_model.predict(X_test_scaled)
accuracy_best = accuracy_score(y_test, y_pred_best)
print(f"Accuracy of Best KNN Model on Test Set: {accuracy_best:.2f}")

# You can now use best_knn_model for deployment

Best Hyperparameters: {'n_neighbors': 3, 'weights': 'uniform'}
Accuracy of Best KNN Model on Test Set: 0.33


Traceback (most recent call last):
  File "D:\Users\delhi\miniconda3\envs\rl\lib\site-packages\sklearn\model_selection\_validation.py", line 949, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "D:\Users\delhi\miniconda3\envs\rl\lib\site-packages\sklearn\metrics\_scorer.py", line 288, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "D:\Users\delhi\miniconda3\envs\rl\lib\site-packages\sklearn\metrics\_scorer.py", line 380, in _score
    y_pred = method_caller(
  File "D:\Users\delhi\miniconda3\envs\rl\lib\site-packages\sklearn\metrics\_scorer.py", line 90, in _cached_call
    result, _ = _get_response_values(
  File "D:\Users\delhi\miniconda3\envs\rl\lib\site-packages\sklearn\utils\_response.py", line 214, in _get_response_values
    y_pred = prediction_method(X)
  File "D:\Users\delhi\miniconda3\envs\rl\lib\site-packages\sklearn\neighbors\_classification.py", line 262, in predict
    probabilities =

In [None]:


y_prob = knn_classifier.predict_proba(X_test_scaled)
print("\nPredicted Probabilities (for each class):\n", y_prob)
# Output format: [[prob_class0, prob_class1], ...] for each test sample

# There is no 'r-value' output directly from KNN in the sense of correlation or regression.
# The 'output' in classification is primarily the predicted class labels and/or probabilities.
# In regression (using KNeighborsRegressor), the output would be the predicted continuous value.

# 9. Saving and Loading the Model (and Scaler) for Later Use

# --- Saving ---
joblib.dump(knn_classifier, 'knn_model.joblib') # Save KNN model
joblib.dump(scaler, 'scaler.joblib')           # Save scaler
print("\nKNN model and scaler saved to disk.")

# --- Loading ---
# loaded_knn_model = joblib.load('knn_model.joblib')
# loaded_scaler = joblib.load('scaler.joblib')
# print("\nKNN model and scaler loaded from disk.")

# You can now use loaded_knn_model to make predictions on new scaled data