The goal was to build a model that classifies whether or not an image contains a ship.

Image Example

The process will be as follows:

  1. Explore/Understand data (label and features)
  2. Create Test set
  3. Preparation of data
  4. Train model
  5. Evaluate the model
  6. Visualize the parameters.

1. Explore/Understand data.

2. Create Test set.

from sklearn.model_selection import train_test_split

# Targets come straight from the label array loaded earlier.
y = labels

# Keep axis 0 (one row per sample) and collapse every remaining axis into a
# single feature axis, giving the 2-D (samples, features) layout that the
# scaler and classifier below are fitted on.
# NOTE(review): assumes data_bw is a NumPy array with samples on axis 0 --
# confirm against the cell that builds it.
X = data_bw.reshape(data_bw.shape[0], -1)

# Split data into train and test.
# 90% of the rows go to the training set; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.90,
    random_state=42,
)

# Report the shape of each of the four pieces of the split.
for caption, part in (
    ("X_train shape: ", X_train),
    ("X_test shape: ", X_test),
    ("y_train shape: ", y_train),
    ("y_test shape: ", y_test),
):
    print(caption, part.shape)

3. Preparation of data.

import time
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline


## Data Scaler
#  scaler: sklearn StandardScaler with default settings. It is not fitted
#          here; it is fitted as the first step of model_pipeline.
scaler = StandardScaler()

## Classification Model
#  name: string used to label this classifier in the printed reports
#  clf: sklearn classifier (logistic regression using the liblinear solver)
#  model_pipeline: sklearn Pipeline that applies the scaler and then the
#                  classifier. The step names are looked up later (e.g.
#                  model_pipeline['logisreg_classify']), so keep them stable.
name = "LogisticRegression"
clf = LogisticRegression(solver="liblinear")
model_pipeline = Pipeline(
    steps=[
        ("scaler", scaler),
        ("logisreg_classify", clf),
    ]
)

4. Train Model

from sklearn.metrics import accuracy_score
# Fit the pipeline on the training split and measure how well it reproduces
# the labels it was trained on.
#  fitted_pipe: the fitted pipeline (Pipeline.fit returns the pipeline itself)
#  score_in_sample: a scalar number, accuracy on the in-sample examples
fitted_pipe = model_pipeline.fit(X_train, y_train)
# accuracy_score is documented as (y_true, y_pred); pass the arguments in
# that order even though plain accuracy is symmetric.
score_in_sample = accuracy_score(y_train, fitted_pipe.predict(X_train))

print("Model: {m:s} in sample score={s:3.2f}\n".format(m=name, s=score_in_sample))

# Cross-validation on the training split.
#  k: number of folds
#  cross_val_scores: an array of scores (length k), one for each fold that is
#                    out-of-sample during cross-validation
k = 5

# Start timestamp for the cross-validation run. It is not read anywhere in
# this section; it is kept in case a later cell reports the elapsed time.
t0 = time.time()

cross_val_scores = cross_val_score(model_pipeline, X_train, y_train, cv=k)

print("Model: {m:s} avg cross validation score={s:3.2f}\n".format(m=name, s=cross_val_scores.mean()) )

# num_parameters: the number of non-intercept parameters in the model, as an
# integer count (the total number of entries in coef_), not a shape tuple.
# The fitted classifier is read back from the pipeline step so that the
# coefficients and the intercept come from the same fitted object.
fitted_clf = model_pipeline['logisreg_classify']
num_parameters = fitted_clf.coef_.size

print("\nShape of intercept: {i}; shape of coefficients: {c}".format(i=fitted_clf.intercept_.shape,
                                                                   c=fitted_clf.coef_[0].shape) )

5. Evaluate the Model

# Evaluate the fitted pipeline on the held-out test split.
#  y_pred: predicted class label for each test row
#  y_pred_prob: predicted class probabilities for each test row (not read
#               again in this section)
y_pred = fitted_pipe.predict(X_test)
y_pred_prob = fitted_pipe.predict_proba(X_test)

#  score_out_of_sample: model accuracy on the test set.
# accuracy_score is documented as (y_true, y_pred), so the true labels go first.
score_out_of_sample = accuracy_score(y_test, y_pred)

print("Model: {m:s} out-of-sample score={s:3.2f}\n".format(m=name, s=score_out_of_sample))

# Class balance of the training labels: one printed row per distinct label,
# as [label, count]. Useful context when reading the accuracy figures above.
unique, counts = np.unique(y_train, return_counts=True)
print(np.asarray((unique, counts)).T)

Leave a comment

Trending