The goal was to build a model that classifies whether or not an image contains a ship.
Image Example

The process will be as follows:
- Explore/Understand data (label and features)
- Create Test set
- Preparation of data
- Train model
- Evaluate the model
- Visualize the parameter.
1. Explore/Understand data.
2. Create Test set.
from sklearn.model_selection import train_test_split
# Hold out a test set before any model sees the data.
# `labels` supplies the targets and `data_bw` the samples; both are defined
# earlier in the notebook (presumably grayscale images -- confirm upstream).
y = labels

# Keep axis 0 (one row per sample) and flatten every remaining axis into a
# single feature vector, since the estimators below expect 2-D input.
X = data_bw.reshape(data_bw.shape[0], -1)

# 90% of the rows are used for training; random_state pins the shuffle so
# the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.90, random_state=42
)

# Report the shape of each of the four resulting arrays.
for split_name, split in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(split_name + " shape: ", split.shape)
3. Preparation of data.
import time
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
## Model definition: standardize the features, then classify.

# name: string label used when reporting this classifier's scores.
name = "LogisticRegression"

# scaler: sklearn StandardScaler, first stage of the pipeline.
scaler = StandardScaler()

# clf: sklearn classifier, final stage of the pipeline (liblinear solver).
clf = LogisticRegression(solver='liblinear')

# model_pipeline: sklearn Pipeline chaining the scaler and the classifier.
# The step keys are used later to look the fitted steps up by name.
pipeline_steps = [
    ('scaler', scaler),
    ('logisreg_classify', clf),
]
model_pipeline = Pipeline(pipeline_steps)
4. Train Model
from sklearn.metrics import accuracy_score
# --- In-sample fit ----------------------------------------------------------
# score_in_sample: scalar accuracy of the fitted pipeline on the training rows.
fitted_pipe = model_pipeline.fit(X_train, y_train)
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
score_in_sample = accuracy_score(y_train, fitted_pipe.predict(X_train))
print("Model: {m:s} in sample score={s:3.2f}\n".format(m=name, s=score_in_sample))

# --- Cross-validation -------------------------------------------------------
# k: number of folds.
# cross_val_scores: array of k scores, one per fold held out during
# cross-validation on the training set.
k = 5
# NOTE(review): t0 is captured here but never read in the visible code;
# kept in case a later cell reports the elapsed time.
t0 = time.time()
cross_val_scores = cross_val_score(model_pipeline, X_train, y_train, cv=k)
print("Model: {m:s} avg cross validation score={s:3.2f}\n".format(m=name, s=cross_val_scores.mean()) )

# --- Parameter count --------------------------------------------------------
# num_parameters: the NUMBER of non-intercept parameters in the model.
# The previous code stored the shape tuple of coef_[0] here instead of a count.
fitted_clf = model_pipeline['logisreg_classify']
coefficients = fitted_clf.coef_
num_parameters = coefficients.size
print("\nShape of intercept: {i}; shape of coefficients: {c}".format(i=fitted_clf.intercept_.shape,
                                                                    c=coefficients[0].shape) )
5. Evaluate the Model
# score_out_of_sample: accuracy of the fitted pipeline on the held-out test set.
y_pred = fitted_pipe.predict(X_test)
# NOTE(review): y_pred_prob is not read anywhere in the visible code; kept in
# case a later cell uses the predicted probabilities.
y_pred_prob = fitted_pipe.predict_proba(X_test)
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
score_out_of_sample = accuracy_score(y_test, y_pred)
print("Model: {m:s} out-of-sample score={s:3.2f}\n".format(m=name, s=score_out_of_sample))

# Class balance of the training labels: one row per distinct label, holding
# the label value and how many times it occurs in y_train.
# NOTE(review): `np` is not imported in the visible cells -- confirm that
# `import numpy as np` runs earlier in the notebook.
unique, counts = np.unique(y_train, return_counts=True)
print(np.asarray((unique, counts)).T)




Leave a comment