# 23. Solutions to exercises¶

## 23.1. Data science tools¶

### 23.1.1. Section 2¶

#### 23.1.1.1. Section 2.1.1.1¶

`touch new_file.txt`

`touch ~/Documents/new_file.txt`

#### 23.4.3.6. Solution Section 13.3.4.1¶

# Part 1: resample the first BOLD volume into the space of the T1-weighted image.
vol0 = nib.Nifti1Image(data_bold[:, :, :, 0], img_bold.affine)
vol0_resampled = resample_from_to(vol0, (img_t1.shape, img_t1.affine))

data_vol0_resampled = vol0_resampled.get_fdata()

# Show the middle axial slice of each image side by side for comparison.
fig, ax = plt.subplots(1, 2)
ax[0].matshow(data_t1[:, :, data_t1.shape[-1] // 2])
im = ax[1].matshow(data_vol0_resampled[:, :, data_vol0_resampled.shape[-1] // 2])


Part 2: We’d expect more loss of information going from the T1-weighted data to the BOLD data than vice versa. However, this is exclusively because of loss of information in the T1-weighted data. There is no information in the BOLD data beyond the resolution at which it was originally sampled.

## 23.5. Image processing¶

### 23.5.1. Section 14¶

#### 23.5.1.1. Solution Section 14.4.1.1¶

# Sample two individual pixels from the grayscale image and report their values.
for row, col in [(200, 70), (400, 200)]:
    print(gray_img[row, col])


The values of these pixels are relatively low (less than 0.5). This is because they have high values in the red and blue channels and these are weighted much lower in the conversion to gray scale than the values of the green channel.

#### 23.5.1.2. Solution Section 14.4.2.1¶

# Convolve a random 3D "image" with a 20x20x20 box kernel, written as an
# explicit triple loop over output voxels.
mri_img = np.random.randn(100, 100, 100)

kernel = np.ones((20, 20, 20))
result = np.zeros(mri_img.shape)

for ii in range(result.shape[0]):
    for jj in range(result.shape[1]):
        for kk in range(result.shape[2]):
            # The statement below was missing from the garbled original:
            # extract the neighborhood anchored at (ii, jj, kk). Slicing past
            # the array edge silently truncates, so the neighborhood can be
            # smaller than the kernel near the boundaries.
            neighborhood = mri_img[ii:ii + kernel.shape[0],
                                   jj:jj + kernel.shape[1],
                                   kk:kk + kernel.shape[2]]
            # Clip the kernel to the neighborhood's (possibly truncated)
            # shape so the element-wise product is defined at the edges.
            weighted_neighborhood = neighborhood * kernel[:neighborhood.shape[0],
                                                          :neighborhood.shape[1],
                                                          :neighborhood.shape[2]]
            conv_voxel = np.sum(weighted_neighborhood)
            result[ii, jj, kk] = conv_voxel


#### 23.5.1.3. Solution Section 14.4.3.1¶

from skimage.filters import sobel, sobel_h, sobel_v

# Full, vertical-only, and horizontal-only Sobel filters applied to the 2D image.
fig, ax = plt.subplots(1, 3)
sobel_collins = sobel(gray_img)
ax[0].matshow(sobel_collins)
sobel_v_collins = sobel_v(gray_img)
ax[1].matshow(sobel_v_collins)
sobel_h_collins = sobel_h(gray_img)
ax[2].matshow(sobel_h_collins)

# The plain sobel filter also works on a 3D volume; show one slice of it.
fig, ax = plt.subplots()
sobel_brain = sobel(brain)
ax.matshow(sobel_brain[:, :, 10])


These functions are edge detectors! The _h and _v versions of this function emphasize specifically horizontal and vertical edges. These functions don’t work on 3-dimensional images because they don’t have a clear notion of “horizontal” and “vertical” associated with them.

#### 23.5.1.4. Solution Section 14.4.4.1¶

# Part 1: hand-rolled top-hat filters built from erosion and dilation.
# NOTE(review): the `selem=disk(7)` default is evaluated once at definition
# time and shared across calls; neither function mutates it, so this is safe.
def morphological_white_tophat(image, selem=disk(7)):
    """Subtract the morphological opening (erosion, then dilation) from the image."""
    return image - dilation(erosion(image, selem=selem), selem=selem)

def morphological_black_tophat(image, selem=disk(7)):
    # NOTE(review): the conventional black top-hat is closing(image) - image;
    # this follows the exercise's formulation as given.
    """Subtract the dilation-then-erosion result from the image."""
    return image - erosion(dilation(image, selem=selem), selem=selem)

fig, ax = plt.subplots(1, 2)
ax[0].matshow(morphological_white_tophat(shepp_logan))
ax[1].matshow(morphological_black_tophat(shepp_logan))

# A larger structuring element lets coarser features through.
fig, ax = plt.subplots(1, 2)
ax[0].matshow(morphological_white_tophat(shepp_logan, selem=disk(14)))
ax[1].matshow(morphological_black_tophat(shepp_logan, selem=disk(14)))

# Part 2: morphological opening of a single 2D brain slice...
selem = disk(7)
fig, ax = plt.subplots(1, 2)
ax[0].matshow(brain[:, :, 10])
ax[1].matshow(dilation(erosion(brain[:, :, 10], selem=selem), selem=selem))

# ...and of the full 3D volume, using a ball-shaped structuring element.
from skimage.morphology import ball
selem = ball(7)
fig, ax = plt.subplots(1, 2)
ax[0].matshow(brain[:, :, 10])
ax[1].matshow(dilation(erosion(brain, selem=selem), selem=selem)[:, :, 10])


### 23.5.2. Section 15¶

#### 23.5.2.1. Solution Section 15.1.1.1¶

# Part 1: brute-force three-class Otsu thresholding — search every pair of
# candidate thresholds for the one minimizing total within-class variance.
min_intraclass_variance = np.inf

unique_vals = np.unique(slice10)

for ii in range(len(unique_vals)):
    for jj in range(ii, len(unique_vals)):
        candidate1 = unique_vals[ii]
        candidate2 = unique_vals[jj]
        foreground1 = slice10[slice10 < candidate1]
        foreground2 = slice10[(slice10 >= candidate1) & (slice10 < candidate2)]
        background = slice10[slice10 >= candidate2]
        # Only score partitions where all three classes are non-empty.
        if len(foreground1) and len(foreground2) and len(background):
            # Weight each class's variance by its size (within-class scatter).
            foreground1_variance = np.var(foreground1) * len(foreground1)
            foreground2_variance = np.var(foreground2) * len(foreground2)
            background_variance = np.var(background) * len(background)
            intraclass_variance = (foreground1_variance +
                                   foreground2_variance +
                                   background_variance)
            if intraclass_variance < min_intraclass_variance:
                min_intraclass_variance = intraclass_variance
                threshold1 = candidate1
                threshold2 = candidate2

segmentation = np.zeros_like(slice10)
segmentation[(slice10 > threshold1) & (slice10 < threshold2)] = 1
fig, ax = plt.subplots()
ax.imshow(slice10, cmap="bone")
p = ax.imshow(segmentation, alpha=0.5)

# The same result via scikit-image's built-in multi-Otsu implementation.
from skimage.filters import threshold_multiotsu
threshold1, threshold2 = threshold_multiotsu(slice10)

segmentation = np.zeros_like(slice10)
segmentation[(slice10 > threshold1) & (slice10 < threshold2)] = 1
fig, ax = plt.subplots()
ax.imshow(slice10, cmap="bone")
p = ax.imshow(segmentation, alpha=0.5)

# Part 2: compare all thresholding methods scikit-image offers.
from skimage.filters import try_all_threshold
try_all_threshold(slice10)


The Minimum and Otsu methods both seem similarly good at doing this. One way to evaluate this objectively would be to look at the histogram of this slice across all time points in the BOLD time series and check that the segmented voxels remain in a similar segment throughout.

### 23.5.3. Section 16¶

#### 23.5.3.1. Solution Section 16.1.2.1¶

# Nonlinear (diffeomorphic) registration of the T1 image to the MNI template,
# initialized with the affine transform found previously.
metric = CCMetric(3)
sdr = SymmetricDiffeomorphicRegistration(metric)

mapping = sdr.optimize(mni_data, t1_resamp_data, prealign=affine3d.affine)
t1_warped = mapping.transform(t1_resamp_data)

fig, axes = plt.subplots(1, 3, figsize=(8, 4))
ax = axes.ravel()

# Template and warped T1, each normalized to its own maximum intensity.
mni_slice = mni_data[:, :, 85] / np.max(mni_data)
warped_slice = t1_warped[:, :, 85] / np.max(t1_warped)
ax[0].imshow(mni_slice)
ax[1].imshow(warped_slice)

# Overlay: template in the red channel, warped T1 in the green channel.
stereo = np.zeros((193, 229, 3), dtype=np.uint8)
stereo[..., 0] = 255 * mni_slice
stereo[..., 1] = 255 * warped_slice
ax[2].imshow(stereo)
fig.tight_layout()


Diffeomorphic registration substantially improves the registration to the template. There is a risk of warping the features substantially, to a point where they take on an unrealistic shape, similar to some of the distortions that happen in the photo.

## 23.6. Machine learning¶

### 23.6.1. Section 18¶

#### 23.6.1.1. Solution Section 18.2.3.2¶

# Part 1: built-in regression error metrics from scikit-learn.
from sklearn.metrics import mean_squared_error, mean_absolute_error

for metric in (mean_squared_error, mean_absolute_error):
    print(metric(y, y_pred))

# Part 2: a custom metric — the worst-case (largest) absolute error.

import numpy as np

def max_absolute_error(y, y_pred):
    """Return the maximum absolute deviation between targets and predictions.

    Parameters
    ----------
    y, y_pred : array-like
        True values and predicted values, with matching (broadcastable) shapes.

    Returns
    -------
    scalar
        The largest element of ``|y - y_pred|``.
    """
    return np.max(np.abs(y - y_pred))

# Worst-case prediction error for the model evaluated above.
print(max_absolute_error(y, y_pred))


### 23.6.2. Section 19¶

#### 23.6.2.1. Solution Section 19.1.1.1¶

# Fit a plain linear regression on the training data.
est = LinearRegression()
est.fit(x[:, None], y)

# Draw fresh data from the same generative process to evaluate on.
test_x, test_y = make_xy(30)

x_range = np.linspace(test_x.min(), test_x.max(), 100)
reg_line = est.predict(x_range[:, None])

fig, ax = plt.subplots()
ax.scatter(test_x, test_y)
ax.plot(x_range, reg_line)

# BUG FIX: out-of-sample error must compare the *test* targets against the
# predictions for the test inputs (the original passed the training `y`).
mse = mean_squared_error(test_y, est.predict(test_x[:, None]))
ax.set_title(f"Mean squared error: {mse:.2f}");


The error does increase when applied to new data, but this increase is much smaller than the increase observed for the 10th-order polynomial.

#### 23.6.2.3. Solution Section 20.1.3.1¶

We have higher out-of-sample accuracy because our training sample is larger, so the model has more data to learn from. The downside is that our evaluation dataset is smaller, so we get widely varying test accuracies (between 0.45 and 0.62) that depend on the characteristics of the test sample.

#### 23.6.2.4. Solution Section 20.1.5.1¶

# Cross-validated R^2 for predicting age residualized against site.
y_adjusted = abide_data['age_resid']
r2_cv = cross_val_score(est, X, y_adjusted, cv=k)

print("Individual fold scores:", r2_cv)
# FIX: the original used an f-string with no placeholders and passed the mean
# as a separate argument; interpolate the value into the string instead.
print(f"\nMean cross-validated R^2: {np.mean(r2_cv)}")


This result tells us that there is not much information about age, above and beyond the information about site.

#### 23.6.2.5. Solution Section 20.2.1.1¶

# Learning curves for models trained on random subsets of 5, 30, and 100
# features. (The original had bare `train_scores` / `test_scores` expressions
# that would raise NameError, iterated over an undefined `feature_sets`, and
# carried an unused `r2` list — all removed/fixed here.)
n_features = [5, 30, 100]
fig, axes = plt.subplots(1, 3)
for ii, nf in enumerate(n_features):
    ax = axes[ii]
    # Random subset of `nf` feature columns.
    X_subset = X.sample(nf, axis=1)
    results = learning_curve(est, X_subset, y, train_sizes=train_sizes,
                             cv=5, shuffle=True)
    sizes, train_scores, test_scores = results
    # Average scores across the 5 CV folds.
    train_mean = train_scores.mean(1)
    test_mean = test_scores.mean(1)

    ax.plot(sizes, train_mean, 'o-', label='Training')
    ax.plot(sizes, test_mean, 'o-', label='Test')
    ax.grid(True, axis='y')
    ax.set_ylim(0, 1)
    ax.set_xlabel('Sample size (n)')
    ax.set_ylabel('$R^2$')
    ax.set_title(f"{nf} features used")
fig.set_tight_layout("tight")


### 23.6.3. Section 21¶

#### 23.6.3.1. Solution Section 21.2.1.2¶

# Part 1: lasso coefficient paths as a function of the regularization
# strength alpha, for feature subsets of increasing size.
n_features = [10, 100, 400]
for nn in n_features:
    X = scale(features.sample(nn, axis=1, random_state=99))
    alpha = np.logspace(-3, 1, 100)
    ax = plot_coef_path(Lasso, X, y, alpha, max_iter=2000)
    ax.set_title(f"{nn} features")
    ax.set_xlim(1e-3, 10)

# Part 2: the same paths, but predicting age residualized against site.
y_resid = phenotypes['age_resid']
n_features = [10, 100, 400]
for nn in n_features:
    X = scale(features.sample(nn, axis=1, random_state=99))
    ax = plot_coef_path(Lasso, X, y_resid, alpha, max_iter=2000)
    ax.set_title(f"{nn} features")
    ax.set_xlim(1e-3, 10)


#### 23.6.3.2. Solution Section 21.2.1.5¶

from sklearn.model_selection import validation_curve, cross_val_score
from sklearn.linear_model import LinearRegression
from ndslib.viz import plot_train_test

# Sweep the ridge penalty across ten orders of magnitude, scoring each
# setting with 5-fold cross-validated R^2.
x_range = np.logspace(-5, 5, 100)
train_scores, test_scores = validation_curve(
    Ridge(), X, y, param_name='alpha', param_range=x_range,
    cv=5, scoring='r2')

# Unregularized OLS serves as a baseline.
ols_r2 = cross_val_score(
    LinearRegression(), X, y, scoring='r2', cv=5).mean()

ax = plot_train_test(
    x_range, train_scores, test_scores, 'Ridge',
    hlines={'OLS (test)': ols_r2})

ax.set_title("Age prediction performance")


### 23.6.4. Section 22¶

#### 23.6.4.1. Solution Section 22.1.1.1¶

# Forward pass through a tiny network by hand: two inputs feed two hidden
# units, which feed one output. No activation function — everything is a
# plain weighted sum.
x11, x12 = 100, 40

# Second-layer weights (w_2_<input><unit>).
w_2_11, w_2_21 = -2, 3
w_2_12, w_2_22 = 0.5, -1

# Hidden-unit activations.
x21 = w_2_11 * x11 + w_2_21 * x12
x22 = w_2_12 * x11 + w_2_22 * x12

# Third-layer weights and the single output unit.
w_3_11, w_3_21 = 1, 2
x31 = w_3_11 * x21 + w_3_21 * x22
print(x31)


#### 23.6.4.2. Solution Section 22.1.2.1¶

# The same tiny network, but now every unit passes its weighted sum through
# a ReLU nonlinearity before emitting it.
x11, x12 = 100, 40

w_2_11, w_2_21 = -2, 3
w_2_12, w_2_22 = 0.5, -1

# Hidden activations, rectified.
x21 = relu(w_2_11 * x11 + w_2_21 * x12)
x22 = relu(w_2_12 * x11 + w_2_22 * x12)

w_3_11, w_3_21 = 1, 2
x31 = relu(w_3_11 * x21 + w_3_21 * x22)
print(x31)


#### 23.6.4.3. Solution Section 22.2.1.1¶

# Forward pass with ReLU units (as in the previous exercise).
x11 = 100
x12 = 40
w_2_11 = -2
w_2_21 = 3
w_2_12 = 0.5
w_2_22 = -1
x21 = relu(w_2_11 * x11 + w_2_21 * x12)
x22 = relu(w_2_12 * x11 + w_2_22 * x12)
w_3_11 = 1
w_3_21 = 2
x31 = relu(w_3_11 * x21 + w_3_21 * x22)
# Squared error against a target output of 15.
e31 = (x31 - 15)**2

iterations = 0

# Gradient descent: back-propagate and update the weights until the squared
# error drops below 0.01. (Indentation of this loop was lost in extraction
# and is restored here; the update equations themselves are unchanged.)
while e31 > 0.01:
    iterations = iterations + 1
    # This part is similar to the two-layer network
    e_3_11 = x21 * d_relu(x31) * e31
    e_3_21 = x22 * d_relu(x31) * e31

    w_3_11 = w_3_11 - lr * e_3_11
    w_3_21 = w_3_21 - lr * e_3_21

    # Propagate the error to the second layer:
    e21 = w_3_11 * e_3_11
    e22 = w_3_21 * e_3_21

    # NOTE(review): the index bookkeeping below looks suspect — e_2_21 mixes
    # d_relu(x21) with e22, and the w_2_12/w_2_22 updates reuse e_2_11/e_2_21
    # rather than e_2_12/e_2_22. Kept exactly as in the original solution;
    # verify against the exercise text before changing.
    e_2_11 = x11 * d_relu(x21) * e21
    e_2_21 = x12 * d_relu(x21) * e22
    e_2_12 = x12 * d_relu(x22) * e21
    e_2_22 = x12 * d_relu(x22) * e22

    w_2_11 = w_2_11 - lr * e_2_11
    w_2_21 = w_2_21 - lr * e_2_21
    w_2_12 = w_2_12 - lr * e_2_11
    w_2_22 = w_2_22 - lr * e_2_21

    # Forward propagate through the network again with the same inputs
    x21 = relu(w_2_11 * x11 + w_2_21 * x12)
    x22 = relu(w_2_12 * x11 + w_2_22 * x12)
    x31 = relu(w_3_11 * x21 + w_3_21 * x22)
    e31 = (x31 - 15)**2

print(x31)
print(iterations)


#### 23.6.4.4. Solution Section 22.3.1.1¶

# A deeper fully-connected classifier: 8 -> 16 -> 32 -> 16 hidden ReLU
# units, with a softmax over the three output classes.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[-1],)),
    layers.Dense(8, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(3, activation="softmax"),
])

model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, categorical_y_train, epochs=5, batch_size=5,
                    verbose=2)

# Held-out accuracy: take the most probable class for each test sample.
y_predict = model.predict(X_test).argmax(axis=1)
print(accuracy_score(y_test, y_predict))

model.summary()


Increasing the number of layers and units can improve accuracy, but it also increases the number of parameters.

#### 23.6.4.5. Solution Section 22.4.1.1¶

import pandas as pd
import numpy as np

# Attach each subject's site and group labels to the subject list.
# IDIOM: both sides merge on the same column, so `on=` replaces the
# redundant left_on/right_on pair (behavior is identical).
new_table = pd.merge(pd.DataFrame(data=dict(subject=subject)),
                     subject_table,
                     on="subject")
new_table.groupby("site")["group"].value_counts()

# Base rate of autism at each site, and the accuracy achievable by always
# guessing the majority (non-autism) class once the site is known.
aut_in_kki = 22 / (67 + 22)
aut_in_ohsu = 36 / (53 + 36)
guess_with_site = np.mean([1 - aut_in_kki, 1 - aut_in_ohsu])
print(guess_with_site)

# Train the same network to predict *site* instead of diagnosis.
y_site = to_categorical(new_table["site"] == "ABIDEII-KKI_1")
X_train, X_test, y_train, y_test = train_test_split(
    X, y_site, train_size=0.8, random_state=100)
history = model.fit(X_train, y_train, batch_size=5, epochs=5,
                    verbose=2)

model.evaluate(X_test, y_test, verbose=2)


Based on these results – chance performance given knowledge of site is 0.67 and accuracy of classifying the site is very high – we would have to conclude that the performance of the algorithm could be explained entirely by classifying the site.