23. Solutions to exercises

23.1. Data science tools

23.1.1. Section 2

23.1.1.1. Solution Section 2.1.1.1

$ touch new_file.txt

$ touch ~/Documents/new_file.txt

$ mv ~/new_file.txt ~/Documents/new_file.txt

The main difference between cp and mv is that after running cp a copy of the file would still exist in the original location, while after running mv there would be no copy remaining.
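For example, a small illustration of this difference, using the same file and directory as above:

$ cp ~/new_file.txt ~/Documents/new_file.txt
$ ls ~/new_file.txt        # the original is still in the home directory after cp
$ mv ~/new_file.txt ~/Documents/new_file.txt
$ ls ~/new_file.txt        # after mv, ls reports that the file no longer exists here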

23.2. Programming

23.2.1. Section 5

23.2.1.1. Solution Section 5.2.3.3

print(type(number_of_subjects * number_of_timepoints))
print(type(number_of_timepoints / number_of_scans))

Python converts the result of dividing two integers into a float, because some divisions between integers do not produce an integer result.
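A minimal illustration of this behavior:

print(type(6 / 3))    # float, even though the division is exact
print(type(7 / 2))    # float
print(type(7 // 2))   # int: floor division keeps the result an integer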

23.2.1.2. Solution Section 5.2.3.5

my_string = "supercalifragilisticexpialidocious"
number_of_li = my_string.count("li")
print(number_of_li)

23.2.1.3. Solution Section 5.2.3.7

print(1 == True)
print(0 == False)

23.2.1.4. Solution Section 5.3.1.3

print(random_stuff[-1])
print(random_stuff[-4])
print(random_stuff[-len(random_stuff)])

23.2.1.5. Solution Section 5.3.1.7

list1 = [1, 2, 3]
list2 = [4, 5, 6]

list2.reverse()
list3 = list2[:-1] + list1
print(list3)

23.2.1.6. Solution Section 5.3.2.4

fruit_prices["pear"] = [3, 4, 5]
print(fruit_prices["pear"][1])

23.2.1.7. Solution Section 5.4.1.2

my_int = 6
print(type(my_int))
print(dir(my_int))
my_float = 6.0
print(type(my_float))
print(dir(my_float))

23.2.1.8. Solution Section 5.5.1.1

if mango < 0.5:
    print("Mangoes are super cheap; get a bunch of them!")
elif mango < 1.0:
    print("Get one mango from the store.")
elif mango > 2.0 and mango < 5.0:
    print("Mangos are expensive. Maybe just one")
else:
    print("Meh. I don't really even like mangoes.")

23.2.1.9. Solution Section 5.5.2.2

num_elems = len(random_stuff)

for i in range(num_elems):
    val = random_stuff[i]
    print(f"{i}, {i**2}")

23.2.1.10. Solution Section 5.5.4.1

list_with_squares = [(ii, ii ** 2) for ii in range(len(random_stuff))]
print(list_with_squares)

23.2.1.11. Solution Section 5.5.5.1

num = 800

if num > 500:
   if num < 900:
      if num > 700:
         print("Great number.")
      else:
         print("Terrible number.")
num = 800

if num > 500:
   if num < 900:
   if num > 700:
         print("Great number.")
      else:
         print("Terrible number.")

23.2.1.12. Solution Section 5.7.1.1

def create_sample(x, mu, sd, n):
    result = []
    for ii in range(n):
        result.append(add_noise(x, mu, sd))
    return result

sample = create_sample(4, 1, 2, 10)
print(sample)

23.2.1.13. Solution Section 5.7.2.4

a_list = [1, 2, 3, 4, 5]
arg_printer(min, a_list)
arg_printer(len, a_list)

23.2.1.14. Solution Section 5.8.4.1

from math import pi

class Circle:

    def __init__(self, radius):
        self.radius = radius

    def area(self):
        return pi * self.radius**2

    def circumference(self):
        return 2 * pi * self.radius

class Square:
    def __init__(self, side):
        self.side = side

    def area(self):
        return self.side ** 2

    def circumference(self):
        return self.side * 4

23.2.1.15. Solution Section 5.8.8.1

from math import sqrt

class Square:
    def __init__(self, side):
        self.side = side

    def area(self):
        return self.side ** 2

    def circumference(self):
        return self.side * 4

    def __mul__(self, prey):
        new_area = self.area() + prey.area()
        self.side = sqrt(new_area)

s1 = Square(4)
s2 = Square(2)
print(s1.side)
print(s1.area())

print("Eat the other square")
s1 * s2

print(s1.side)
print(s1.area())

23.3. Scientific computing

23.3.1. Section 8

23.3.1.1. Solution Section 8.2.3.2

list_of_list_of_lists = [[[1, 1, 2, 3, 5], [8, 13, 21, 34, 55]],
                         [[2, 2, 4, 6, 10], [16, 26, 42, 68, 55]]]

array3d = np.array(list_of_list_of_lists)
print(array3d)
print(array3d[1, 0, 2])

23.3.1.2. Solution Section 8.2.3.4

The first item in the strides of the first array represents the 10 items in the second dimension that you would have to pass through to get to the first item of the second row (times 8 bytes per item). The first item in the strides of the second array represents the 4 items (in the second dimension) times the 8 items (in the third dimension) that you would have to pass through to get to the first item of the next position along the first dimension (again times 8 bytes per item).
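A minimal check of this logic (the shapes are assumed to match the arrays in the exercise: a 2-dimensional array with 10 items along its second dimension, and a 3-dimensional array with 4 and 8 items along its second and third dimensions):

import numpy as np

print(np.zeros((5, 10)).strides)     # (80, 8): 10 items * 8 bytes to step one row
print(np.zeros((5, 4, 8)).strides)   # (256, 64, 8): 4 * 8 items * 8 bytes to step along the first dimension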

23.3.1.3. Solution Section 8.2.8.1

bold.std()
bold[(bold >= 1) & (bold < 100)].std()

23.3.1.4. Solution Section 8.2.9.1

solution1 = np.arange(1, 100, 2)
solution2 = np.ones((3, 5))
array_with_numbers = np.arange(150)
solution3 = (array_with_numbers > 0) & (array_with_numbers <= 100)
solution4 = np.mod(array_with_numbers, 3) == 0
solution5 = np.random.random_sample(100) * 10

23.3.1.5. Solution Section 8.2.10.1

import scipy.interpolate as spi
x = np.arange(0, 360, 2)
interp = spi.interp1d(x, bold[32, 32, 12])
new_x = np.arange(0, 358, 0.5)
high_res = interp(new_x)

23.3.2. Section 9

23.3.2.1. Solution Section 9.3.1.1

subjects["IQ_sub_diff"] = subjects["IQ_Vocab"] - subjects["IQ_Matrix"]

23.3.2.2. Solution Section 9.3.2.1

new_df = subjects[subjects["IQ"].notnull()]
subjects.dropna(inplace=True)

23.3.2.3. Solution Section 9.3.4.1

# One way of doing this:
subjects.groupby(["Handedness", "Gender", "age_less_than_10"]).mean()

# Another way of doing this:
subjects[(subjects["Handedness"] == "Right") &
         (subjects["Gender"] == "Male") &
         (~subjects["age_less_than_10"])].mean()

23.3.2.4. Solution Section 9.4.1.1

# Part 1
gender_groups_mean = joined.groupby(["Gender", "tractID", "nodeID"]).mean()
male_means = gender_groups_mean.loc[("Male", "Left Cingulum Cingulate")]
female_means = gender_groups_mean.loc[("Female", "Left Cingulum Cingulate")]

fig, ax = plt.subplots()
ax.plot(male_means["fa"])
ax.plot(female_means["fa"])
ax.set_xlabel("Node")
ax.set_ylabel("Fractional anisotropy")

# Part 2

for tract in joined["tractID"].unique():
    fig, ax = plt.subplots()
    ax.plot(group_means.loc[(False, tract)]["fa"])
    ax.plot(group_means.loc[(True, tract)]["fa"])
    ax.set_xlabel("Node")
    ax.set_ylabel("Fractional anisotropy")

23.3.3. Section 10

23.3.3.1. Solution Section 10.1.1.1

fig, ax = plt.subplots()
ax.plot(trial,
        first_block,
        marker='o',
        linestyle='--',
        label="First block",
        color="lightseagreen")

ax.plot(trial,
        middle_block,
        marker='v',
        linestyle='--',
        label="Middle block",
        color="palegoldenrod")

ax.plot(trial,
        last_block,
        marker='^',
        linestyle='--',
        label="Last block",
        color="firebrick")

ax.set_xlabel("Trials")
ax.set_ylabel("Percent correct")
ax.legend()
title = ax.set_title("Harlow, 1949")

23.3.3.2. Solution Section 10.1.2.1

fig, ax = plt.subplots(5, 4)
min_fa = min([younger_fa.min(), older_fa.min()])
max_fa = max([younger_fa.max(), older_fa.max()])

for tract_idx in range(20):
    pathway = tracts[tract_idx]
    ax.flat[tract_idx].plot(younger_fa[pathway], color="C0")
    ax.flat[tract_idx].plot(older_fa[pathway], color="C1")
    ax.flat[tract_idx].set_ylim([min_fa, max_fa])
    ax.flat[tract_idx].set_title(pathway)
    ax.flat[tract_idx].axis("off")


fig.set_tight_layout("tight")
fig.set_size_inches([10, 8])

ax[4, 0].set_xlabel("Node")
label = ax[4, 0].set_ylabel("Fractional Anisotropy")

23.3.3.3. Solution Section 10.3.1.1

max_bold = bold.max()
min_bold = bold.min()
fig, ax = plt.subplots(3, 6)
for frame_idx in range(18):
    ax.flat[frame_idx].matshow(
        bold[:, :, 10,
        frame_idx * 10],
        cmap="bone",
        vmax=max_bold,
        vmin=min_bold)
    ax.flat[frame_idx].axis("off")
fig.set_tight_layout("tight")

23.4. Neuroimaging in Python

23.4.1. Section 11

23.4.1.1. Solution Section 11.2.2.4

layout = BIDSLayout("path/to/bids-examples/ds011")
print(layout)
bold_files = layout.get(suffix="bold", return_type='filename', extension="nii.gz")
trs = [layout.get_metadata(ff)["RepetitionTime"] for ff in bold_files]
print(trs)

23.4.2. Section 12

23.4.2.1. Solution Section 12.1.1

The results are different, but only by a very small amount. In fact, they are identical if voxels with standard deviation smaller than 0.001 are excluded from the final calculation:

mean = np.mean(data_bold, -1)
std = np.std(data_bold, -1)
tsnr_numpy = np.zeros(mean.shape)
std_idx = std > 1.0e-3
tsnr_numpy[std_idx] = mean[std_idx] / std[std_idx]

23.4.2.2. Solution Section 12.1.2

The SNR computed by MRIQC is larger than the one calculated before. One possible reason is that MRIQC corrects the data for motion-related artifacts before calculating the SNR.

23.4.3. Section 13

23.4.3.1. Solution Section 13.1.1

This is [-1, 1, 1]

23.4.3.2. Solution Section 13.1.2

The first dimension increases from front to back (P), the second dimension increases from inferior to superior (S), and the last dimension increases either from right to left (L) or from left to right (R), so it’s either LPS or RPS.

23.4.3.3. Solution Section 13.1.3

\(C_{2, 1} = A_{2, 1} B_{1, 1} + A_{2, 2} B_{2, 1} + A_{2, 3} B_{3, 1} = 4 \cdot 7 + 5 \cdot 9 + 6 \cdot 11 = 139\)

\(C_{2, 2} = A_{2,1} B_{1,2} + A_{2, 2} B_{2, 2} + A_{2, 3} B_{3, 2} = 4 \cdot 8 + 5 \cdot 10 + 6 \cdot 12 = 154\)

def matmul(a, b):
    result = np.zeros((a.shape[0], b.shape[1]))
    for ii in range(result.shape[0]):
        for jj in range(result.shape[1]):
            for kk in range(a.shape[1]):
                result[ii, jj] = result[ii, jj] + a[ii, kk] * b[kk, jj]

    return result
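As a quick sanity check against the hand calculation above (the second row of A and the entries of B are taken from that calculation; the first row of A is chosen arbitrarily), the result can be compared with NumPy's built-in matrix multiplication:

import numpy as np

A = np.array([[1, 2, 3],    # first row chosen arbitrarily for illustration
              [4, 5, 6]])   # second row as in the hand calculation
B = np.array([[7, 8], [9, 10], [11, 12]])
print(matmul(A, B))         # second row is [139. 154.], matching the results above
print(A @ B)                # NumPy's built-in matrix multiplication agrees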

23.4.3.4. Solution Section 13.3.2.1

print(affine_bold @ np.array([data_bold.shape[0],
                              data_bold.shape[1],
                              data_bold.shape[2], 1]))

23.4.3.5. Solution Section 13.3.3.1

The rotation matrix is defined as:

\[\begin{split} A_{\theta} = \begin{bmatrix} \cos(\theta) & -\sin(\theta) \\ \sin(\theta) & \cos(\theta) \end{bmatrix} \end{split}\]

And its inverse is a rotation by \(-\theta\):

\[\begin{split} A_{\theta}^{-1} = A_{-\theta} = \begin{bmatrix} \cos(-\theta) & -\sin(-\theta) \\ \sin(-\theta) & \cos(-\theta) \end{bmatrix} \end{split}\]

The transpose of the rotation matrix is the matrix with its rows and columns switched:

\[\begin{split} A_{\theta}^{T} = \begin{bmatrix} \cos(\theta) & \sin(\theta) \\ -\sin(\theta) & \cos(\theta) \end{bmatrix} \end{split}\]

But \(\cos(\theta) = \cos(-\theta)\) and \(-\sin(\theta) = \sin(-\theta)\), so substituting these, the transpose can also be written as:

\[\begin{split} A_{\theta}^{T} = \begin{bmatrix} \cos(-\theta) & \sin(\theta) \\ \sin(-\theta) & \cos(-\theta) \end{bmatrix} \end{split}\]

For the same reason, \(\sin(\theta) = -\sin(-\theta)\), so replacing that in the first row yields exactly the inverse written above: \(A_{\theta}^{T} = A_{\theta}^{-1}\).
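A quick numerical check of this result (the angle here is arbitrary):

import numpy as np

theta = 0.3
A = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta), np.cos(theta)]])
print(np.allclose(A.T, np.linalg.inv(A)))   # True: the transpose equals the inverse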

23.4.3.6. Solution Section 13.3.4.1

# Part 1:
img_bold0 = nib.Nifti1Image(data_bold[:, :, :, 0], img_bold.affine)
img_bold0_resampled = resample_from_to(img_bold0, (img_t1.shape, img_t1.affine))

data_bold0_resampled = img_bold0_resampled.get_fdata()

fig, ax = plt.subplots(1, 2)
ax[0].matshow(data_t1[:, :, data_t1.shape[-1]//2])
im = ax[1].matshow(data_bold0_resampled[:, :, data_bold0_resampled.shape[-1]//2])

Part 2: We’d expect more loss of information going from the T1-weighted data to the BOLD data than vice versa. However, this loss happens exclusively in the T1-weighted data, which has to be downsampled to the coarser BOLD grid. Resampling the BOLD data to the T1 grid does not lose anything, because there is no information in the BOLD data beyond the resolution at which it was originally sampled.

23.5. Image processing

23.5.1. Section 14

23.5.1.1. Solution Section 14.4.1.1

print(gray_img[200, 70])
print(gray_img[400, 200])

The values of these pixels are relatively low (less than 0.5). This is because these pixels have high values in the red and blue channels, and those channels are weighted much less heavily in the conversion to grayscale than the green channel.
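For instance, scikit-image's rgb2gray weights the channels at roughly 0.21 (red), 0.72 (green), and 0.07 (blue), so a pixel that is purely red and blue ends up much darker than a purely green one (the pixel values below are made up):

import numpy as np
from skimage.color import rgb2gray

purple_pixel = np.array([[[1.0, 0.0, 1.0]]])   # high red and blue, no green
green_pixel = np.array([[[0.0, 1.0, 0.0]]])    # green only
print(rgb2gray(purple_pixel))   # about 0.28
print(rgb2gray(green_pixel))    # about 0.72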

23.5.1.2. Solution Section 14.4.2.1

mri_img = np.random.randn(100, 100, 100)

kernel = np.ones((20, 20, 20))
result = np.zeros(mri_img.shape)
padded_mri_img = np.pad(mri_img, int(kernel.shape[0] / 2))

for ii in range(result.shape[0]):
    for jj in range(result.shape[1]):
        for kk in range(result.shape[2]):
            neighborhood = padded_mri_img[ii:ii+kernel.shape[0],
                                          jj:jj+kernel.shape[1],
                                          kk:kk+kernel.shape[2]]
            weighted_neighborhood = neighborhood * kernel
            conv_voxel = np.sum(weighted_neighborhood)
            result[ii, jj, kk] = conv_voxel

23.5.1.3. Solution Section 14.4.3.1

from skimage.filters import sobel, sobel_h, sobel_v
fig, ax = plt.subplots(1, 3)
sobel_collins = sobel(gray_img)
ax[0].matshow(sobel_collins)
sobel_v_collins = sobel_v(gray_img)
ax[1].matshow(sobel_v_collins)
sobel_h_collins = sobel_h(gray_img)
ax[2].matshow(sobel_h_collins)
fig, ax = plt.subplots()
sobel_brain = sobel(brain)
ax.matshow(sobel_brain[:, :, 10])

These functions are edge detectors! The _h and _v versions emphasize horizontal and vertical edges, respectively. These directional versions don’t work on 3-dimensional images, because a 3D volume has no clear notion of “horizontal” and “vertical” associated with it.
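A small toy example makes the directional distinction concrete (the array here is made up):

import numpy as np
from skimage.filters import sobel_h, sobel_v

toy = np.zeros((6, 6))
toy[3:, :] = 1.0                    # an image containing a single horizontal edge
print(np.abs(sobel_h(toy)).max())   # strong response from the horizontal-edge filter
print(np.abs(sobel_v(toy)).max())   # no response from the vertical-edge filter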

23.5.1.4. Solution Section 14.4.4.1

# Part 1
def morphological_white_tophat(image, selem=disk(7)):
    return image - dilation(erosion(image, selem=selem), selem=selem)

def morphological_black_tophat(image, selem=disk(7)):
    return image - erosion(dilation(image, selem=selem), selem=selem)

fig, ax = plt.subplots(1, 2)
ax[0].matshow(morphological_white_tophat(shepp_logan))
ax[1].matshow(morphological_black_tophat(shepp_logan))

fig, ax = plt.subplots(1, 2)
ax[0].matshow(morphological_white_tophat(shepp_logan, selem=disk(14)))
ax[1].matshow(morphological_black_tophat(shepp_logan, selem=disk(14)))


# Part 2
selem = disk(7)
fig, ax = plt.subplots(1, 2)
ax[0].matshow(brain[:, :, 10])
ax[1].matshow(dilation(erosion(brain[:, :, 10], selem=selem), selem=selem))


from skimage.morphology import ball
selem = ball(7)
fig, ax = plt.subplots(1, 2)
ax[0].matshow(brain[:, :, 10])
ax[1].matshow(dilation(erosion(brain, selem=selem), selem=selem)[:, :, 10])

23.5.2. Section 15

23.5.2.1. Solution Section 15.1.1.1

# Part 1
min_intraclass_variance = np.inf

unique_vals = np.unique(slice10)

for ii in range(len(unique_vals)):
    for jj in range(ii, len(unique_vals)):
        candidate1 = unique_vals[ii]
        candidate2 = unique_vals[jj]
        foreground1 = slice10[slice10 < candidate1]
        foreground2 = slice10[(slice10 >= candidate1) & (slice10 < candidate2)]
        background = slice10[slice10 >= candidate2]
        if len(foreground1) and len(foreground2) and len(background):
            foreground1_variance = np.var(foreground1) * len(foreground1)
            foreground2_variance = np.var(foreground2) * len(foreground2)
            background_variance = np.var(background) * len(background)
            intraclass_variance = (foreground1_variance +
                                   foreground2_variance +
                                   background_variance)
            if intraclass_variance < min_intraclass_variance:
                min_intraclass_variance = intraclass_variance
                threshold1 = candidate1
                threshold2 = candidate2

segmentation = np.zeros_like(slice10)
segmentation[(slice10 > threshold1) & (slice10 < threshold2)] = 1
fig, ax = plt.subplots()
ax.imshow(slice10, cmap="bone")
p = ax.imshow(segmentation, alpha=0.5)


from skimage.filters import threshold_multiotsu
threshold1, threshold2 = threshold_multiotsu(slice10)

segmentation = np.zeros_like(slice10)
segmentation[(slice10 > threshold1) & (slice10 < threshold2)] = 1
fig, ax = plt.subplots()
ax.imshow(slice10, cmap="bone")
p = ax.imshow(segmentation, alpha=0.5)

# Part 2
from skimage.filters import try_all_threshold
try_all_threshold(slice10)

The Minimum and Otsu methods both seem to be similarly good at doing this. One way to evaluate this objectively would be to look at the histogram of this slice at every time point in the BOLD time series and to check that the values consistently fall in similar segments.
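A rough sketch of one way to operationalize that check, assuming the slice in question is bold[:, :, 10] of the 4-dimensional BOLD array used earlier: compute a threshold for that slice at every time point and verify that it stays within a narrow range.

from skimage.filters import threshold_otsu

thresholds = [threshold_otsu(bold[:, :, 10, tt]) for tt in range(bold.shape[-1])]
print(min(thresholds), max(thresholds))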

23.5.3. Section 16

23.5.3.1. Solution Section 16.1.2.1

metric = CCMetric(3)
sdr = SymmetricDiffeomorphicRegistration(metric)

mapping = sdr.optimize(mni_data, t1_resamp_data, prealign=affine3d.affine)
t1_warped = mapping.transform(t1_resamp_data)

fig, axes = plt.subplots(1, 3, figsize=(8, 4))
ax = axes.ravel()

ax[0].imshow(mni_data[:, :, 85]/np.max(mni_data))
ax[1].imshow(t1_warped[:, :, 85]/np.max(t1_warped))

stereo = np.zeros((193, 229, 3), dtype=np.uint8)
stereo[..., 0] = 255 * mni_data[:, :, 85]/np.max(mni_data)
stereo[..., 1] = 255 * t1_warped[:, :, 85]/np.max(t1_warped)
ax[2].imshow(stereo)
fig.tight_layout()

Diffeomorphic registration substantially improves the registration to the template. There is a risk of warping the features substantially, to a point where they take on an unrealistic shape, similar to some of the distortions that happen in the photo.

23.6. Machine learning

23.6.1. Section 18

23.6.1.1. Solution Section 18.2.3.2

# Part 1
from sklearn.metrics import mean_squared_error, mean_absolute_error

print(mean_squared_error(y, y_pred))
print(mean_absolute_error(y, y_pred))

# Part 2

import numpy as np
def max_absolute_error(y, y_pred):
    return np.max(np.abs(y - y_pred))

print(max_absolute_error(y, y_pred))

23.6.2. Section 19

23.6.2.1. Solution Section 19.1.1.1

est = LinearRegression()
est.fit(x[:, None], y)

test_x, test_y = make_xy(30)

x_range = np.linspace(test_x.min(), test_x.max(), 100)
reg_line = est.predict(x_range[:, None])

fig, ax = plt.subplots()
ax.scatter(test_x, test_y)
ax.plot(x_range, reg_line)

mse = mean_squared_error(test_y, est.predict(test_x[:, None]))
ax.set_title(f"Mean squared error: {mse:.2f}");

The error does increase when the model is applied to new data, but the increase is much smaller than the one observed for the 10th-order polynomial.

23.6.3. Section 20

23.6.3.1. Solution Section 20.1.3.1

We have higher out-of-sample accuracy because our training sample is larger, so the model has more data to learn from. The downside is that our evaluation dataset is smaller, so we get widely varying test accuracies (between 0.45 and 0.62) that depend on the characteristics of the test sample.

23.6.3.2. Solution Section 20.1.5.1

y_adjusted = abide_data['age_resid']
r2_cv = cross_val_score(est, X, y_adjusted, cv=k)

print("Individual fold scores:", r2_cv)
print(f"\nMean cross-validated R^2: ", np.mean(r2_cv))

This result tells us that there is not much information about age, above and beyond the information about site.

23.6.3.3. Solution Section 20.2.1.1

n_features = [5, 30, 100]
r2 = []
fig, axes = plt.subplots(1, 3)
for ii in range(len(n_features)):
    ax = axes[ii]
    X_subset = X.sample(n_features[ii], axis=1)
    results = learning_curve(est, X_subset, y, train_sizes=train_sizes,
                             cv=5, shuffle=True)
    sizes, train_scores, test_scores = results
    train_mean = train_scores.mean(1)
    test_mean = test_scores.mean(1)

    ax.plot(sizes, train_mean, 'o-', label='Training')
    ax.plot(sizes, test_mean, 'o-', label='Test');
    ax.grid(True, axis='y')
    ax.set_ylim(0, 1)
    ax.set_xlabel('Sample size (n)')
    ax.set_ylabel('$R^2$');
    ax.set_title(f"{n_features[ii]} features used" )
fig.set_tight_layout("tight")

23.6.4. Section 21

23.6.4.1. Solution Section 21.2.1.2

# Part 1
n_features = [10, 100, 400]
for nn in n_features:
    X = scale(features.sample(nn, axis=1, random_state=99))
    alpha = np.logspace(-3, 1, 100)
    ax = plot_coef_path(Lasso, X, y, alpha, max_iter=2000)
    ax.set_title(f"{nn} features")
    ax.set_xlim(1e-3, 10)

# Part 2
y_resid = phenotypes['age_resid']
n_features = [10, 100, 400]
for nn in n_features:
    X = scale(features.sample(nn, axis=1, random_state=99))
    ax = plot_coef_path(Lasso, X, y_resid, alpha, max_iter=2000)
    ax.set_title(f"{nn} features")
    ax.set_xlim(1e-3, 10)

23.6.4.2. Solution Section 21.2.1.5

from sklearn.model_selection import validation_curve, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge
from ndslib.viz import plot_train_test

x_range = np.logspace(-5, 5, 100)
train_scores, test_scores = validation_curve(
   Ridge(), X, y, param_name='alpha',
   param_range=x_range, cv=5, scoring='r2')

ols_r2 = cross_val_score(
   LinearRegression(), X, y, scoring='r2', cv=5).mean()

ax = plot_train_test(
   x_range, train_scores, test_scores, 'Ridge',
   hlines={'OLS (test)': ols_r2})

ax.set_title("Age prediction performance")

23.6.5. Section 22

23.6.5.1. Solution Section 22.1.1.1

x11 = 100
x12 = 40
w_2_11 = -2
w_2_21 = 3
w_2_12 = 0.5
w_2_22 = -1
x21 = w_2_11 * x11 + w_2_21 * x12
x22 = w_2_12 * x11 + w_2_22 * x12
w_3_11 = 1
w_3_21 = 2
x31 = w_3_11 * x21 + w_3_21 * x22
print(x31)

23.6.5.2. Solution Section 22.1.2.1

x11 = 100
x12 = 40
w_2_11 = -2
w_2_21 = 3
w_2_12 = 0.5
w_2_22 = -1
x21 = relu(w_2_11 * x11 + w_2_21 * x12)
x22 = relu(w_2_12 * x11 + w_2_22 * x12)
w_3_11 = 1
w_3_21 = 2
x31 = relu(w_3_11 * x21 + w_3_21 * x22)
print(x31)

23.6.5.3. Solution Section 22.2.1.1

x11 = 100
x12 = 40
w_2_11 = -2
w_2_21 = 3
w_2_12 = 0.5
w_2_22 = -1
x21 = relu(w_2_11 * x11 + w_2_21 * x12)
x22 = relu(w_2_12 * x11 + w_2_22 * x12)
w_3_11 = 1
w_3_21 = 2
x31 = relu(w_3_11 * x21 + w_3_21 * x22)
e31 = (x31 - 15)**2

iterations = 0

while e31 > 0.01:
    iterations = iterations + 1
    # This part is similar to the two-layer network
    e_3_11 = x21 * d_relu(x31) * e31
    e_3_21 = x22 * d_relu(x31) * e31

    w_3_11 = w_3_11 - lr * e_3_11
    w_3_21 = w_3_21 - lr * e_3_21

    # Propagate the error to the second layer:
    e21 = w_3_11 * e_3_11
    e22 = w_3_21 * e_3_21

    e_2_11 = x11 * d_relu(x21) * e21
    e_2_21 = x12 * d_relu(x21) * e22
    e_2_12 = x12 * d_relu(x22) * e21
    e_2_22 = x12 * d_relu(x22) * e22

    w_2_11 = w_2_11 - lr * e_2_11
    w_2_21 = w_2_21 - lr * e_2_21
    w_2_12 = w_2_12 - lr * e_2_11
    w_2_22 = w_2_22 - lr * e_2_21

    # Forward propagate through the network again with the same inputs
    x21 = relu(w_2_11 * x11 + w_2_21 * x12)
    x22 = relu(w_2_12 * x11 + w_2_22 * x12)
    x31 = relu(w_3_11 * x21 + w_3_21 * x22)
    e31 = (x31 - 15)**2

print(x31)
print(iterations)

23.6.5.4. Solution Section 22.3.1.1

model = keras.Sequential(
    [
        keras.Input(shape=(X_train.shape[-1],)),
        layers.Dense(8, activation="relu"),
        layers.Dense(16, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(16, activation="relu"),
        layers.Dense(3, activation="softmax"),
    ]
)

model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, categorical_y_train, epochs=5, batch_size=5,
                    verbose=2)

y_predict = model.predict(X_test).argmax(axis=1)
print(accuracy_score(y_test, y_predict))

model.summary()

Increasing the number of layers and units can improve accuracy, but it also increases the number of parameters.

23.6.5.5. Solution Section 22.4.1.1

import pandas as pd
import numpy as np
subject_table = load_data("abide2")
new_table = pd.merge(pd.DataFrame(data=dict(subject=subject)),
                     subject_table,
                     left_on="subject",
                     right_on="subject")
new_table.groupby("site")["group"].value_counts()

aut_in_kki = 22 / (67 + 22)
aut_in_ohsu = 36 / (53 + 36)
guess_with_site = np.mean([1-aut_in_kki, 1-aut_in_ohsu])
print(guess_with_site)

y_site = to_categorical(new_table["site"] == "ABIDEII-KKI_1")
X_train, X_test, y_train, y_test = train_test_split(
    X, y_site, train_size=0.8, random_state=100)
history = model.fit(X_train, y_train, batch_size=5, epochs=5,
                    verbose=2)

model.evaluate(X_test, y_test, verbose=2)

Based on these results – chance performance given knowledge of site is 0.67 and accuracy of classifying the site is very high – we would have to conclude that the performance of the algorithm could be explained entirely by classifying the site.