# Understanding Numpy Arrays

In [4]:
# Numpy is a library for operating on multi-dimensional arrays
# It provides an ndarray object and functions to operate on it.

import numpy as np

In [5]:
# Numpy arrays have a fixed size and a homogeneous data type:
# every element of x shares the single dtype printed below.

x = np.array([1, 2, 3, 4, 5])
print(x)
print(x.shape, x.dtype)

[1 2 3 4 5]
(5,) int64


In [6]:
# When an array is created from mixed data, the values are upcast to a common data type.
# Here the single float (1.5) turns every element into a float64.

y = np.array([1.5, 2, 3, 4, 5])
print(y)
print(y.shape, y.dtype)

[1.5 2.  3.  4.  5. ]
(5,) float64


In [7]:
# The dtype can be chosen explicitly when the array is created.
# shape gives the length of each axis; size is the total number of elements in the array.

z = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8)
print(z)
print(z.shape, z.size, z.dtype)

[[1 2 3]
 [4 5 6]]
(2, 3) 6 uint8


In [8]:
# Tabular data layout: axis 0 indexes samples (rows), axis 1 indexes features (columns)

num_samples, num_features = 100, 17

tabular_shape = (num_samples, num_features)
X = np.random.random(tabular_shape)
X.shape

(100, 17)

In [9]:
# Image data layout: (image, row, column, channel), i.e. the channel axis comes last

num_images = 10
num_rows, num_cols = 256, 512
num_channels = 3

image_batch_shape = (num_images, num_rows, num_cols, num_channels)
X = np.random.random(image_batch_shape)
X.shape

(10, 256, 512, 3)

In [10]:
# Time series / NLP / Genomics layout: (sample, step, channel)

num_samples, num_steps, num_channels = 10, 256, 4

sequence_batch_shape = (num_samples, num_steps, num_channels)
X = np.random.random(sequence_batch_shape)
X.shape

(10, 256, 4)

# Basic Operations on Numpy Arrays

In [11]:
# Recap: z is the 2 x 3 uint8 array created earlier in this notebook
print(z)
print(z.shape)

[[1 2 3]
 [4 5 6]]
(2, 3)


In [12]:
# Numpy supports several mathematical operations on arrays
# You can see some of them here - https://numpy.org/doc/stable/reference/routines.math.html
# With no axis argument, sum() adds up every element and returns a single value

z.sum()

21

In [13]:
# You can specify an axis along which operations should be performed
# axis=0 sums down the rows, leaving one total per column
# Note in the output that the result dtype (uint64) is wider than the uint8 input

z.sum(axis=0)

array([5, 7, 9], dtype=uint64)

In [14]:
# You can convert arrays from one type to another
# axis=1 sums across the columns (one total per row); astype then casts that result to uint8

z.sum(axis=1).astype(np.uint8)

array([ 6, 15], dtype=uint8)

In [15]:
# You can index along any axis
# A single integer index selects along axis 0, so z[0] is the first row

v = z[0]
v

array([1, 2, 3], dtype=uint8)

In [16]:
# If you provide an index for only the first axis, the result is the sub-array at that position
# z[1] is the second row

v = z[1]
v

array([4, 5, 6], dtype=uint8)

In [17]:
# You can select individual elements by specifying indices for both axes
# z[0, 1] is the element at row 0, column 1

v = z[0, 1]
v

2

In [18]:
# You can slice along any axis
# z[0, 1:] takes row 0, from column 1 through the last column

v = z[0, 1:]
v

array([2, 3], dtype=uint8)

In [19]:
# Slices return a view onto the underlying storage of the array, not a copy.
# v is the z[0, 1:] slice taken above, so this in-place add writes into z's data.

v += 10
v

array([12, 13], dtype=uint8)

In [20]:
# Modifying the view changes the original array as well:
# row 0, columns 1 and 2 of z now hold the incremented values

z

array([[ 1, 12, 13],
       [ 4,  5,  6]], dtype=uint8)

In [21]:
# If you want to preserve the original array, make a copy
# copy() gives c its own storage, so the in-place subtraction only affects c

c = z[0, 1:].copy()
c -= 10
c

array([2, 3], dtype=uint8)

In [22]:
# The original array is unchanged when you modify the copy:
# row 0 of z still holds 12 and 13

z

array([[ 1, 12, 13],
       [ 4,  5,  6]], dtype=uint8)

In [23]:
# Iterating over an array loops over elements in axis 0
# This example prints z[0] and z[1], each a 1-D row of shape (3,)

for row in z:
    print(row, row.shape) 

[ 1 12 13] (3,)
[4 5 6] (3,)


# Creating Arrays

In [24]:
# Arrays can be built from lists; a list of equal-length lists gives a 2-D array

nested_rows = [
    [1, 2, 3],
    [4, 5, 6],
]
z = np.array(nested_rows)
z

array([[1, 2, 3],
       [4, 5, 6]])

In [25]:
# You can create a range using arange
# With a single argument n it produces the integers 0 through n - 1

x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
# You can optionally provide start, stop, and step parameters
# The stop value is exclusive: 10 itself does not appear in the result

x = np.arange(2, 10, 2)
x

array([2, 4, 6, 8])

In [27]:
# You can create a zero-filled array of a given shape with np.zeros
# The default dtype is float64, as the second print shows

x = np.zeros((3, 4, 5))
print(x)
print(x.dtype)

[[[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0.]]]
float64


In [28]:
# You can create an array filled with a constant value using np.full
# (shape first, then the fill value); this avoids building an array of
# ones and multiplying it afterwards.

x = np.full((2, 3, 1), 255, dtype=np.int32)
x

array([[[255],
        [255],
        [255]],

       [[255],
        [255],
        [255]]], dtype=int32)

In [29]:
# You can create randomly initialized arrays
# No seed is set in the cells shown here, so the values will differ on each run

x = np.random.random((1, 10))
x

array([[0.93766989, 0.73087512, 0.7893487 , 0.8125977 , 0.8500206 ,
        0.97407856, 0.24994965, 0.81907807, 0.33642315, 0.16042002]])

# Reshaping Arrays

In [30]:
# Reshaping requires the size to be unchanged
# The 20 elements from arange are rearranged as 2 * 2 * 5 = 20

x = np.arange(20).reshape(2, 2, 5)
x

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]]])

In [31]:
# Another example: 24 elements arranged as 2 * 3 * 4 = 24
x = np.arange(24).reshape(2, 3, 4)
x

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [32]:
# Numpy can infer one missing shape param (passed as -1) if all the others are provided
# Here -1 resolves to 12, because 24 / 2 = 12

x.reshape(2, -1)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])

In [33]:
# This inference works along any axis
# Here -1 resolves to 8, because 24 / 3 = 8

x.reshape(-1, 3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20],
       [21, 22, 23]])

In [34]:
# flatten() converts an n-dimensional array to a flat 1D array (24 elements here)

x.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [35]:
# flatten always makes a copy of the array, whereas ravel returns a flat "view"
# of the same data whenever it can (it only copies if it has to)

x.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [36]:
# The original array is unchanged if you change the flattened array
# y is a copy, so writing y[0] leaves x[0, 0, 0] at 0

y = x.flatten()
y[0] = 9
x

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [37]:
# The original array is changed if you change the raveled array
# y shares x's data here, so writing y[0] also sets x[0, 0, 0] to 9

y = x.ravel()
y[0] = 9
x

array([[[ 9,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [38]:
# expand_dims inserts new axes. Each new axis has length 1
# You can specify where to insert the new axis with the axis parameter
# Here shape (5,) becomes (1, 5)

a = np.arange(5)
b = np.expand_dims(a, axis=0)  # new axis inserted in the front
b.shape

(1, 5)

In [39]:
# axis=-1 appends the new axis instead: shape (5,) becomes (5, 1)
b = np.expand_dims(a, axis=-1)  # new axis inserted at the end
b.shape

(5, 1)

In [40]:
# A tuple of axes inserts several new axes in one call: shape (5,) becomes (1, 5, 1)
b = np.expand_dims(a, axis=(0, -1))  # new axes inserted at multiple locations simultaneously
b.shape

(1, 5, 1)

In [41]:
# You can remove extra axes of length 1 using squeeze
# By default it removes all axes of length 1, so (1, 5, 1) becomes (5,)

c = b.squeeze()
c.shape

(5,)

In [42]:
# You can specify which axis to remove by using the axis parameter
# Dropping axis 2 of the (1, 5, 1) array leaves (1, 5)

c = b.squeeze(axis=2)
c.shape

(1, 5)

In [43]:
# Dropping axis 0 of the (1, 5, 1) array instead leaves (5, 1)
c = b.squeeze(axis=0)
c.shape

(5, 1)

# Joining Arrays

In [44]:
# concatenate joins a sequence of arrays end to end along an existing axis (axis 0 by default)
# The integer array is upcast to float so that it matches the float array of ones

a = np.arange(5)
b = np.ones(5)

pieces = (a, b)
x = np.concatenate(pieces, axis=0)
x

array([0., 1., 2., 3., 4., 1., 1., 1., 1., 1.])

In [47]:
# Arrays can be concatenated along an axis as long as their shapes match along all the other axes

block_shape = (3, 4)
a = np.ones(block_shape)
b = np.zeros(block_shape)

x = np.concatenate((a, b), axis=0)
print(x)
print(x.shape)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
(6, 4)


In [48]:
# You can specify which axis you want to join along
# axis=1 places b to the right of a: (3, 4) and (3, 4) give (3, 8)

x = np.concatenate([a, b], axis=1)
print(x)
print(x.shape)

[[1. 1. 1. 1. 0. 0. 0. 0.]
 [1. 1. 1. 1. 0. 0. 0. 0.]
 [1. 1. 1. 1. 0. 0. 0. 0.]]
(3, 8)


In [49]:
# stack is similar to concatenate but it creates a new axis and joins along that new axis
# Two (3, 4) arrays become one (2, 3, 4) array

x = np.stack([a, b])
print(x)
print(x.shape)

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]]
(2, 3, 4)


In [50]:
# You can specify where the new axis is to be inserted, but it's usually axis 0
# axis=-1 puts the new axis last, giving shape (3, 4, 2)

x = np.stack([a, b], axis=-1)
print(x)
print(x.shape)

[[[1. 0.]
  [1. 0.]
  [1. 0.]
  [1. 0.]]

 [[1. 0.]
  [1. 0.]
  [1. 0.]
  [1. 0.]]

 [[1. 0.]
  [1. 0.]
  [1. 0.]
  [1. 0.]]]
(3, 4, 2)


# Broadcasting basics

In [51]:
# Numpy broadcasting enables you to perform operations with differently shaped arrays subject to some constraints
# https://numpy.org/doc/stable/user/basics.broadcasting.html

# Multiplying an array by a scalar: the scalar is applied to every element

a = np.ones(5)
a * 9

array([9., 9., 9., 9., 9.])

In [52]:
# Adding a scalar works the same way: 5 is added to every element

a + 5

array([6., 6., 6., 6., 6.])

In [56]:
# Many arithmetic operations are applied element-wise on a pair of arrays
# Here a and b both have shape (5,), so elements are paired up one to one

b = np.arange(5)
a + b

array([1., 2., 3., 4., 5.])

In [57]:
# The shapes need to be compatible, otherwise the operation fails.
# The failing line is commented out here, so b is still np.arange(5) and the sum below succeeds.
# Uncommenting it gives b shape (6,), which cannot be broadcast against a's shape (5,);
# a + b then raises a ValueError.

# b = np.arange(6)
a + b

array([1., 2., 3., 4., 5.])

In [58]:
# b has shape (3,). It is treated as shape (1, 3), then repeated 3 times along axis 0 (rows)

a = np.ones((3, 3))
b = np.arange(3)
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [59]:
# b has shape (3, 1) and is repeated 3 times along axis 1 (columns)

b = np.arange(3).reshape(3, 1)
a + b

array([[1., 1., 1.],
       [2., 2., 2.],
       [3., 3., 3.]])

In [60]:
# Normalization example: compute the mean and standard deviation of every column of a matrix.
# A reduction normally drops the axis it reduces over;
# keepdims=True retains that axis with length 1 instead.

x = np.random.random((10, 4))
reduce_axis = 0
mean = np.mean(x, axis=reduce_axis, keepdims=True)
std = np.std(x, axis=reduce_axis, keepdims=True)
mean.shape  # one row and 4 columns

(1, 4)

In [61]:
# mean and std are broadcast from (1, 4) to (10, 4).
# So from each row, we subtract the mean value of each column and divide by the std dev of that column.
# Note: for axis=0 the result would broadcast even without keepdims, because shape (4,) lines up
# with the last axis of (10, 4). keepdims=True becomes essential when reducing along axis=1:
# a (10,) result cannot be broadcast against (10, 4), whereas (10, 1) can.
# In the result, you can see that each column has a mean of (approximately) 0 and a std of 1.

y = (x - mean) / std
print(y.mean(axis=0))
print(y.std(axis=0))

[-8.88178420e-17  0.00000000e+00 -7.77156117e-17  3.99680289e-16]
[1. 1. 1. 1.]


# Advanced Indexing Examples

In [62]:
# A 5 x 5 array holding 0..24, used by the indexing examples that follow
a = np.arange(25).reshape(5, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [63]:
# Crops a sub region by leaving a border of size 1:
# 1:-1 drops the first and last index along each axis

a[1:-1, 1:-1]

array([[ 6,  7,  8],
       [11, 12, 13],
       [16, 17, 18]])

In [None]:
# Reverses the order of the columns (a step of -1 walks axis 1 backwards)

a[:, ::-1]

In [None]:
# Boolean-mask assignment: every element divisible by 2 is set to 0 (a is modified in place)

is_even = (a % 2) == 0
a[is_even] = 0
a

In [None]:
# Slice out every alternate column (columns 0, 2 and 4) from rows 1 and 3

a[[1, 3], ::2]