Skip to main content

Module 25 - NumPy Basics

NumPy (Numerical Python) is the foundational library for scientific computing in Python, providing support for large multi-dimensional arrays and mathematical functions.


1. Introduction to NumPy

Why NumPy?

# Python lists are slow for numerical operations
python_list = list(range(1000000))
# NumPy arrays are 10-100x faster!

import numpy as np
numpy_array = np.arange(1000000)

Installation

pip install numpy

Key Features

✅ Fast, efficient arrays
✅ Broadcasting operations
✅ Linear algebra functions
✅ Random number generation
✅ Fourier transforms
✅ Integration with C/C++/Fortran


2. Creating NumPy Arrays

2.1 From Python Lists

import numpy as np

# 1D array
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1) # [1 2 3 4 5]
print(type(arr1)) # <class 'numpy.ndarray'>

# 2D array
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2)
# [[1 2 3]
#  [4 5 6]]

# 3D array
arr3 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(arr3.shape) # (2, 2, 2)

2.2 Built-in Array Creation Functions

import numpy as np

# Array of zeros
zeros = np.zeros((3, 4)) # 3x4 array of zeros
print(zeros)

# Array of ones
ones = np.ones((2, 3))
print(ones)

# Array with specific value
full = np.full((2, 2), 7) # 2x2 array filled with 7
print(full)

# Identity matrix
identity = np.eye(3) # 3x3 identity matrix
print(identity)

# Range of values
arange = np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
print(arange)

# Evenly spaced values
linspace = np.linspace(0, 1, 5) # 5 values from 0 to 1
print(linspace) # [0. 0.25 0.5 0.75 1. ]

# Random arrays
random = np.random.rand(3, 3) # 3x3 random values [0, 1)
print(random)

random_int = np.random.randint(0, 10, size=(2, 3)) # Random integers in [0, 10) - the upper bound is exclusive
print(random_int)

3. Array Attributes

import numpy as np

arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

print(arr.shape) # (2, 4) - dimensions
print(arr.ndim) # 2 - number of dimensions
print(arr.size) # 8 - total elements
print(arr.dtype) # int64 - data type (the default integer type is platform-dependent, e.g. int32 on some systems)
print(arr.itemsize) # 8 - size of each element in bytes
print(arr.nbytes) # 64 - total bytes

4. Array Operations

4.1 Arithmetic Operations

import numpy as np

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([10, 20, 30, 40])

# Element-wise operations
print(arr1 + arr2) # [11 22 33 44]
print(arr1 - arr2) # [-9 -18 -27 -36]
print(arr1 * arr2) # [10 40 90 160]
print(arr1 / arr2) # [0.1 0.1 0.1 0.1]
print(arr1 ** 2) # [1 4 9 16]

# Scalar operations
print(arr1 + 10) # [11 12 13 14]
print(arr1 * 2) # [2 4 6 8]

# Matrix multiplication
mat1 = np.array([[1, 2], [3, 4]])
mat2 = np.array([[5, 6], [7, 8]])
result = np.dot(mat1, mat2) # or mat1 @ mat2
print(result)
# [[19 22]
#  [43 50]]

4.2 Comparison Operations

import numpy as np

arr = np.array([1, 2, 3, 4, 5])

print(arr > 3) # [False False False True True]
print(arr == 3) # [False False True False False]
print(arr % 2 == 0) # [False True False True False]

# Boolean indexing
even_values = arr[arr % 2 == 0]
print(even_values) # [2 4]

5. Indexing and Slicing

5.1 Basic Indexing

import numpy as np

arr = np.array([10, 20, 30, 40, 50])

print(arr[0]) # 10 - first element
print(arr[-1]) # 50 - last element
print(arr[1:4]) # [20 30 40] - slicing

# 2D array
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

print(arr2d[0, 0]) # 1
print(arr2d[1, 2]) # 6
print(arr2d[0]) # [1 2 3] - first row
print(arr2d[:, 0]) # [1 4 7] - first column
print(arr2d[0:2, 1:3]) # [[2 3]
# [5 6]]

5.2 Boolean Indexing

import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6])

# Get values greater than 3
mask = arr > 3
print(mask) # [False False False True True True]
print(arr[mask]) # [4 5 6]

# Directly
print(arr[arr > 3]) # [4 5 6]

# Multiple conditions
print(arr[(arr > 2) & (arr < 5)]) # [3 4]

5.3 Fancy Indexing

import numpy as np

arr = np.array([10, 20, 30, 40, 50])

# Index with array of indices
indices = [0, 2, 4]
print(arr[indices]) # [10 30 50]

# 2D array
arr2d = np.array([[1, 2], [3, 4], [5, 6]])
print(arr2d[[0, 2]]) # [[1 2]
# [5 6]]

6. Array Manipulation

6.1 Reshaping

import numpy as np

arr = np.arange(12) # [0 1 2 3 4 5 6 7 8 9 10 11]

# Reshape to 2D
reshaped = arr.reshape(3, 4)
print(reshaped)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# Reshape to 3D
reshaped3d = arr.reshape(2, 3, 2)
print(reshaped3d.shape) # (2, 3, 2)

# Flatten array
flattened = reshaped.flatten()
print(flattened) # [0 1 2 3 4 5 6 7 8 9 10 11]

# Ravel (like flatten, but returns a view when possible; flatten always returns a copy)
raveled = reshaped.ravel()

6.2 Stacking and Splitting

import numpy as np

arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

# Vertical stack
vstack = np.vstack([arr1, arr2])
print(vstack)
# [[1 2 3]
#  [4 5 6]]

# Horizontal stack
hstack = np.hstack([arr1, arr2])
print(hstack) # [1 2 3 4 5 6]

# Split array
arr = np.arange(9)
split = np.split(arr, 3) # Split into 3 equal parts
print(split) # [array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]

7. Mathematical Functions

import numpy as np

arr = np.array([1, 2, 3, 4, 5])

# Basic stats
print(np.sum(arr)) # 15
print(np.mean(arr)) # 3.0
print(np.median(arr)) # 3.0
print(np.std(arr)) # 1.4142... (standard deviation)
print(np.var(arr)) # 2.0 (variance)
print(np.min(arr)) # 1
print(np.max(arr)) # 5

# Math operations
print(np.sqrt(arr)) # [1. 1.41421356 1.73205081 2. 2.23606798]
print(np.exp(arr)) # [2.71828183e+00 7.38905610e+00 ... ]
print(np.log(arr)) # [0. 0.69314718 1.09861229 1.38629436 1.60943791]
print(np.sin(arr)) # Sine
print(np.cos(arr)) # Cosine

# Cumulative operations
print(np.cumsum(arr)) # [ 1 3 6 10 15]
print(np.cumprod(arr)) # [ 1 2 6 24 120]

# Axis-based operations
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
print(np.sum(arr2d, axis=0)) # [5 7 9] - sum of each column (collapses axis 0, the rows)
print(np.sum(arr2d, axis=1)) # [ 6 15] - sum of each row (collapses axis 1, the columns)

8. Broadcasting

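Broadcasting allows NumPy to perform element-wise operations on arrays of different but compatible shapes. Shapes are compared from the trailing dimension backwards; two dimensions are compatible when they are equal or when one of them is 1 (a missing leading dimension is treated as 1).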

import numpy as np

# Scalar broadcasting
arr = np.array([1, 2, 3])
result = arr + 10 # [11 12 13]

# 1D + 2D broadcasting
arr1d = np.array([1, 2, 3])
arr2d = np.array([[10], [20], [30]])

result = arr1d + arr2d
print(result)
# [[11 12 13]
#  [21 22 23]
#  [31 32 33]]

# Broadcasting rules example
arr1 = np.ones((3, 4))
arr2 = np.arange(4)
result = arr1 + arr2 # Works! arr2 (shape (4,)) is broadcast across each row, giving shape (3, 4)

9. Practical Examples

9.1 Data Normalization

import numpy as np

# Normalize data to [0, 1] range
data = np.array([10, 20, 30, 40, 50])

normalized = (data - data.min()) / (data.max() - data.min())
print(normalized) # [0. 0.25 0.5 0.75 1. ]

# Standardization (mean=0, std=1)
standardized = (data - data.mean()) / data.std()
print(standardized) # [-1.41 -0.71 0. 0.71 1.41]

9.2 Distance Calculation

import numpy as np

# Euclidean distance between points
point1 = np.array([0, 0])
point2 = np.array([3, 4])

distance = np.linalg.norm(point2 - point1)
print(f"Distance: {distance}") # 5.0

9.3 Image Processing

import numpy as np

# Simulate grayscale image (100x100 pixels)
image = np.random.randint(0, 256, size=(100, 100))

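# Brighten image (safe here because randint returns a wide integer dtype;
# a real uint8 image would wrap around on "+ 50", so convert it to a wider type first)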
brightened = np.clip(image + 50, 0, 255)

# Apply threshold
binary = (image > 128).astype(int) * 255

# Flip image
flipped = np.flip(image, axis=0) # Vertical flip

print(f"Original range: [{image.min()}, {image.max()}]")
print(f"Brightened range: [{brightened.min()}, {brightened.max()}]")

10. Performance Comparison

import numpy as np
import time

size = 1000000

# Python list
python_list = list(range(size))
start = time.time()
result = [x * 2 for x in python_list]
python_time = time.time() - start

# NumPy array
numpy_array = np.arange(size)
start = time.time()
result = numpy_array * 2
numpy_time = time.time() - start

print(f"Python list time: {python_time:.4f}s")
print(f"NumPy array time: {numpy_time:.4f}s")
print(f"NumPy is {python_time / numpy_time:.1f}x faster!")

11. Common Functions Reference

| Function | Description | Example |
| --- | --- | --- |
| np.array() | Create array | np.array([1, 2, 3]) |
| np.arange() | Range of values | np.arange(0, 10, 2) |
| np.linspace() | Evenly spaced | np.linspace(0, 1, 5) |
| np.zeros() | Array of zeros | np.zeros((3, 4)) |
| np.ones() | Array of ones | np.ones((2, 3)) |
| np.random.rand() | Random [0,1) | np.random.rand(3, 3) |
| np.reshape() | Change shape | arr.reshape(2, 3) |
| np.sum() | Sum elements | np.sum(arr) |
| np.mean() | Mean value | np.mean(arr) |
| np.dot() | Dot product | np.dot(a, b) |

Summary

✅ NumPy provides efficient multi-dimensional arrays
✅ 10-100x faster than Python lists for numerical operations
✅ Broadcasting enables operations on different-shaped arrays
✅ Rich mathematical and statistical functions
✅ Foundation for pandas, scikit-learn, TensorFlow
✅ Vectorized operations eliminate explicit loops


Next Steps

In Module 26, you'll learn:

  • Pandas DataFrames and Series
  • Data manipulation and analysis
  • Reading/writing CSV, Excel files
  • Data cleaning and transformation

Practice Exercises

  1. Create a 5x5 matrix of random integers and find the sum of each row and column
  2. Implement matrix multiplication without using np.dot()
  3. Create a function to normalize any array to [0, 1] range
  4. Calculate correlation coefficient between two arrays
  5. Implement image brightness adjustment using NumPy

Challenge

Create a simple neural network forward pass:

  • Input: 784-dimensional vector (28x28 image)
  • Hidden layer: 128 neurons with ReLU activation
  • Output layer: 10 neurons (digits 0-9) with softmax
  • Implement using only NumPy matrix operations
  • Use broadcasting for efficiency