Data
Numpy Documentation is here.
Cheat sheets: All Cheatsheets
Creation
np.array({python list}) or, optionally, np.array({python list}, dtype=float) e.g. np.array([1, 1, 2, 3, 5, 8]) # list of ints => 1-d array np.array([ [1, 1, 2, 3, 5, 8], [2, 3, 5, 8, 13, 21], [0, 2, 2, 4, 6, 10] ]) # => 2-d array i.e. 3×6 shape |
create an n-dimensional array from a python list or list of lists [of lists…]. |
np.zeros((3,5)), np.ones((3,5)), np.full((3,5), 19) | initialize 3×5 matrix with constants |
a = np.empty((3,4)) | uninitialized |
a = np.random.random((3,4)) | random |
np.eye({number of rows}[, {number of cols}, k, dtype]) | Identity like matrix. see also np.identity. |
np.arange([{start:default is 0},] {stop} [{increment: default is 1}]) e.g np.arange(11) np.arange(-1, 121, 4) |
generate a sequence from {start} up to, but not including, {stop}, incrementing by {increment} |
np.linspace(start, stop[, num={number-of-steps}, endpoint={True|False}, dtype={None|float|…}]) | initialize a 1-d array with {number-of-steps} evenly spaced elements. If endpoint=True (the default), include the endpoint {stop} |
Basic array funcs
a.shape | Array dimensions |
len(a) | Length of array |
a.size | Number of array elements |
a.dtype | Data type of array elements |
a.dtype.name | Name of data type |
a.astype(int) | Convert the array's elements to another data type (returns a new array) |
Array operations
np.reshape(a, (3,5)) | reshaping an array’s indices keeping the same number of elements |
np.dot(a,b) | matrix multiplication |
a[0,0], a[:,0], a[0,:] | array slices |
b=a[2:8:3] # start idx=2, index less than 8, incr next index by 3 idx=[2, 7, 2] # a[idx] is the list of [ a[2], a[7], a[2] ] idx2 = slice(2,8,3) # start idx=2, idx < 8, incr by 3; a[idx2] is the same as a[2:8:3] |
Slicing the n-d arrays |
a[{expression}] = {value} e.g. a[a == 42 ] = 12 |
set every entry in a that is equal to 42 to the value 12 |
a[{expression}] = {value} e.g. a[a < 42] = 12 |
Set every element in array a to 12 if the current value is less than 42 |
np.where(expression involving array my_array) # e.g. a=np.array([[[1,3,4,5], 2], [[3,3,1], 0], [[1,11,3], 2], [[2,-81,1], 2]], dtype=object) # => a.shape=(4,2) i.e. 4×2 array where a[1,0] is a list and a[1,1] is a number (dtype=object is required for ragged input in recent NumPy) np.where(a[:,1] == 2) # => (np.array([0,2,3]),) i.e. a tuple holding the indices whose values satisfy our selector a[np.where(a[:,1]==2)] # the rows of a at the indices that match our selector |
np.where() returns a slicing data structure that can be used to index a to get just the elements that match the where clause. This allows you to apply operations to only some entries in the n-d array |
Operations
np.where(expression involving array my_array) # e.g. a=np.array([[[1,3,4,5], 2], [[3,3,1], 0], [[1,11,3], 2], [[2,-81,1], 2]], dtype=object) # => a.shape=(4,2) i.e. 4×2 array where a[1,0] is a list and a[1,1] is a number (dtype=object is required for ragged input in recent NumPy) np.where(a[:,1] == 2) # => (np.array([0,2,3]),) i.e. a tuple holding the indices whose values satisfy our selector a[np.where(a[:,1]==2)] # the rows of a at the indices that match our selector |
apply operations to only some entries in the n-d array |
np.sum(my_array, axis=1) #axis is 0-based | sum over axis=1 |
np.min() | Array-wise minimum value |
np.max(a, axis=0) | Maximum along axis 0, i.e. the maximum of each column |
np.cumsum(axis=1) | Cumulative sum of the elements |
Combining Arrays
np.concatenate((a,d),axis=0) | Concatenate arrays |
np.vstack((a,b)) | Stack arrays vertically (row-wise) array([[ 1. , 2. , 3. ], [ 1.5, 2. , 3. ], [ 4. , 5. , 6. ]]) |
np.r_[e,f] | Stack arrays vertically (row-wise) |
np.hstack((e,f)) | Stack arrays horizontally (column-wise) array([[ 7., 7., 1., 0.], [ 7., 7., 0., 1.]]) |
np.column_stack((a,d)) | Create stacked column-wise arrays array([[ 1, 10], [ 2, 15], [ 3, 20]]) |
np.c_[a,d] | Create stacked column-wise arrays |
Splitting Arrays
np.hsplit(a,3) | Split the array horizontally (column-wise) into 3 equal parts, e.g. [array([1]),array([2]),array([3])] |
np.vsplit(c,2) | Split the array vertically (row-wise) into 2 equal parts, e.g. [array([[[ 1.5, 2. , 1. ], [ 4. , 5. , 6. ]]]), …] |
Data types
np.int8, np.int16, np.int32, np.int64 | signed integer |
np.uint8, np.uint16, np.uint32, np.uint64 | unsigned integer |
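np.float16, np.float32, np.float64 | floats (the old np.float alias was removed in NumPy 1.24; use the builtin float or np.float64) |
np.complex64, np.complex128 | complex numbers (the old np.complex alias was removed in NumPy 1.24; use the builtin complex or np.complex128) |
object | a python object type (the old np.object alias was removed in NumPy 1.24; np.object_ also works) |
np.bytes_ | fixed length byte string (np.string does not exist; np.string_ was removed in NumPy 2.0) |
np.str_ | fixed length unicode string (np.unicode was removed in NumPy 1.24 and np.unicode_ in NumPy 2.0) |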
Math functions
round small numbers to 0: np.where(np.abs(a) > 1e-16,a,0) | use np.abs so that negative values are not zeroed as well |
np.max | |
np.log | |
np.exp | |
np.sqrt | |
np.sin(), np.cos(), np.tan() | |
np.mean() | Mean |
np.median() | Median |
np.corrcoef() | Correlation coefficient |
np.std(a) | Standard deviation |
indicator functions
np.maximum | return the element-wise max of 2 arrays (np.max instead reduces a single array) |
np.sign | return the sign of each element |
np.where(expression,1,0) | e.g np.where(_y > 0,1,0) := 1 if expr is true, 0 otherwise |
np.int64(expression) | e.g np.int64(_y > 0) := 1 if expr is true, 0 otherwise |
Examples
Useful packages
import numpy as np import matplotlib.pyplot as plt
matplotlib
You can use matplotlib to display graphs and charts. To do so, you’ll need to add the following to your jupyter notebook
# This is a bit of magic to make matplotlib figures appear inline in the # notebook rather than in a new window. %matplotlib inline plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray' # Some more magic so that the notebook will reload external python modules; # see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython %load_ext autoreload %autoreload 2
Functions Examples
# Next implement the function svm_loss_vectorized; for now only compute the loss; # we will implement the gradient in a moment. def f(r,c): return (r+1)*10 + c +1 def g(r, c): return f(r, c) * 1000 + 1 _X=np.fromfunction(f, (6,7), dtype=int) _W=np.fromfunction(g, (7, 3), dtype=int) _y=np.array([1, 0, 0, 0, 0, 2]) ---- _X=[[11, 12, 13, 14, 15, 16, 17], [21, 22, 23, 24, 25, 26, 27], [31, 32, 33, 34, 35, 36, 37], [41, 42, 43, 44, 45, 46, 47], [51, 52, 53, 54, 55, 56, 57], [61, 62, 63, 64, 65, 66, 67]] _y=[1, 0, 0, 0, 0, 2] row_indices=[1, 3, 5] col_indices=[4, 5, 4] _X[row_indices,col_indices]=[25, 46, 65] _X[row_indices]=_X[row_indices,:]= [[21, 22, 23, 24, 25, 26, 27], [41, 42, 43, 44, 45, 46, 47], [61, 62, 63, 64, 65, 66, 67]] _X[:,col_indices]= [[15, 16, 15], [25, 26, 25], [35, 36, 35], [45, 46, 45], [55, 56, 55], [65, 66, 65]] _Z = np.copy(_X) _Z[1,4]=1014; _Z[2,3] = 1023; _Z[3,2] = 1032; _Z[4,1]=1041 np.argmax(_Z)=29 np.argmax(_Z, axis = 1)=[6, 4, 3, 2, 1, 6] np.argmax(_Z, axis=0)=[5, 4, 3, 2, 1, 5, 5]