Python 进阶教程:NumPy的使用

1.1 什么是NumPy

NumPy(Numerical Python)是Python中进行科学计算的基础库,提供了高效的多维数组对象ndarray以及丰富的数学函数库。

1.2 NumPy的核心特性

| 特性 | 说明 |
| --- | --- |
| ndarray | N维数组对象,支持向量化运算 |
| 广播功能 | 对不同形状的数组进行数学运算 |
| 集成C/C++ | 高性能的数组操作 |
| 线性代数 | 矩阵运算、特征值计算等 |
| 傅里叶变换 | 信号处理、图像处理 |
| 随机数生成 | 统计分析和模拟 |

1.3 NumPy vs Python原生列表

| 对比项 | Python列表 | NumPy数组 |
| --- | --- | --- |
| 数据类型 | 混合 | 同质 |
| 性能 | 较慢 | 高效 |
| 内存占用 | 较大 | 紧凑 |
| 运算方式 | 逐元素循环 | 向量化 |
| 功能 | 基础列表操作 | 丰富数学函数 |

# Python列表:需要循环
a = [1, 2, 3, 4, 5]
b = [x * 2 for x in a]  # [2, 4, 6, 8, 10]
​
# NumPy数组:向量化运算
import numpy as np
arr = np.array([1, 2, 3, 4, 5])
result = arr * 2  # array([2, 4, 6, 8, 10])

2. 安装与导入

2.1 安装NumPy

# 使用pip安装
pip install numpy
​
# 使用conda安装
conda install numpy
​
# 验证安装
python -c "import numpy; print(numpy.__version__)"

2.2 导入NumPy

# 标准导入(推荐使用别名np)
import numpy as np
​
# 查看版本
print(np.__version__)
​
# 检查是否安装成功
arr = np.array([1, 2, 3])
print(arr)  # [1 2 3]

3. 数组基础

3.1 创建数组

import numpy as np
​
# 从Python列表创建
a = np.array([1, 2, 3, 4, 5])
print(a)  # [1 2 3 4 5]
​
# 二维数组
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b)
# [[1 2 3]
#  [4 5 6]]
​
# 三维数组
c = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(c)
# [[[1 2]
#   [3 4]]
#  [[5 6]
#   [7 8]]]

3.2 数据类型

NumPy支持多种数据类型:

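| 数据类型 | 说明 | 示例 |
| --- | --- | --- |
| int8 | 8位整数 | -128 ~ 127 |
| int16 | 16位整数 | -32768 ~ 32767 |
| int32 | 32位整数 | -2³¹ ~ 2³¹-1 |
| int64 | 64位整数 | -2⁶³ ~ 2⁶³-1 |
| uint8 | 无符号8位 | 0 ~ 255 |
| float16 | 半精度浮点 | 16位 |
| float32 | 单精度浮点 | 32位 |
| float64 | 双精度浮点 | 64位 |
| complex64 | 复数(双32位) | 实部+虚部 |
| bool | 布尔类型 | True/False |
| object | Python对象 | 任意类型 |
| bytes_(旧名 string_,该旧名已在NumPy 2.0中移除) | 字节字符串 | 固定长度 |
| str_(旧名 unicode_,该旧名已在NumPy 2.0中移除) | Unicode字符串 | 固定长度 |
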
# 指定数据类型
arr1 = np.array([1, 2, 3], dtype=np.float32)
print(arr1.dtype)  # float32
​
arr2 = np.array([1, 0, 1], dtype=np.bool_)
print(arr2.dtype)  # bool
​
# 类型转换
arr = np.array([1.5, 2.7, 3.9])
print(arr.astype(int))  # [1 2 3]

4. 数组创建

4.1 常用创建函数

| 函数 | 说明 | 示例 |
| --- | --- | --- |
| np.array() | 从列表创建 | np.array([1,2,3]) |
| np.zeros() | 全0数组 | np.zeros((2,3)) |
| np.ones() | 全1数组 | np.ones((2,3)) |
| np.full() | 填充数组 | np.full((2,3), 7) |
| np.empty() | 未初始化数组 | np.empty((2,3)) |
| np.arange() | 范围数组 | np.arange(0, 10, 2) |
| np.linspace() | 等差数组 | np.linspace(0, 1, 5) |
| np.eye() | 单位矩阵 | np.eye(3) |
| np.random.rand() | 随机数组(0-1) | np.random.rand(2,3) |
| np.random.randint() | 随机整数 | np.random.randint(0,10,(2,3)) |

4.2 创建函数示例

import numpy as np
​
# np.zeros() - 创建全0数组
zeros_1d = np.zeros(5)          # 一维:[0. 0. 0. 0. 0.]
zeros_2d = np.zeros((3, 4))     # 二维3x4
zeros_3d = np.zeros((2, 3, 4))  # 三维2x3x4
​
# np.ones() - 创建全1数组
ones_arr = np.ones((2, 3))
print(ones_arr)
# [[1. 1. 1.]
#  [1. 1. 1.]]
​
# np.full() - 填充指定值
full_arr = np.full((2, 3), 99)
print(full_arr)
# [[99 99 99]
#  [99 99 99]]
​
# np.arange() - 类似range
range1 = np.arange(10)        # [0 1 2 3 4 5 6 7 8 9]
range2 = np.arange(0, 10, 2)  # [0 2 4 6 8]
range3 = np.arange(5, 0, -1)   # [5 4 3 2 1]
​
# np.linspace() - 等差数列
linspace_arr = np.linspace(0, 1, 5)  # [0.   0.25 0.5  0.75 1.  ]
print(linspace_arr)
​
# np.eye() - 单位矩阵
identity = np.eye(3)
print(identity)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]
​
# np.diag() - 对角矩阵
diag_arr = np.diag([1, 2, 3])
print(diag_arr)
# [[1 0 0]
#  [0 2 0]
#  [0 0 3]]

4.3 随机数组创建

# np.random.rand() - [0, 1)均匀分布
rand_arr = np.random.rand(3, 4)
​
# np.random.randn() - 标准正态分布
randn_arr = np.random.randn(3, 4)
​
# np.random.randint() - 随机整数
randint_arr = np.random.randint(0, 10, (3, 4))  # [0, 10)之间的整数,不含10
​
# np.random.uniform() - 均匀分布
uniform_arr = np.random.uniform(1, 5, (3, 4))
​
# np.random.normal() - 正态分布
normal_arr = np.random.normal(0, 1, (3, 4))  # 均值0,标准差1
​
# np.random.choice() - 随机选择
choice_arr = np.random.choice([1, 2, 3, 4, 5], size=10)
​
# 设置随机种子(可复现)
np.random.seed(42)
arr1 = np.random.rand(5)
​
np.random.seed(42)
arr2 = np.random.rand(5)  # 与arr1相同

4.4 特殊数组

# 创建与已有数组相同形状的数组
arr = np.array([[1, 2, 3], [4, 5, 6]])
​
zeros_like = np.zeros_like(arr)  # 与arr形状相同的全0数组
ones_like = np.ones_like(arr)     # 与arr形状相同的全1数组
empty_like = np.empty_like(arr)   # 与arr形状相同的空数组
​
# 创建对角数组
diag_arr = np.diag([1, 2, 3, 4])  # 主对角线
diag_arr = np.diag([1, 2, 3, 4], k=1)  # 第k条对角线

5. 数组属性

5.1 核心属性表

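| 属性 | 说明 | 返回值类型 |
| --- | --- | --- |
| ndarray.ndim | 数组维度数 | int |
| ndarray.shape | 各维度大小 | tuple |
| ndarray.size | 元素总数 | int |
| ndarray.dtype | 数据类型 | dtype |
| ndarray.itemsize | 元素字节大小 | int |
| ndarray.nbytes | 总字节数 | int |
| ndarray.T | 转置数组 | ndarray |
| ndarray.real | 实部 | ndarray |
| ndarray.imag | 虚部 | ndarray |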

5.2 属性示例

import numpy as np

arr = np.array([[1, 2, 3], [4, 5, 6]])

print("数组:")
print(arr)
# [[1 2 3]
#  [4 5 6]]

print(f"ndim (维度): {arr.ndim}")      # 2
print(f"shape (形状): {arr.shape}")    # (2, 3)
print(f"size (元素数): {arr.size}")     # 6
print(f"dtype (类型): {arr.dtype}")    # int64
print(f"itemsize (字节): {arr.itemsize}") # 8
print(f"nbytes (总字节): {arr.nbytes}")  # 48

# 转置
print(f"T (转置):")
print(arr.T)
# [[1 4]
#  [2 5]
#  [3 6]]

5.3 高维数组属性

arr_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])

print(f"维度: {arr_3d.ndim}")      # 3
print(f"形状: {arr_3d.shape}")     # (3, 2, 2)
print(f"元素数: {arr_3d.size}")    # 12

# 访问具体元素
print(f"形状解释: {arr_3d.shape}")
# (3, 2, 2) 表示:
# - 3个二维数组
# - 每个二维数组有2行
# - 每行有2个元素

6. 索引与切片

6.1 一维数组索引

import numpy as np

arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 基本索引
print(arr[0])   # 0(第一个元素)
print(arr[-1])  # 9(最后一个元素)
print(arr[5])   # 5

# 切片: start:stop:step
print(arr[2:7])    # [2 3 4 5 6](索引2到6)
print(arr[:5])     # [0 1 2 3 4](从头到4)
print(arr[5:])     # [5 6 7 8 9](从5到结尾)
print(arr[::2])    # [0 2 4 6 8](步长2)
print(arr[::-1])   # [9 8 7 6 5 4 3 2 1 0](反转)
print(arr[5:2:-1]) # [5 4 3](反向切片)

6.2 多维数组索引

arr_2d = np.array([[1, 2, 3, 4],
                   [5, 6, 7, 8],
                   [9, 10, 11, 12]])

print("原始数组:")
print(arr_2d)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]

# 索引格式: arr[行, 列]
print(arr_2d[0, 0])   # 1(第一行第一列)
print(arr_2d[1, 2])   # 7(第二行第三列)
print(arr_2d[-1, -1]) # 12(最后一行最后一列)

# 行切片
print(arr_2d[0, :])   # [1 2 3 4](第一行所有列)
print(arr_2d[1, :])   # [5 6 7 8](第二行所有列)

# 列切片
print(arr_2d[:, 0])   # [1 5 9](第一列所有行)
print(arr_2d[:, -1])  # [4 8 12](最后一列)

# 子矩阵
print(arr_2d[0:2, 1:3])
# [[ 2  3]
#  [ 6  7]]

6.3 高级索引

# 整数数组索引
arr = np.array([10, 20, 30, 40, 50])

indices = [0, 2, 4]
print(arr[indices])  # [10 30 50]

# 二维整数数组索引
arr_2d = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

rows = [0, 1, 2]
cols = [0, 1, 2]
print(arr_2d[rows, cols])  # [1 5 9](对角元素)

# 使用不同行和列索引
print(arr_2d[[0, 1], [0, 2]])  # [1 6](取位置(0,0)和(1,2)的元素)

6.4 布尔索引

arr = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])

# 布尔条件
condition = arr > 40
print(condition)  # [False False False False  True  True  True  True  True]

# 使用布尔数组索引
print(arr[condition])  # [50 60 70 80 90]

# 简洁写法
print(arr[arr > 40])  # [50 60 70 80 90]
print(arr[arr % 20 == 0])  # [20 40 60 80]
print(arr[(arr > 20) & (arr < 70)])  # [30 40 50 60]

# 二维布尔索引
arr_2d = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

# 找出大于5的元素位置
mask = arr_2d > 5
print(mask)
# [[False False False]
#  [False False  True]
#  [ True  True  True]]

print(arr_2d[mask])  # [6 7 8 9](展平后的结果)

# np.where - 条件替换
result = np.where(arr_2d > 5, arr_2d, 0)
print(result)
# [[0 0 0]
#  [0 0 6]
#  [7 8 9]]

6.5 花式索引(Fancy Indexing)

arr = np.array([10, 20, 30, 40, 50])

# 多个索引
print(arr[[0, 2, 4]])  # [10 30 50]
print(arr[[-1, -2]])   # [50 40]

# 二维花式索引
arr_2d = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9],
                   [10, 11, 12]])

# 选择特定行
print(arr_2d[[0, 2, 3]])  # 选择第1、3、4行
# [[ 1  2  3]
#  [ 7  8  9]
#  [10 11 12]]

# 选择特定行和列
rows = [0, 2, 3]
cols = [0, 1, 2]
print(arr_2d[np.ix_(rows, cols)])
# [[ 1  2  3]
#  [ 7  8  9]
#  [10 11 12]]

# 使用np.ix_创建网格索引
print(arr_2d[np.ix_([0, 2], [0, 1, 2])])
# [[1 2 3]
#  [7 8 9]]

7. 数组操作

7.1 数组重塑

| 函数 | 说明 |
| --- | --- |
| arr.reshape() | 改变数组形状 |
| arr.resize() | 原地改变形状 |
| arr.flatten() | 展平为一维 |
| arr.ravel() | 展平为一维(视图) |
| arr.transpose() | 转置数组 |
| arr.T | 转置(属性) |

import numpy as np

arr = np.arange(12)
print(f"原数组: {arr}")  # [ 0  1  2  3  4  5  6  7  8  9 10 11]

# reshape - 不改变原数组
reshaped = arr.reshape(3, 4)
print(f"reshape(3,4):\n{reshaped}")
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# -1自动推断维度
print(arr.reshape(3, -1))  # 自动计算列数:4
print(arr.reshape(-1, 4))  # 自动计算行数:3
print(arr.reshape(2, 2, 3))  # 三维:2x2x3

# flatten vs ravel
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
flat1 = arr_2d.flatten()  # 返回副本
flat2 = arr_2d.ravel()    # 返回视图(可能)

flat1[0] = 99
print(arr_2d)  # 原数组不变(flatten返回副本)

# resize - 原地修改
arr = np.arange(6)
arr.resize(2, 3)
print(arr)
# [[0 1 2]
#  [3 4 5]]

7.2 数组合并

| 函数 | 说明 |
| --- | --- |
| np.concatenate() | 沿指定轴连接 |
| np.vstack() | 垂直堆叠(行方向) |
| np.hstack() | 水平堆叠(列方向) |
| np.dstack() | 深度堆叠 |
| np.column_stack() | 列堆叠 |
| np.row_stack() | 行堆叠(NumPy 2.0起已弃用,请改用np.vstack()) |

import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# concatenate: join the 1-D arrays end to end (default axis=0)
c = np.concatenate([a, b])
print(c)  # [1 2 3 4 5 6]

# vstack: stack along rows; the inputs must agree on the number of columns
arr1 = np.array([[1, 2, 3]])
arr2 = np.array([[4, 5, 6]])
vstacked = np.vstack([arr1, arr2])
print(vstacked)
# [[1 2 3]
#  [4 5 6]]

# hstack: stack along columns; the inputs must agree on the number of rows.
# Both operands are (1, 3) here. Passing arr2.T, which is (3, 1), raises
# ValueError because the row counts (1 vs 3) differ.
hstacked = np.hstack([arr1, arr2])
print(hstacked)
# [[1 2 3 4 5 6]]

# dstack: stack along the third axis. 1-D inputs of length N are first
# promoted to shape (1, N, 1), so the result here has shape (1, 3, 2).
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
dstacked = np.dstack([arr1, arr2])
print(dstacked)
# [[[1 4]
#   [2 5]
#   [3 6]]]

7.3 数组分割

| 函数 | 说明 |
| --- | --- |
| np.split() | 沿指定轴分割 |
| np.vsplit() | 垂直分割(行方向) |
| np.hsplit() | 水平分割(列方向) |
| np.dsplit() | 深度分割 |

import numpy as np

arr = np.arange(12).reshape(3, 4)
print(f"原数组:\n{arr}")
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# split - 按索引分割
a, b = np.split(arr, [2])  # 按行索引分割
print(f"split([2]):\na = \n{a}\nb = \n{b}")
# a = [[0 1 2 3]
#      [4 5 6 7]]
# b = [[ 8  9 10 11]]

# vsplit - 垂直分割
upper, lower = np.vsplit(arr, [1])
print(f"upper:\n{upper}")  # [[0 1 2 3]]
print(f"lower:\n{lower}")
# [[ 4  5  6  7]
#  [ 8  9 10 11]]

# hsplit - 水平分割
left, right = np.hsplit(arr, [2])
print(f"left:\n{left}")
# [[0 1]
#  [4 5]
#  [8 9]]
print(f"right:\n{right}")
# [[ 2  3]
#  [ 6  7]
#  [10 11]]

8. 数学运算

8.1 元素级运算

import numpy as np

arr = np.array([1, 2, 3, 4, 5])

# 基本算术运算
print(arr + 1)    # [2 3 4 5 6]
print(arr - 1)    # [0 1 2 3 4]
print(arr * 2)    # [2 4 6 8 10]
print(arr / 2)    # [0.5 1.  1.5 2.  2.5]
print(arr ** 2)   # [ 1  4  9 16 25]
print(arr % 2)    # [1 0 1 0 1]

# 比较运算
print(arr > 2)    # [False False  True  True  True]
print(arr == 3)   # [False False  True False False]
print(arr != 3)   # [ True  True False  True  True]

# 逻辑运算
a = np.array([True, False, True, False])
b = np.array([True, True, False, False])
print(np.logical_and(a, b))  # [ True False False False]
print(np.logical_or(a, b))   # [ True  True  True False]
print(np.logical_not(a))     # [False  True False  True]

8.2 数组间运算

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# 元素级运算
print(a + b)  # [5 7 9]
print(a - b)  # [-3 -3 -3]
print(a * b)  # [ 4 10 18]
print(a / b)  # [0.25 0.4  0.5 ]
print(a ** b) # [  1  32 729]

# 广播后运算
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
row = np.array([1, 2, 3])
print(arr2d + row)  # 行广播
# [[2 4 6]
#  [5 7 9]]

col = np.array([[1], [2]])
print(arr2d + col)  # 列广播
# [[2 3 4]
#  [6 7 8]]

8.3 聚合函数

| 函数 | 说明 |
| --- | --- |
| np.sum() | 求和 |
| np.prod() | 乘积 |
| np.mean() | 平均值 |
| np.std() | 标准差 |
| np.var() | 方差 |
| np.min() | 最小值 |
| np.max() | 最大值 |
| np.argmin() | 最小值索引 |
| np.argmax() | 最大值索引 |
| np.median() | 中位数 |
| np.percentile() | 百分位数 |

import numpy as np

arr = np.array([[1, 2, 3],
                [4, 5, 6]])

# 全局聚合
print(np.sum(arr))      # 21
print(np.prod(arr))     # 720
print(np.mean(arr))     # 3.5
print(np.std(arr))      # 1.7078...
print(np.min(arr))      # 1
print(np.max(arr))      # 6
print(np.median(arr))   # 3.5

# 沿轴聚合
print(np.sum(arr, axis=0))  # [5 7 9] 沿列求和
print(np.sum(arr, axis=1))  # [ 6 15] 沿行求和

print(np.mean(arr, axis=0)) # [2.5 3.5 4.5] 沿列平均
print(np.mean(arr, axis=1)) # [2. 5.] 沿行平均

# 返回索引
print(np.argmin(arr))    # 0(展平后最小值的索引)
print(np.argmax(arr))    # 5(展平后最大值的索引)
print(np.argmin(arr, axis=0))  # [0 0 0] 每列最小值的行索引
print(np.argmax(arr, axis=1))  # [2 2] 每行最大值的列索引

# 百分位数
print(np.percentile(arr, 25))  # 2.25
print(np.percentile(arr, 50))  # 3.5
print(np.percentile(arr, 75))  # 4.75

8.4 三角函数

import numpy as np

angles = np.array([0, np.pi/6, np.pi/4, np.pi/3, np.pi/2])

print(np.sin(angles))
print(np.cos(angles))
print(np.tan(angles))

# 反三角函数
print(np.arcsin(np.array([0, 0.5, 1])))  # [0.         0.52359878 1.57079633]
print(np.arccos(np.array([0, 0.5, 1])))  # [1.57079633 1.04719755 0.        ]
print(np.arctan(np.array([0, 0.5, 1])))  # [0.         0.46364761 0.78539816]

8.5 指数与对数

import numpy as np

arr = np.array([1, 2, 3, 4, 5])

print(np.exp(arr))      # e的幂: [  2.71828183   7.3890561   20.08553692  54.59815003 148.4131591 ]
print(np.exp2(arr))     # 2的幂: [ 2.  4.  8. 16. 32.]
print(np.log(arr))      # 自然对数: [0.         0.69314718 1.09861229 1.38629436 1.60943791]
print(np.log2(arr))     # 底2对数: [0.         1.         1.5849625  2.         2.32192809]
print(np.log10(arr))    # 底10对数: [0.         0.30103    0.47712125 0.60205999 0.69897   ]

9. 形状操作

9.1 形状变换

import numpy as np

arr = np.arange(24)
print(f"原数组: {arr.shape}")  # (24,)

# 改变形状
r1 = arr.reshape(4, 6)
print(f"reshape(4,6): {r1.shape}")  # (4, 6)

r2 = arr.reshape(2, 3, 4)  # 三维
print(f"reshape(2,3,4): {r2.shape}")  # (2, 3, 4)

# 自动推断
r3 = arr.reshape(4, -1)
print(f"reshape(4,-1): {r3.shape}")  # (4, 6)

# newaxis增加维度
arr = np.array([1, 2, 3, 4])
print(arr.shape)  # (4,)

col_vec = arr[:, np.newaxis]
print(col_vec.shape)  # (4, 1)
print(col_vec)
# [[1]
#  [2]
#  [3]
#  [4]]

row_vec = arr[np.newaxis, :]
print(row_vec.shape)  # (1, 4)
print(row_vec)  # [[1 2 3 4]]

9.2 维度操作

import numpy as np

# squeeze - 移除大小为1的维度
arr = np.array([[[1], [2], [3]]])
print(f"shape: {arr.shape}")  # (1, 3, 1)
print(f"squeeze后: {np.squeeze(arr).shape}")  # (3,)

# expand_dims - 增加维度
arr = np.array([1, 2, 3])
expanded = np.expand_dims(arr, axis=0)
print(f"expand_dims(axis=0): {expanded.shape}")  # (1, 3)

# swapaxes - 交换轴
arr = np.random.rand(3, 4, 5)  # 3x4x5
swapped = np.swapaxes(arr, 0, 2)  # 交换第0和第2轴
print(f"swapaxes后: {swapped.shape}")  # (5, 4, 3)

# moveaxis - 移动轴
moved = np.moveaxis(arr, 0, -1)  # 将第0轴移到末尾
print(f"moveaxis后: {moved.shape}")  # (4, 5, 3)

# rollaxis - 滚动轴
rolled = np.rollaxis(arr, 0, 3)  # 将第0轴滚动到第3位置
print(f"rollaxis后: {rolled.shape}")  # (4, 5, 3)

9.3 数组转置

import numpy as np

arr = np.arange(12).reshape(3, 4)
print(f"原数组:\n{arr}")
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# 基本转置
print(f"T:\n{arr.T}")
# [[ 0  4  8]
#  [ 1  5  9]
#  [ 2  6 10]
#  [ 3  7 11]]

# transpose - 多维转置
arr_3d = np.arange(24).reshape(2, 3, 4)  # 2x3x4
print(f"3D数组形状: {arr_3d.shape}")  # (2, 3, 4)

transposed = arr_3d.transpose(2, 0, 1)  # 转为4x2x3
print(f"transpose(2,0,1): {transposed.shape}")  # (4, 2, 3)

# 轴对换
rearranged = np.transpose(arr_3d, (2, 1, 0))
print(f"transpose((2,1,0)): {rearranged.shape}")  # (4, 3, 2)

10. 广播机制

10.1 广播规则

NumPy的广播规则允许对不同形状的数组进行运算:

规则1:如果两个数组的维度数不同,小维度数组的形状会在左边补1。

规则2:如果两个数组在某个维度大小不匹配,大小为1的维度会被扩展以匹配另一个数组。

规则3:如果两个数组在某个维度大小不同且都不为1,则抛出错误。

10.2 广播示例

import numpy as np

# 示例1:标量与数组
arr = np.array([1, 2, 3, 4, 5])
print(arr + 10)  # [11 12 13 14 15]
# 10 被广播到 [10, 10, 10, 10, 10]

# 示例2:一维与二维
arr_2d = np.array([[1, 2, 3],
                   [4, 5, 6]])
arr_1d = np.array([10, 20, 30])

print(arr_2d + arr_1d)
# [[11 22 33]
#  [14 25 36]]
# arr_1d 被广播为 [[10, 20, 30], [10, 20, 30]]

# 示例3:列向量与二维数组
col_vec = np.array([[10], [20]])
print(arr_2d + col_vec)
# [[11 12 13]
#  [24 25 26]]
# col_vec 被广播为 [[10, 10, 10], [20, 20, 20]]

# 示例4:三维广播
a = np.ones((3, 4, 2))
b = np.ones((4, 2))
result = a + b  # b 被广播为 (1, 4, 2) 然后扩展为 (3, 4, 2)

# 示例5:失败的广播
try:
    a = np.array([1, 2, 3])
    b = np.array([1, 2])
    print(a + b)  # ValueError
except ValueError as e:
    print(f"广播错误: {e}")
# 广播错误: operands could not be broadcast together with shapes (3,) (2,)

10.3 广播的应用场景

import numpy as np

# 场景1:归一化数据(每行减去行均值)
data = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])
row_means = data.mean(axis=1, keepdims=True)  # (3,) -> (3,1)
normalized = data - row_means
print("归一化:\n", normalized)
# [[-1.  0.  1.]
#  [-1.  0.  1.]
#  [-1.  0.  1.]]

# 场景2:计算百分比(每列除以列和)
col_sums = data.sum(axis=0)  # [12 15 18]
percentages = data / col_sums
print("百分比:\n", percentages)
# [[0.083 0.133 0.167]
#  [0.333 0.333 0.333]
#  [0.583 0.533 0.500]]

# 场景3:距离计算
points = np.array([[0, 0], [1, 1], [2, 2]])
center = np.array([1, 1])
distances = np.sqrt(np.sum((points - center) ** 2, axis=1))
print("距离:", distances)  # [1.414 0.     1.414]

11. 常用函数

11.1 排序函数

import numpy as np

arr = np.array([[3, 1, 2], [6, 4, 5]])

# np.sort - 不修改原数组
sorted_arr = np.sort(arr)
print("默认排序:\n", sorted_arr)
# [[1 2 3]
#  [4 5 6]]

sorted_axis0 = np.sort(arr, axis=0)  # 按列排序
sorted_axis1 = np.sort(arr, axis=1)  # 按行排序

# arr.sort - 原地排序
arr_copy = arr.copy()
arr_copy.sort(axis=1)
print("原地排序:\n", arr_copy)

# np.argsort - 返回排序索引
indices = np.argsort(arr[0])
print("索引排序:", indices)  # [1 2 0]
print("按索引取元素:", arr[0][indices])  # [1 2 3]

11.2 搜索和计数函数

import numpy as np

arr = np.array([1, 3, 5, 7, 9, 3, 1])

# np.where - 条件索引
indices = np.where(arr > 5)
print("大于5的索引:", indices)  # (array([3, 4]),)
print("大于5的值:", arr[indices])  # [7 9]

# np.argmax / np.argmin
print("最大值索引:", np.argmax(arr))  # 4
print("最小值索引:", np.argmin(arr))  # 0

# np.nonzero - 非零元素索引
arr_bool = np.array([True, False, True, True, False])
print("True的索引:", np.nonzero(arr_bool))  # (array([0, 2, 3]),)

# np.count_nonzero
print("非零元素个数:", np.count_nonzero(arr))  # 7

# np.searchsorted - 有序数组搜索
sorted_arr = np.array([1, 2, 3, 4, 5])
print("插入位置:", np.searchsorted(sorted_arr, 3.5))  # 3

11.3 集合运算

import numpy as np

a = np.array([1, 2, 3, 4, 5])
b = np.array([3, 4, 5, 6, 7])

# np.union1d: sorted unique values present in either array
print("并集:", np.union1d(a, b))  # [1 2 3 4 5 6 7]

# np.intersect1d: sorted unique values present in both arrays
print("交集:", np.intersect1d(a, b))  # [3 4 5]

# np.setdiff1d: values in a that are not in b
print("差集:", np.setdiff1d(a, b))  # [1 2]

# np.setxor1d: values in exactly one of the two arrays (union minus intersection)
print("对称差集:", np.setxor1d(a, b))  # [1 2 6 7]

# np.isin: element-wise membership test.
# It replaces np.in1d, which is deprecated as of NumPy 2.0.
print("a中元素是否在b中:", np.isin(a, b))  # [False False  True  True  True]
print("a和b是否有交集:", np.any(np.isin(a, b)))  # True

11.4 字符串函数

import numpy as np

arr = np.array(['hello', 'world', 'numpy'])

# 大小写转换
print(np.char.upper(arr))  # ['HELLO' 'WORLD' 'NUMPY']
print(np.char.lower(arr))  # ['hello' 'world' 'numpy']
print(np.char.title(arr))  # ['Hello' 'World' 'Numpy']

# 字符串操作
print(np.char.join('-', arr))    # ['h-e-l-l-o', 'w-o-r-l-d', 'n-u-m-p-y']
print(np.char.replace(arr, 'o', 'X'))  # ['hellX', 'wXrld', 'numpy']

# 判断函数
print(np.char.isalpha(arr))  # [True True True]
print(np.char.isdigit(arr))  # [False False False]

# 字符串连接
arr1 = np.array(['Hello', 'Hi'])
arr2 = np.array(['World', 'there'])
print(np.char.add(arr1, arr2))  # ['HelloWorld' 'Hithere']

12. 实战示例

12.1 示例1:图像处理基础

import numpy as np

# Build an 8x8 grayscale image: a white (255) frame on a black (0) background
image = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 255, 255, 255, 255, 255, 255, 0],
    [0, 255, 0, 0, 0, 0, 255, 0],
    [0, 255, 0, 0, 0, 0, 255, 0],
    [0, 255, 0, 0, 0, 0, 255, 0],
    [0, 255, 0, 0, 0, 0, 255, 0],
    [0, 255, 255, 255, 255, 255, 255, 0],
    [0, 0, 0, 0, 0, 0, 0, 0]
], dtype=np.uint8)

print(f"图像形状: {image.shape}")
print(f"像素范围: {image.min()} - {image.max()}")

# Image operations
# 1. Invert colors (255 - pixel always stays within 0..255, so uint8 is safe)
inverted = 255 - image

# 2. Brightness adjustment
# uint8 addition wraps around (255 + 50 -> 49), which would make the clip a
# no-op. Widen to int16 before adding, then narrow again after clipping.
brightness = 50
brightened = np.clip(image.astype(np.int16) + brightness, 0, 255).astype(np.uint8)

# 3. Contrast adjustment: scale each pixel's distance from the mean.
# image.mean() is a float, so this arithmetic runs in floating point and
# cannot wrap before the clip.
contrast = 1.5
mean = image.mean()
adjusted = np.clip((image - mean) * contrast + mean, 0, 255).astype(np.uint8)

# 4. Build a color (RGB) image by repeating the gray channel three times: 8x8x3
color_image = np.stack([image, image, image], axis=-1)
print(f"彩色图像形状: {color_image.shape}")

12.2 示例2:数据分析统计

import numpy as np

# 模拟学生成绩数据
np.random.seed(42)
n_students = 100
n_subjects = 5

# 生成成绩数据(40-99分,randint不包含上界100)
subjects = ['Math', 'English', 'Science', 'History', 'Art']
scores = np.random.randint(40, 100, size=(n_students, n_subjects))

print("=== 成绩统计报告 ===")
print(f"学生人数: {n_students}")
print(f"科目数: {n_subjects}")
print()

# 各科目统计
print("各科目统计:")
print("-" * 50)
for i, subject in enumerate(subjects):
    subject_scores = scores[:, i]
    print(f"{subject:10s}: 平均={subject_scores.mean():.1f}, "
          f"最高={subject_scores.max()}, "
          f"最低={subject_scores.min()}, "
          f"标准差={subject_scores.std():.1f}")

print()
print("=" * 50)

# 各学生统计
student_means = scores.mean(axis=1)
student_totals = scores.sum(axis=1)

print(f"班级平均分: {student_means.mean():.2f}")
print(f"班级总分: {student_totals.sum()}")

# 找出优秀学生(平均分>90)
excellent = np.where(student_means > 90)[0]
print(f"优秀学生数(平均>90): {len(excellent)}")

# 找出需要帮助的学生(任一科目<60)
needs_help = np.where((scores < 60).any(axis=1))[0]
print(f"需要帮助的学生数(有科目<60): {len(needs_help)}")

# 百分位数分析
print("\n成绩分布:")
percentiles = [25, 50, 75, 90, 95]
for p in percentiles:
    print(f"  {p}百分位: {np.percentile(scores, p):.1f}")

12.3 示例3:矩阵运算与线性代数

import numpy as np

# 创建矩阵
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 10]])

B = np.array([[7, 8, 9],
              [1, 2, 3],
              [4, 5, 6]])

print("矩阵A:")
print(A)
print("\n矩阵B:")
print(B)

# 矩阵乘法
C = np.dot(A, B)
print("\n矩阵乘法 A @ B:")
print(C)

# 等价于
C = A @ B
print("\n矩阵乘法 A @ B (使用@运算符):")
print(C)

# 转置
print("\nA的转置:")
print(A.T)

# 求逆矩阵
try:
    A_inv = np.linalg.inv(A)
    print("\nA的逆矩阵:")
    print(A_inv)
    
    # 验证:A @ A_inv ≈ I
    identity = A @ A_inv
    print("\nA @ A⁻¹ (应接近单位矩阵):")
    print(np.round(identity, decimals=5))
except np.linalg.LinAlgError:
    print("矩阵不可逆")

# 求行列式
det_A = np.linalg.det(A)
print(f"\n矩阵A的行列式: {det_A:.2f}")

# 求特征值和特征向量
eigenvalues, eigenvectors = np.linalg.eig(A)
print("\n特征值:")
print(eigenvalues)
print("\n特征向量:")
print(eigenvectors)

# 解线性方程组 Ax = b
b = np.array([1, 2, 3])
x = np.linalg.solve(A, b)
print(f"\n解 Ax = b:")
print(f"x = {x}")

# 验证
print(f"验证 A @ x = b: {A @ x}")

13. 性能优化

13.1 避免复制,使用视图

import numpy as np
import time

arr = np.arange(1000000)

# 注意:reshape在内存连续时返回视图,并不会创建副本
start = time.time()
arr_copy = arr.reshape(1000, 1000)  # 这会创建副本吗?
# 实际上reshape在连续内存时返回视图
copy_time = time.time() - start

# 好:使用视图
arr_view = arr.reshape(1000, 1000)  # 返回视图

# 使用ravel而非flatten
start = time.time()
flat1 = arr.ravel()  # 快:返回视图
flat_time1 = time.time() - start

start = time.time()
flat2 = arr.flatten()  # 慢:返回副本
flat_time2 = time.time() - start

print(f"ravel时间: {flat_time1:.6f}")
print(f"flatten时间: {flat_time2:.6f}")

13.2 向量化优于循环

import numpy as np
import time

# 慢:使用Python循环
arr = np.random.rand(1000000)
start = time.time()
result_loop = np.array([x**2 + 2*x + 1 for x in arr])
loop_time = time.time() - start

# 快:向量化运算
start = time.time()
result_vectorized = arr**2 + 2*arr + 1
vectorized_time = time.time() - start

print(f"循环时间: {loop_time:.4f}s")
print(f"向量化时间: {vectorized_time:.4f}s")
print(f"加速比: {loop_time/vectorized_time:.1f}x")

13.3 使用适当的数据类型

import numpy as np

# 使用float32而非float64节省内存
arr_float64 = np.random.rand(1000000).astype(np.float64)
arr_float32 = np.random.rand(1000000).astype(np.float32)

print(f"float64 大小: {arr_float64.nbytes / 1024 / 1024:.2f} MB")
print(f"float32 大小: {arr_float32.nbytes / 1024 / 1024:.2f} MB")

# 使用int32而非int64
arr_int64 = np.random.randint(0, 100, 1000000, dtype=np.int64)
arr_int32 = np.random.randint(0, 100, 1000000, dtype=np.int32)

print(f"int64 大小: {arr_int64.nbytes / 1024 / 1024:.2f} MB")
print(f"int32 大小: {arr_int32.nbytes / 1024 / 1024:.2f} MB")

13.4 np.where的妙用

import time  # needed for the timings below; the snippet must run on its own

import numpy as np

# Replace an element-wise if/else loop with a single np.where call
arr = np.random.rand(1000000)

# Slow: Python-level loop, one comparison and one assignment per element
start = time.time()
result_loop = np.empty_like(arr)
for i in range(len(arr)):
    if arr[i] > 0.5:
        result_loop[i] = 1.0
    else:
        result_loop[i] = 0.0
loop_time = time.time() - start

# Fast: the condition is evaluated and the values selected in one vectorized call
start = time.time()
result_where = np.where(arr > 0.5, 1.0, 0.0)
where_time = time.time() - start

print(f"if-else循环: {loop_time:.4f}s")
print(f"np.where: {where_time:.4f}s")
print(f"加速比: {loop_time/where_time:.1f}x")

14. 最佳实践

14.1 导入习惯

# ✅ 推荐:使用标准别名
import numpy as np

# ❌ 不推荐:导入全部(污染命名空间)
# from numpy import *

14.2 创建数组的习惯

import numpy as np

# ✅ Recommended: state the dtype explicitly
arr1 = np.zeros((3, 4), dtype=np.float32)
arr2 = np.array([1, 2, 3], dtype=np.int32)

# ✅ Recommended: name the shape parameters instead of passing bare numbers.
# The sizes are example values; they must be defined before they are used.
n_samples, n_features = 100, 5
data = np.random.rand(n_samples, n_features)

# ✅ Recommended: slice to get a view rather than a copy
arr = np.arange(100).reshape(10, 10)
sub_arr = arr[2:5, 2:5]  # a view: writing to it also changes arr

# When an independent copy is needed
sub_arr_copy = arr[2:5, 2:5].copy()

# ✅ Recommended: use np.zeros_like and friends to keep the source dtype
existing_arr = np.array([1, 2, 3], dtype=np.int32)
zeros = np.zeros_like(existing_arr)  # dtype=np.int32

14.3 避免常见陷阱

import numpy as np

# Pitfall 1: mutable default argument
def wrong_function(data=np.array([])):  # ❌
    """Anti-pattern: the default array is created once and shared by every call."""
    pass

def correct_function(data=None):  # ✅
    """Use None as the sentinel and build a fresh array on each call."""
    if data is None:
        data = np.array([])
    pass

# Pitfall 2: comparing floats with ==
# Summing 0.1 three times accumulates rounding error, so the total is not
# exactly equal to 0.3.
a = np.array([0.1] * 3)
b = np.array([0.3])
# print(a.sum() == b.sum())  # ❌ False

# ✅ Compare with a tolerance instead
print(np.isclose(a.sum(), b.sum()))  # True

# Pitfall 3: forgetting that trailing dimensions must match for broadcasting
arr = np.array([[1, 2, 3], [4, 5, 6]])
row = np.array([1, 2, 3, 4])  # ❌ length 4 does not match the 3 columns
# arr + row  # ValueError

row = np.array([1, 2, 3])  # ✅ length matches the 3 columns
print(arr + row)

# Pitfall 4: growing an array inside a loop
# ❌ Slow: np.append allocates and copies a new array on every iteration
result = np.array([])
for i in range(10000):
    result = np.append(result, i)

# ✅ Fast: allocate once, then fill in place
result = np.empty(10000)
for i in range(10000):
    result[i] = i

14.4 调试技巧

import numpy as np

# 打印完整数组
np.set_printoptions(threshold=np.inf)

# 恢复默认设置
np.set_printoptions(edgeitems=3, threshold=1000)

# 检查数组属性
arr = np.array([[1, 2], [3, 4]])
print(f"shape: {arr.shape}, dtype: {arr.dtype}, ndim: {arr.ndim}")

# 检查是否为视图
base = np.arange(10)
view = base[::2]
print(f"是否视图: {view.base is base}")  # True
print(f"共享内存: {np.shares_memory(view, base)}")  # True

# 检查数组是否连续
print(f"C连续: {arr.flags['C_CONTIGUOUS']}")
print(f"F连续: {arr.flags['F_CONTIGUOUS']}")

14.5 常用配置

import numpy as np

# Seed the global random generator so results are reproducible
np.random.seed(42)

# Configure how arrays are printed
np.set_printoptions(
    precision=4,      # digits shown after the decimal point
    suppress=True,    # print small floats in fixed-point, not scientific, notation
    edgeitems=3,      # items shown at each end of a summarized dimension
    linewidth=120     # characters per output line
)

# Restore the defaults. Every option changed above has to be listed here,
# otherwise suppress=True and linewidth=120 would silently stay in effect.
np.set_printoptions(
    edgeitems=3,
    threshold=1000,
    precision=8,
    suppress=False,
    linewidth=75,
)

附录:常用速查表

A1. 数组创建速查

| 操作 | 代码 |
| --- | --- |
| 零数组 | np.zeros(shape) |
| 一数组 | np.ones(shape) |
| 随机[0,1) | np.random.rand(*shape) |
| 随机整数 | np.random.randint(low, high, shape) |
| 单位矩阵 | np.eye(n) |
| 范围数组 | np.arange(start, stop, step) |
| 等差数组 | np.linspace(start, stop, num) |

A2. 数组操作速查

| 操作 | 代码 |
| --- | --- |
| 改变形状 | arr.reshape(shape) |
| 转置 | arr.T 或 arr.transpose() |
| 合并 | np.concatenate([a, b], axis) |
| 分割 | np.split(arr, indices) |
| 展平 | arr.ravel() 或 arr.flatten() |

A3. 数学函数速查

| 操作 | 代码 |
| --- | --- |
| 求和 | np.sum(arr) 或 arr.sum() |
| 均值 | np.mean(arr) |
| 标准差 | np.std(arr) |
| 最小/大 | np.min(arr), np.max(arr) |
| 点积 | np.dot(a, b) 或 a @ b |

参考资料

© 版权声明
THE END
喜欢就支持一下吧
点赞12 分享
评论 抢沙发

请登录后发表评论

    暂无评论内容