盒子
盒子
文章目录
  1. PETSc
  2. Install
  3. petsc4py
  4. petsc with CUDA

PETSc[00] 从编译到安装

PETSc

PETSc, 即便携式可扩展科学计算工具包(Portable, Extensible Toolkit for Scientific Computation), 特点为:

  • 可用于PDE求解;
  • 提供 C、Fortran 和 Python(通过 petsc4py)的绑定;
  • 高级优化工具包(TAO, Toolkit for Advanced Optimization);
  • 支持 MPI 和 GPU(通过 CUDA、HIP、Kokkos 或 OpenCL),以及混合 MPI-GPU 并行;
  • 支持 NEC-SX Tsubasa 向量引擎.

Install

使用常见的包管理器可以方便地进行安装:

1
2
3
4
conda install -c conda-forge petsc  # conda (any platform)
sudo apt install petsc-dev  # Debian / Ubuntu
brew install petsc  # macOS Homebrew
python -m pip install petsc petsc4py mpi4py  # pip: builds PETSc from source

以homebrew安装为例,安装好后可以通过下面的命令查看安装的版本:

1
2
ls /opt/homebrew/Cellar/petsc
# 3.24.2

之后要配置PETSC_DIR和PETSC_ARCH两个环境变量,从而方便进行编译(对于homebrew安装,不需要指定PETSC_ARCH):

1
2
export PETSC_DIR=/opt/homebrew/opt/petsc  # PETSc installation root
export PETSC_ARCH=""  # empty: a prefix install has no per-arch subdirectory

编译方法为:

1
2
3
4
5
# Manual include/library flags for a Homebrew-installed PETSc:
mpicxx -O3 test.cpp -o test.out \
-I/opt/homebrew/opt/petsc/include \
-L/opt/homebrew/opt/petsc/lib -lpetsc
# or use pkg-config
mpicxx -O3 test.cpp -o test.out $(pkg-config --cflags --libs petsc)

可以用下面代码进行测试:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Compile a minimal PETSc smoke test from stdin (-x c++ treats the heredoc
# as C++ source), then run it on 2 MPI ranks. The quoted 'EOF' delimiter
# prevents shell expansion inside the C code.
mpicxx -O3 -x c++ - -o petsc_test $(pkg-config --cflags --libs petsc) << 'EOF'
#include <petscvec.h>
#include <petscsys.h>
static char help[] = "Simple PETSc example.\n";
int main(int argc, char **argv) {
    PetscCall(PetscInitialize(&argc, &argv, NULL, help));
    /* Print the library version so the install can be identified. */
    char version[256];
    PetscCall(PetscGetVersion(version, sizeof(version)));
    /* Wrap PetscPrintf in PetscCall too, for consistent error checking. */
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%s\n", version));
    /* Create a parallel vector of global size 4, fill it with 1.0 and print it. */
    Vec x;
    PetscInt n = 4;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
    PetscCall(VecSetSizes(x, PETSC_DECIDE, n));
    PetscCall(VecSetFromOptions(x));
    PetscCall(VecSet(x, 1.0));
    PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
    PetscCall(VecDestroy(&x));
    PetscCall(PetscFinalize());
    return 0;
}
EOF
mpirun -n 2 ./petsc_test
# PETSc Release Version 3.24.2, Nov 29, 2025
# Vec Object: 2 MPI processes
# type: mpi
# Process [0]
# 1.
# 1.
# Process [1]
# 1.
# 1.

petsc4py

可以通过pip来安装petsc的python binding,从而在python中方便地使用petsc:

1
python -m pip install petsc petsc4py

可以用下面代码进行测试:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import sys

import petsc4py
petsc4py.init(sys.argv)
from petsc4py import PETSc

import numpy as np
# Explicit imports instead of `from numba import *`: the wildcard pollutes
# the namespace and risks shadowing builtins/NumPy names.
from numba import jit, prange, int64, float64
import timeit
from contextlib import contextmanager


@contextmanager
def timeit_ctx(label="elapsed"):
    """Context manager that prints the elapsed wall time of its body in ms.

    Uses PETSc.Sys.Print so the message is emitted once (not per rank).
    """
    start = timeit.default_timer()
    yield
    end = timeit.default_timer()
    PETSc.Sys.Print(f"{label}: {(end - start) * 1000:.2f} ms")


# compute y = sin(x) + x^2
@jit(nopython=True, cache=True, fastmath=True, parallel=True)
def kernel(n, x, y):
    """Element-wise y[i] = sin(x[i]) + x[i]^2 over the first n entries."""
    for i in prange(n):
        val = x[i]
        y[i] = np.sin(val) + val * val


n = int(2e8)  # global problem size, distributed across the MPI ranks
x = PETSc.Vec().createMPI(n, comm=PETSc.COMM_WORLD)
y = PETSc.Vec().createMPI(n, comm=PETSc.COMM_WORLD)

x.set(2.0)
y.set(0.0)
x_np = x.getArray()  # local NumPy view, used by the pure-NumPy timing below

# Trigger Numba compilation up front so it is not charged to the PETSc timing.
with timeit_ctx("Numba Compile"):
    kernel.compile((int64, float64[:], float64[:]))

with timeit_ctx("PETSc"):
    # Entering the vectors as context managers exposes their local arrays
    # and restores them (marking the vectors as modified) on exit.
    with x as x_loc, y as y_loc:
        size_loc = x.getLocalSize()
        kernel(size_loc, x_loc, y_loc)

with timeit_ctx("NumPy"):
    y_np = np.sin(x_np) + x_np**2

# Global index 0 is owned by rank 0, so only rank 0 may call getValue(0).
if PETSc.COMM_WORLD.getRank() == 0:
    PETSc.Sys.Print(f"True value: {np.sin(2.0) + 4.0:.2f}")
    PETSc.Sys.Print(f"PETSc value: {y.getValue(0):.2f}")

x.destroy()
y.destroy()

运行结果:

1
2
3
4
5
6
# -x exports the environment variable to every MPI rank; pin Numba/OpenMP
# to one thread per rank so 8 ranks do not oversubscribe the cores.
mpirun -np 8 -x NUMBA_NUM_THREADS=1 -x OMP_NUM_THREADS=1 -x KMP_WARNINGS=0 python petsc_test.py
# Numba Compile: 371.68 ms
# PETSc: 656.95 ms
# NumPy: 1735.71 ms
# True value: 4.91
# PETSc value: 4.91

petsc with CUDA

需要提前设置petsc的位置PETSC_DIR和python的位置CONDA_PYTHON:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# export PETSC_DIR=...
# export CONDA_PYTHON=...

# Abort on the first failed command so a failed configure does not
# silently fall through to make.
set -e

# Configure options shared by both builds: CUDA backend, C++ language
# mode, petsc4py bindings, and bundled MPICH/hypre downloads.
COMMON_OPTS="--with-cc=gcc --with-cxx=g++ --with-fc=gfortran \
--with-cuda=1 \
--with-cudac=nvcc \
--with-clanguage=cxx \
--with-petsc4py=1 \
--with-python-exec=$CONDA_PYTHON \
--download-mpich \
--download-hypre"

cd "$PETSC_DIR"

# debug version
ARCH_DEBUG="arch-cuda-cxx-debug"
echo -e "\n\033[0;32m[Step 1/2] Configuring Debug Version ($ARCH_DEBUG)...\033[0m"
./configure $COMMON_OPTS --with-debugging=1 PETSC_ARCH=$ARCH_DEBUG
echo -e "\n\033[0;32mBuilding Debug Version ($ARCH_DEBUG)...\033[0m"
make PETSC_DIR=$PETSC_DIR PETSC_ARCH=$ARCH_DEBUG all

# optimized version
ARCH_OPT="arch-cuda-cxx-opt"
echo -e "\n\033[0;32m[Step 2/2] Configuring Optimized Version ($ARCH_OPT)...\033[0m"
./configure $COMMON_OPTS --with-debugging=0 PETSC_ARCH=$ARCH_OPT
echo -e "\n\033[0;32mBuilding Optimized Version ($ARCH_OPT)...\033[0m"
make PETSC_DIR=$PETSC_DIR PETSC_ARCH=$ARCH_OPT all

echo -e "\n\033[0;32m========================================\033[0m"
echo -e "\033[0;32m Installation Successful! \033[0m"
echo -e "\033[0;32m========================================\033[0m"

# \$ (not \\$) keeps the variable references literal in the printed
# instructions: \\$VAR would print a stray backslash and expand VAR at
# build time instead of in the user's shell.
echo -e "\nAdd the following to your ~/.bashrc or run in your terminal to use the \033[1mOptimized\033[0m version:"
echo "---------------------------------------------------"
echo "export PETSC_DIR=$PETSC_DIR"
echo "export PETSC_ARCH=$ARCH_OPT"
echo "export PYTHONPATH=\$PETSC_DIR/\$PETSC_ARCH/lib:\$PYTHONPATH"
echo "---------------------------------------------------"

echo -e "\nSwitch to \033[1mDebug\033[0m version when developing:"
echo "---------------------------------------------------"
echo "export PETSC_ARCH=$ARCH_DEBUG"
echo "export PYTHONPATH=\$PETSC_DIR/\$PETSC_ARCH/lib:\$PYTHONPATH"
echo "---------------------------------------------------"

检查是否正常安装好了:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from petsc4py import PETSc

# Route every vector configured via setFromOptions() to the CUDA backend.
PETSc.Options().insertString("-vec_type cuda")

# Echo the global options database so the setting is visible in the output.
opts = PETSc.Options()
for k in opts.getAll():
    PETSc.Sys.Print(f"{k} = {opts.getString(k)}")

# Build a sequential vector of length 1024; setFromOptions() picks up
# -vec_type cuda, so the vector ends up with type "seqcuda".
n = 1024
v = PETSc.Vec().create(PETSc.COMM_SELF)
v.setSizes(n)
v.setFromOptions()
v.set(1.0)
v.assemblyBegin()
v.assemblyEnd()

# Scale on the device and verify: norm = sqrt(1024 * 2^2) = 64.
v.scale(2.0)
print("Vec type:", v.getType())
print("Vec norm:", v.norm())
v.destroy()

# vec_type = cuda
# Vec type: seqcuda
# Vec norm: 64.0