GPU多卡压测脚本(矩阵乘法, Torch-based)

import torch
import torch.multiprocessing as mp
import time
import math

def _matrix_side_for_budget(free_bytes, mem_fraction, num_matrices=3, bytes_per_element=4):
    """Return the side length N of the largest square matrices fitting the budget.

    Solves ``num_matrices * N * N * bytes_per_element <= free_bytes * mem_fraction``
    for N and truncates towards zero.

    Raises:
        ValueError: if ``mem_fraction`` is not in the interval (0, 1].
    """
    if not 0 < mem_fraction <= 1:
        raise ValueError(f"mem_fraction must be in (0, 1], got {mem_fraction!r}")
    budget = free_bytes * mem_fraction
    return int(math.sqrt(budget / (num_matrices * bytes_per_element)))


def stress_task(gpu_id, mem_fraction=0.65):
    """Run an endless float32 matrix-multiplication loop on one GPU.

    The matrix size is derived from the memory that is free on the device
    when the task starts: three square float32 matrices (x, y and the result
    z) are sized to occupy ``mem_fraction`` of it.

    Args:
        gpu_id: CUDA device index to load.
        mem_fraction: share of the currently free device memory to spend on
            the three matrices. The rest is headroom for PyTorch/CUDA
            overhead.

    The function only returns on a ``RuntimeError`` (reported on stdout,
    including CUDA out-of-memory) or on ``KeyboardInterrupt``.
    """
    try:
        device = torch.device(f"cuda:{gpu_id}")
        torch.cuda.set_device(device)  # make sure this process uses the right context

        free_mem, total_mem = torch.cuda.mem_get_info(device)

        # Size the matrices from the free memory: 3 matrices * N * N * 4 bytes.
        matrix_size = _matrix_side_for_budget(free_mem, mem_fraction)

        free_gb = free_mem / (1024**3)
        total_gb = total_mem / (1024**3)
        print(f"[GPU {gpu_id}] Total: {total_gb:.2f}GB | Free: {free_gb:.2f}GB")
        # Report the fraction that is actually used instead of a hard-coded number.
        print(
            f"[GPU {gpu_id}] Calculated Matrix Size: {matrix_size}x{matrix_size} "
            f"(Target utilization: ~{mem_fraction:.0%})"
        )

        print(f"[GPU {gpu_id}] Allocating memory...")
        # dtype is pinned so the 4-bytes-per-element assumption always holds.
        x = torch.randn(matrix_size, matrix_size, device=device, dtype=torch.float32)
        y = torch.randn(matrix_size, matrix_size, device=device, dtype=torch.float32)
        # Preallocate the result. Rebinding `z = torch.mm(x, y)` on every pass
        # keeps the previous result alive while the next one is allocated,
        # i.e. four matrices at peak instead of the three budgeted above.
        z = torch.empty_like(x)

        print(f"[GPU {gpu_id}] Starting loop...")

        while True:
            torch.mm(x, y, out=z)

    except RuntimeError as e:
        print(f"[GPU {gpu_id}] Error: {e}")
        if "out of memory" in str(e):
            print(
                f"[GPU {gpu_id}] Auto-size was too aggressive. "
                f"Try lowering mem_fraction (currently {mem_fraction})."
            )
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the stress loop; exit quietly.
        pass

if __name__ == '__main__':
    # Entry point: start one stress_task process per visible GPU and wait
    # until they exit or the user presses Ctrl+C.
    if not torch.cuda.is_available():
        print("CUDA is not available!")
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist; also report the failure through a non-zero exit status.
        raise SystemExit(1)

    num_gpus = torch.cuda.device_count()
    print(f"Found {num_gpus} GPUs. Auto-calculating load for each...")

    processes = []

    # Use 'spawn' so each worker is a fresh interpreter that initialises CUDA
    # itself rather than a fork of this process.
    mp.set_start_method('spawn', force=True)

    print("Starting processes... (Press Ctrl+C to stop)")
    # A monotonic clock is unaffected by wall-clock adjustments during long runs.
    start_time = time.monotonic()

    try:
        for i in range(num_gpus):
            p = mp.Process(target=stress_task, args=(i,))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

    except KeyboardInterrupt:
        print("\nStop signal received. Terminating all processes...")
        for p in processes:
            if p.is_alive():
                p.terminate()
        # Reap the workers so "All stopped" is only printed once they are gone.
        for p in processes:
            p.join(timeout=5)
            if p.is_alive():
                p.kill()
                p.join()
        print(f"All stopped. Duration: {time.monotonic() - start_time:.2f} seconds")

Claude Code使用国产平台(如MiniMax,GLM)的配置流程

首先安装node.js: https://nodejs.org/zh-cn/download

安装后通过npm安装pnpm:npm install -g pnpm@latest-10

然后用pnpm安装Claude Code: pnpm install -g @anthropic-ai/claude-code

网络问题请换源: pnpm config set registry https://registry.npmmirror.com/

然后编辑~/.claude/settings.json,具体内容可参考所用国产平台提供的教程

由于Claude Code禁止了一些国家和地区,直接运行会有如下的提示:

,所以需要绕过其验证,编辑~/.claude.json,在JSON的根对象(也就是最外层的{}内)增加一行: "hasCompletedOnboarding": true,如:

然后保存退出来,运行:

node --eval "
const homeDir = os.homedir();
const filePath = path.join(homeDir, '.claude.json');
if (fs.existsSync(filePath)) {
const content = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
fs.writeFileSync(filePath,JSON.stringify({ …content, hasCompletedOnboarding: true }, 2), 'utf-8');
} else {
fs.writeFileSync(filePath,JSON.stringify({ hasCompletedOnboarding: true }), 'utf-8');
}"

之后进入项目根目录输入claude即可正常使用:

原图像+mask外扩

def _expanded_crop_bounds(mask_data, voxel_size, mask_expansion, volume_shape):
    """Return inclusive (start, end) voxel indices of the mask bounding box plus a margin.

    The margin ``mask_expansion`` is given in the same physical unit as
    ``voxel_size`` and is converted to whole voxels per axis (truncated).
    Both corners are clamped to valid indices of ``volume_shape``.

    Raises:
        ValueError: if the mask has no voxel greater than zero.
    """
    nonzero_coords = np.argwhere(mask_data > 0)
    if nonzero_coords.size == 0:
        raise ValueError("mask contains no voxel > 0; nothing to crop")

    # Only the three spatial axes take part in the crop.
    margin = np.array([int(mask_expansion / size) for size in voxel_size[:3]])
    last_valid_index = np.asarray(volume_shape[:3]) - 1

    start = np.maximum(nonzero_coords.min(axis=0) - margin, 0)
    # `end` is inclusive, so it must not exceed shape - 1.
    end = np.minimum(nonzero_coords.max(axis=0) + margin, last_valid_index)
    return start, end


def _shifted_affine(affine, start):
    """Return a copy of ``affine`` whose origin is moved to voxel index ``start``."""
    shifted = np.array(affine, dtype=float, copy=True)
    shifted[:3, 3] = shifted[:3, :3] @ np.asarray(start, dtype=float) + shifted[:3, 3]
    return shifted


def crop_image_and_mask(image_path, mask_path, mask_expansion=0):
    """Crop an image and its mask to the mask's bounding box plus a margin.

    Args:
        image_path: path of the image volume, loaded with ``nib.load``.
        mask_path: path of the mask volume; voxels > 0 define the region.
        mask_expansion: margin added on every side, in the unit of the image
            header zooms (assumed to be millimetres).

    The cropped volumes are written next to the inputs: the output names are
    the input paths with 'image' / 'label' replaced by
    'image_ROI_<e>_<e>_<e>' / 'label_ROI_<e>_<e>_<e>'. Nothing is returned.

    Raises:
        ValueError: if the mask has no voxel greater than zero.
    """
    # Load both volumes and get their voxel arrays.
    image_nii = nib.load(image_path)
    mask_nii = nib.load(mask_path)
    image_data = image_nii.get_fdata()
    mask_data = mask_nii.get_fdata()

    # Voxel spacing along x, y, z, used to convert the margin to voxels.
    voxel_size = image_nii.header.get_zooms()

    # NOTE(review): bounds are clamped to the image shape; this assumes image
    # and mask share the same voxel grid.
    start, end = _expanded_crop_bounds(
        mask_data, voxel_size, mask_expansion, image_data.shape
    )
    roi = tuple(slice(int(lo), int(hi) + 1) for lo, hi in zip(start, end))

    cropped_image_data = image_data[roi]
    cropped_mask_data = mask_data[roi]

    print(f"Expanded crop start coordinates: {start}")
    print(f"Expanded crop end coordinates: {end}")

    # Move the affine origin to the crop start. Without this the cropped
    # volume would be placed at the original volume's corner in world space.
    cropped_image_nii = nib.Nifti1Image(
        cropped_image_data, _shifted_affine(image_nii.affine, start)
    )
    cropped_mask_nii = nib.Nifti1Image(
        cropped_mask_data, _shifted_affine(mask_nii.affine, start)
    )

    # NOTE(review): str.replace substitutes every occurrence of 'image' /
    # 'label' in the path, directory names included.
    suffix = f"ROI_{mask_expansion}_{mask_expansion}_{mask_expansion}"
    nib.save(cropped_image_nii, image_path.replace('image', f'image_{suffix}'))
    nib.save(cropped_mask_nii, mask_path.replace('label', f'label_{suffix}'))

PCA+梯度下降提取椎骨对称面算法

这个任务的主要工作是确定椎骨的中心对称面,为椎弓根螺钉植入提供参考依据。输入是.nii.gz格式的单节椎骨分割mask,大致形态如下:

ITK-SNAP下截取的Mask3D可视化示意图

动态效果:

GIF没进度条,而且不能重播,看了一下WP后台的元素改了之后不能可视化,于是就搞了个视频
继续阅读“PCA+梯度下降提取椎骨对称面算法”