"""GPU memory holder.

Repeatedly races to allocate a fixed chunk of memory on one GPU and, once
the allocation succeeds, holds it forever (until the process is killed).
While racing, it prints a table of every process currently using any GPU.
"""

Memory_Allocated_GB = 5  # GiB of GPU memory to occupy
GPU_ID = 2               # ID of the GPU whose memory should be claimed

import time

import psutil
import torch
from torch import cuda


def get_gpu_used_information():
    """Collect one record per process currently using any visible GPU.

    Returns:
        list of (gpu_id, pid_str, memory_used_str, user_name) tuples,
        parsed from the human-readable output of
        ``torch.cuda.list_gpu_processes()``, whose per-process lines look
        like ``'process <pid> uses <mem>.000 MB GPU memory'``.

    NOTE(review): the username parsing (``split('\\')[1]``) assumes a
    Windows-style ``DOMAIN\\user`` name from psutil — confirm on the
    target platform; on POSIX this would raise ``IndexError``.
    """
    used_information = []
    for gpu_id in range(cuda.device_count()):
        for line in cuda.list_gpu_processes(gpu_id).split('\n'):
            if not line.startswith('process'):
                continue
            fields = line.split()
            pid = fields[1]
            # fields[3] is e.g. '1234.000'; dropping the last 4 chars
            # strips the '.000' fractional part, leaving whole MiB.
            mem_mib = fields[3][:-4]
            user = psutil.Process(int(pid)).username().split('\\')[1]
            used_information.append((gpu_id, pid, mem_mib, user))
    return used_information


def print_information(infs):
    """Pretty-print GPU usage records as a green ASCII table.

    Args:
        infs: iterable of (gpu_id, pid, memory_used_MiB, user_name)
            tuples as produced by :func:`get_gpu_used_information`.
    """
    border = '\033[0:32m+----------------------------------------------------------+\033[0m'
    # Header aligned with the record format below:
    # 8-wide GPU, 9-wide PID, 14-wide mem + 'MiB', 15-wide user, 6 pad.
    header = ('|' + 'GPU'.rjust(8) + ' ' + 'PID'.rjust(9) + ' '
              + 'MEMORY-USED'.rjust(17) + ' ' + 'USER-NAME'.rjust(15)
              + ' ' * 6 + '|')
    print(border)
    print('\033[0:32m' + header + '\033[0m')
    for inf in infs:
        record = "{:>8d} {:>9d} {:>14d}MiB {}".format(
            inf[0], int(inf[1]), int(inf[2]), str(inf[3]).rjust(15))
        print('\033[0:32m|' + record + ' ' * 6 + '|\033[0m')
    print(border)


if __name__ == '__main__':
    ace = None  # keeps the allocated tensor alive once we win the race
    while True:
        print('\033[0:33mNow: ' + time.strftime('%Y-%m-%d %H:%M:%S') + '\033[0m')
        print_information(get_gpu_used_information())
        try:
            device = 'cuda:{}'.format(GPU_ID)
            # float32 tensor of [G, 1024, 1024, 256] elements is exactly
            # G GiB (1024 * 1024 * 256 * 4 bytes == 1 GiB per unit of G).
            ace = torch.zeros([Memory_Allocated_GB, 1024, 1024, 256], device=device)
        except RuntimeError:
            # Allocation failed (memory still held by someone else):
            # count down 10 s on one line, then retry.
            ace = None
            for sec in reversed(range(10)):
                print("\r\033[0:31mRace to control GPU: {} {}GiB failed, try again after {}s...\033[0m".format(
                    GPU_ID, Memory_Allocated_GB, sec), flush=True, end='')
                time.sleep(1)
            print()
        else:
            # Success: hold the memory forever, refreshing a status line.
            print("\033[1:35mGPU: {}, memory allocated: {} GB\033[0m".format(
                GPU_ID, Memory_Allocated_GB))
            while True:
                print("\r\033[1:35mGPU: {}, hold-up time: {}\033[0m".format(
                    GPU_ID, time.strftime('%Y-%m-%d %H:%M:%S')), end='', flush=True)
                time.sleep(1)