下面是一段完整的 Python 代码,实现了一个简单的前馈神经网络,并演示如何在隐藏层使用整流线性单元 (ReLU) 激活函数来缓解 sigmoid 激活函数带来的梯度消失问题。
import numpy as np


def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), applied element-wise."""
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    """Return x * (1 - x), applied element-wise.

    This equals the derivative of the sigmoid only when ``x`` is already a
    sigmoid *output* (s'(z) = s(z) * (1 - s(z))); pass activations, not
    pre-activation sums.
    """
    return x * (1 - x)


def relu(x):
    """Return max(0, x), applied element-wise."""
    return np.maximum(0, x)


def relu_derivative(x):
    """Return 0 where x <= 0 and 1 elsewhere, applied element-wise."""
    return np.where(x <= 0, 0, 1)


class NeuralNetwork:
    """Two-layer feed-forward network: ReLU hidden layer, sigmoid output layer.

    Args:
        input_size: Number of input features.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Hidden-layer parameters, drawn uniformly from [0, 1).
        self.weights_hidden = np.random.rand(self.input_size, self.hidden_size)
        self.biases_hidden = np.random.rand(1, self.hidden_size)

        # Output-layer parameters, drawn uniformly from [0, 1).
        self.weights_output = np.random.rand(self.hidden_size, self.output_size)
        self.biases_output = np.random.rand(1, self.output_size)

    def forward(self, X):
        """Run a forward pass and return the output-layer activations.

        Args:
            X: Input array whose last dimension equals ``input_size``.

        Returns:
            Sigmoid activations of the output layer, one row per input row.
        """
        # Hidden layer: affine transform followed by ReLU.
        hidden_layer_input = np.dot(X, self.weights_hidden) + self.biases_hidden
        hidden_layer_output = relu(hidden_layer_input)

        # Output layer: affine transform followed by sigmoid.
        output_layer_input = (
            np.dot(hidden_layer_output, self.weights_output) + self.biases_output
        )
        output_layer_output = sigmoid(output_layer_input)
        return output_layer_output


# Example usage:
if __name__ == "__main__":
    # Sample input data (4 examples, 3 features each)
    X = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])

    # Corresponding target labels (4 examples, 1 label each);
    # not used by the forward pass below.
    y = np.array([[0], [1], [1], [0]])

    # Create a neural network with 3 input nodes, 4 hidden nodes, and 1 output node
    neural_network = NeuralNetwork(input_size=3, hidden_size=4, output_size=1)

    # Make a forward pass through the neural network to get the predictions
    predictions = neural_network.forward(X)
    print("Predictions:")
    print(predictions)
在这个例子中,我们创建了一个包含 3 个输入节点、4 个隐藏节点和 1 个输出节点的简单神经网络。网络在隐藏层使用 ReLU 激活函数,在输出层使用 sigmoid 激活函数。权重和偏置均为随机初始化。
示例输出(由于权重和偏置是随机初始化的,每次运行的结果会有所不同):
Predictions:
[[0.9363414 ]
 [0.98761619]
 [0.9599209 ]
 [0.99235822]]