TensorFlow ↔ PyTorch
Tensor
1.
import torch

test1_pth = torch.tensor([[1., 2.], [3., 4.]])
>>> test1_pth
tensor([[1., 2.],
        [3., 4.]])
import tensorflow as tf

test1_tf = tf.constant([[1., 2.], [3., 4.]])
>>> test1_tf
<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>
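Both libraries interoperate through NumPy, which is handy when porting code between them. A minimal sketch (assumes TF 2 eager mode and a CPU torch tensor):

import numpy as np

as_tf = tf.constant(test1_pth.numpy())       # torch -> numpy -> tf
as_pth = torch.from_numpy(test1_tf.numpy())  # tf -> numpy -> torch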
2.
test2_pth = torch.tensor([[1., 2.], [3., 4.]])
test2_pth[0][0] = 9.
>>> test2_pth
tensor([[9., 2.],
        [3., 4.]])
test2_tf = tf.constant([[1., 2.], [3., 4.]])
test2_tf = tf.Variable(test2_tf)
test2_tf[0, 0].assign(9.)
>>> test2_tf
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[9., 2.],
       [3., 4.]], dtype=float32)>
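A tf.constant itself is immutable; wrapping it in a tf.Variable (as above) is what enables in-place assignment. If you want an updated copy without a Variable, tf.tensor_scatter_nd_update works; a sketch:

updated = tf.tensor_scatter_nd_update(
    tf.constant([[1., 2.], [3., 4.]]), indices=[[0, 0]], updates=[9.])
# tf.Tensor: [[9., 2.], [3., 4.]]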
3.
test3_pth = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
>>> test3_pth
tensor([[1., 2.],
        [3., 4.]], requires_grad=True)
test3_tf = tf.constant([[1.0, 2.0], [3.0, 4.0]], dtype=tf.float32)
test3_tf = tf.Variable(test3_tf, trainable=True)
>>> test3_tf
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>
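TensorFlow can also compute gradients for a plain (non-Variable) tensor by watching it on the tape, the closest analogue to flipping requires_grad on an existing tensor; a minimal sketch:

x = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x)  # track a tensor the tape would otherwise ignore
    y = x * x
print(tape.gradient(y, x))  # tf.Tensor(6.0, shape=(), dtype=float32)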
Weight initialization
1.
import torch
import torch.nn as nn

def layer_init(layer, w_a=0, w_b=1, bias_const=-1):
    torch.nn.init.uniform_(layer.weight, a=w_a, b=w_b)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer

test1 = layer_init(nn.Linear(3, 4))
>>> test1._parameters
OrderedDict([('weight',
              Parameter containing:
              tensor([[0.4316, 0.7005, 0.3997],
                      [0.0089, 0.4746, 0.2912],
                      [0.1438, 0.6648, 0.1226],
                      [0.4291, 0.0352, 0.6135]], requires_grad=True)),
             ('bias',
              Parameter containing:
              tensor([-1., -1., -1., -1.], requires_grad=True))])
import tensorflow as tf
from tensorflow.keras.initializers import RandomUniform, Constant

test2 = tf.keras.models.Sequential()
test2.add(tf.keras.Input(shape=(3,)))
test2.add(
    tf.keras.layers.Dense(
        4,
        activation="relu",
        kernel_initializer=RandomUniform(0, 1),
        bias_initializer=Constant(-1),
    )
)
>>> test2.weights
[<tf.Variable 'dense/kernel:0' shape=(3, 4) dtype=float32, numpy=
 array([[0.792158  , 0.76683366, 0.71274936, 0.3051616 ],
        [0.22424495, 0.65580904, 0.06704581, 0.2954831 ],
        [0.7401217 , 0.24613738, 0.8886342 , 0.7413529 ]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(4,) dtype=float32, numpy=array([-1., -1., -1., -1.], dtype=float32)>]
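A quick shape check of both layers (values differ run to run since the weights are random; note the Keras layer above also applies a relu, while the torch one does not):

out_pth = test1(torch.ones(1, 3))  # torch.Size([1, 4])
out_tf = test2(tf.ones((1, 3)))    # shape (1, 4)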
Gradient
1.
x = torch.tensor(0.0, requires_grad=True)
y = 2 * x + 3
y.backward()
grad_of_y_wrt_x = x.grad
print(grad_of_y_wrt_x)
# tensor(2.)
x = torch.tensor(0., requires_grad=True)
y = 2 * x + 3
grad_of_y_wrt_x = torch.autograd.grad(y, x)[0]
print(grad_of_y_wrt_x)
# tensor(2.)
import tensorflow as tf

x = tf.Variable(0.)
with tf.GradientTape() as tape:
    y = 2 * x + 3
grad_of_y_wrt_x = tape.gradient(y, x)
print(grad_of_y_wrt_x)
# tf.Tensor(2.0, shape=(), dtype=float32)
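One behavioral difference: a GradientTape is consumed by its first gradient() call, whereas PyTorch can revisit a graph via backward(retain_graph=True) or autograd.grad. A sketch with a persistent tape:

x = tf.Variable(0.)
with tf.GradientTape(persistent=True) as tape:
    y = 2 * x + 3
    z = y * y
print(tape.gradient(y, x))  # tf.Tensor(2.0, shape=(), dtype=float32)
print(tape.gradient(z, x))  # dz/dx = 2y * dy/dx = 12.0 at x = 0
del tape  # free the tape's resources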
2.
W = torch.tensor([[1., 2.], [3., 4.]], requires_grad=True)
b = torch.tensor([[2., 1.], [1., 2.]], requires_grad=True)
x = torch.tensor([[4., 5.], [6., 7.]], requires_grad=True)
y = x.matmul(W) + b
y.backward(torch.ones_like(y))
>>> b.grad, W.grad
(tensor([[1., 1.],
         [1., 1.]]),
 tensor([[10., 10.],
         [12., 12.]]))
W = tf.Variable(tf.constant([[1., 2.], [3., 4.]]))
b = tf.Variable(tf.constant([[2., 1.], [1., 2.]]))
x = tf.Variable(tf.constant([[4., 5.], [6., 7.]]))
with tf.GradientTape() as tape:
    y = tf.matmul(x, W) + b
grad_of_y_wrt_W_and_b = tape.gradient(y, [b, W])
>>> grad_of_y_wrt_W_and_b
[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[1., 1.],
        [1., 1.]], dtype=float32)>,
 <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[10., 10.],
        [12., 12.]], dtype=float32)>]
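Both frameworks agree with the hand derivation: with L = sum(y) and y = xW + b, dL/dW = xᵀ·1 and dL/db = 1. A NumPy check:

import numpy as np

x_np = np.array([[4., 5.], [6., 7.]])
print(x_np.T @ np.ones((2, 2)))  # [[10. 10.] [12. 12.]]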
3.
import torch

t_c = torch.tensor([4.0])
t_u = torch.tensor([3.0])
learning_rate = 0.01

def model(t_u, w, b):
    return w * t_u + b

def loss_fn(t_p, t_c):
    squared_diffs = (t_p - t_c) ** 2
    return squared_diffs.mean()

params = torch.tensor([2.0, 7.0], requires_grad=True)
loss = loss_fn(model(t_u, *params), t_c)
loss.backward()
print(params.grad)
# tensor([54., 18.])
import tensorflow as tf

t_c = tf.constant([4.0])
t_u = tf.constant([3.0])
learning_rate = 0.01

def model(t_u, w, b):
    return w * t_u + b

def loss_fn(t_p, t_c):
    squared_diffs = tf.square(t_p - t_c)
    return tf.reduce_mean(squared_diffs)

params = tf.Variable([2.0, 7.0], dtype=tf.float32)
with tf.GradientTape() as tape:
    t_p = model(t_u, *params)
    loss = loss_fn(t_p, t_c)
grads = tape.gradient(loss, params)
print(grads)
# tf.Tensor([54. 18.], shape=(2,), dtype=float32)
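Hand check of the shared result: t_p = 2·3 + 7 = 13, so the error is 9, dloss/dw = 2·9·t_u = 54 and dloss/db = 2·9 = 18:

w, b = 2.0, 7.0
err = (w * 3.0 + b) - 4.0      # t_p - t_c = 9.0
print(2 * err * 3.0, 2 * err)  # 54.0 18.0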
Model
1.
import numpy as np
import torch
import torch.nn as nn

class QNetwork(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(np.array(env.single_observation_space.shape).prod(), 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, env.single_action_space.n),
        )

    def forward(self, x):
        return self.network(x)
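A minimal smoke test of the PyTorch network, assuming Gymnasium is installed (the CartPole env here is illustrative, not part of the original):

import gymnasium as gym

envs = gym.vector.SyncVectorEnv([lambda: gym.make("CartPole-v1")])
q = QNetwork(envs)
obs, _ = envs.reset()
print(q(torch.as_tensor(obs, dtype=torch.float32)).shape)  # torch.Size([1, 2])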
import numpy as np
import tensorflow as tf

class QNetwork(tf.keras.Model):
    def __init__(self, env):
        super().__init__()
        self.network = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(np.array(env.single_observation_space.shape).prod(),)),
            tf.keras.layers.Dense(120, activation='relu'),
            tf.keras.layers.Dense(84, activation='relu'),
            tf.keras.layers.Dense(env.single_action_space.n),
        ])

    def call(self, x):
        return self.network(x)
import numpy as np
import tensorflow as tf

def create_q_network(env):
    input_shape = (np.array(env.single_observation_space.shape).prod(),)
    inputs = tf.keras.layers.Input(shape=input_shape)
    x = tf.keras.layers.Dense(120, activation='relu')(inputs)
    x = tf.keras.layers.Dense(84, activation='relu')(x)
    outputs = tf.keras.layers.Dense(env.single_action_space.n)(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model
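Either Keras variant can be inspected with summary(); a sketch (env assumed to be a vector env as above):

model = create_q_network(env)
model.summary()  # lists the three Dense layers and their parameter counts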
2.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class Actor(nn.Module):
    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod(), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, np.prod(env.single_action_space.shape))
        # action rescaling
        self.register_buffer(
            "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32)
        )
        self.register_buffer(
            "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32)
        )

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc_mu(x))
        return x * self.action_scale + self.action_bias
import numpy as np
import tensorflow as tf

class Actor(tf.keras.Model):
    def __init__(self, env):
        super().__init__()
        self.fc1 = tf.keras.layers.Dense(256, activation='relu')
        self.fc2 = tf.keras.layers.Dense(256, activation='relu')
        self.fc_mu = tf.keras.layers.Dense(np.prod(env.single_action_space.shape), activation='tanh')
        # action rescaling
        action_scale = (env.action_space.high - env.action_space.low) / 2.0
        action_bias = (env.action_space.high + env.action_space.low) / 2.0
        self.action_scale = self.add_weight(
            name='action_scale', shape=action_scale.shape,
            initializer=tf.constant_initializer(action_scale), trainable=False)
        self.action_bias = self.add_weight(
            name='action_bias', shape=action_bias.shape,
            initializer=tf.constant_initializer(action_bias), trainable=False)

    def call(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc_mu(x)
        actions = x * self.action_scale + self.action_bias
        return actions
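In both versions the rescaling constants travel with the model but receive no gradients: register_buffer keeps them out of model.parameters(), and trainable=False keeps them out of trainable_variables. A sketch on the Keras side (env and obs_dim are assumed):

actor = Actor(env)
_ = actor(tf.zeros((1, obs_dim)))  # a first call builds the Dense layers
print([v.name for v in actor.trainable_variables])      # fc1/fc2/fc_mu kernels and biases
print([v.name for v in actor.non_trainable_variables])  # action_scale, action_bias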
Backpropagation
1.
import torch
import torch.optim as optim
learning_rate = 0.1
x = torch.tensor([2.0])
y = torch.tensor([3.0])
params = torch.tensor([1.0, 3.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)
result = x * params[0] + params[1]
loss = (result - y) ** 2
print(f"loss: {loss}")
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"params: {params}")
print(f"params.grad: {params.grad}")
loss: tensor([4.], grad_fn=<PowBackward0>)
params: tensor([0.2000, 2.6000], requires_grad=True)
params.grad: tensor([8., 4.])
import tensorflow as tf

learning_rate = 0.1
x = tf.constant([2.0])
y = tf.constant([3.0])
params = tf.Variable([1.0, 3.0], trainable=True)
optimizer = tf.optimizers.SGD(learning_rate)
with tf.GradientTape() as tape:
    result = x * params[0] + params[1]
    loss = tf.reduce_mean((result - y) ** 2)
print(f"loss: {loss.numpy()}")
gradients = tape.gradient(loss, [params])
optimizer.apply_gradients(zip(gradients, [params]))
print(f"params: {params.numpy()}")
print(f"params_grad: {gradients[0].numpy()}")
loss: 4.0
params: [0.19999999 2.6 ]
params_grad: [8. 4.]
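Hand check of the single SGD step (both frameworks agree):

w, b, lr = 1.0, 3.0, 0.1
err = (w * 2.0 + b) - 3.0        # result - y = 2.0, so loss = 4.0
gw, gb = 2 * err * 2.0, 2 * err  # 8.0, 4.0
print(w - lr * gw, b - lr * gb)  # 0.2 2.6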
gather
1.
import torch
test = torch.tensor([[1, 2], [3, 4], [5, 6]])
a = torch.tensor([[0], [0], [1]])
print(test.gather(1, a))
tensor([[1],
        [3],
        [6]])
import tensorflow as tf
test = tf.constant([[1, 2], [3, 4], [5, 6]])
a = tf.constant([[0], [0], [1]])
print(tf.experimental.numpy.take_along_axis(test, a, axis=1))
tf.Tensor(
[[1]
 [3]
 [6]], shape=(3, 1), dtype=int32)
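An equivalent without the experimental namespace, using tf.gather with batch_dims:

print(tf.gather(test, a, axis=1, batch_dims=1))  # same (3, 1) result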
max
1.
import torch
test = torch.tensor([[1, 2], [3, 4], [5, 6]])
result = test.max(1)
print(result[0])
tensor([2, 4, 6])
import tensorflow as tf
test = tf.constant([[1, 2], [3, 4], [5, 6]])
result = tf.reduce_max(test, 1)
print(result)
tf.Tensor([2 4 6], shape=(3,), dtype=int32)
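torch's max(dim) also returns the argmax alongside the values; TensorFlow splits the two into tf.reduce_max and tf.argmax. A sketch:

import torch
import tensorflow as tf

print(torch.tensor([[1, 2], [3, 4], [5, 6]]).max(1).indices)     # tensor([1, 1, 1])
print(tf.argmax(tf.constant([[1, 2], [3, 4], [5, 6]]), axis=1))
# tf.Tensor([1 1 1], shape=(3,), dtype=int64)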