fix: missing activation func derivative + send error before update
This commit is contained in:
@@ -46,15 +46,15 @@ class Autoencoder:
|
|||||||
with tqdm(bar_format="{desc} {elapsed} {rate_fmt}") as lbar:
|
with tqdm(bar_format="{desc} {elapsed} {rate_fmt}") as lbar:
|
||||||
while True:
|
while True:
|
||||||
lbar.set_description(
|
lbar.set_description(
|
||||||
f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={prev_error:.2f})", # noqa
|
f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={float(prev_error):.6f}", # noqa
|
||||||
)
|
)
|
||||||
lbar.update()
|
lbar.update()
|
||||||
error = 0
|
error = 0
|
||||||
for x in data_set:
|
for x in tqdm(data_set, leave=False):
|
||||||
error += self.train(x)
|
error += self.train(x)
|
||||||
error /= len(data_set)
|
error /= len(data_set)
|
||||||
derror = prev_error - error
|
derror = prev_error - error
|
||||||
if derror <= 0 or abs(derror) < 1e-8:
|
if derror <= 0 or abs(derror) < 1e-4:
|
||||||
no_improv += 1
|
no_improv += 1
|
||||||
else:
|
else:
|
||||||
no_improv = 0
|
no_improv = 0
|
||||||
@@ -84,8 +84,10 @@ class Autoencoder:
|
|||||||
return out, code
|
return out, code
|
||||||
|
|
||||||
def save(self, path: str):
    """Persist this object to disk as a NumPy ``.npy`` file.

    Args:
        path: Destination file name, with or without a trailing ``.npy``.

    A trailing ``.npy`` is stripped before the call so that both spellings
    of the name end up in the same file: ``np.save`` appends the extension
    itself when the name does not already carry it.
    """
    stem = path.removesuffix('.npy')
    # The whole instance is handed to NumPy as the payload to store.
    np.save(stem, self)
|
||||||
|
|
||||||
def load(path: str) -> 'Autoencoder':
|
def load(path: str) -> 'Autoencoder':
|
||||||
|
path = path.removesuffix('.npy') + '.npy'
|
||||||
data = np.load(path, allow_pickle=True)
|
data = np.load(path, allow_pickle=True)
|
||||||
return data.item()
|
return data.item()
|
||||||
|
|||||||
40
layers.py
40
layers.py
@@ -1,6 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import types
|
import types
|
||||||
from utils import regularize
|
from utils import normalize
|
||||||
|
|
||||||
|
|
||||||
class NNLayer:
|
class NNLayer:
|
||||||
@@ -12,28 +12,34 @@ class NNLayer:
|
|||||||
self.W = np.random.uniform(-1, 1, (in_size, out_size))
|
self.W = np.random.uniform(-1, 1, (in_size, out_size))
|
||||||
self.B = np.zeros((out_size))
|
self.B = np.zeros((out_size))
|
||||||
self.lr = lr
|
self.lr = lr
|
||||||
self.last_input = None
|
self.input = None
|
||||||
self.last_output = None
|
self.output = None
|
||||||
|
self.output_linear = None
|
||||||
self.activation_func = activation_func
|
self.activation_func = activation_func
|
||||||
|
|
||||||
def forward(self, V: np.ndarray) -> np.ndarray:
    """Run the layer on ``V`` and cache the intermediates.

    The values stored here (``input``, ``output_linear``, ``output``) are
    the ones ``backprop`` reads afterwards.

    Args:
        V: Input vector for this layer.

    Returns:
        The activated output of the layer.
    """
    # The input goes through the project's normalize() helper first
    # (its exact scaling is defined in utils, not here).
    self.input = normalize(V)
    pre_activation = self.input @ self.W + self.B
    self.output_linear = pre_activation
    self.output = self.activation_func(pre_activation)
    return self.output
|
||||||
|
|
||||||
def backprop(self, error: np.ndarray) -> np.ndarray:
    """Apply one gradient step to this layer and pass the error back.

    Args:
        error: Error signal arriving at this layer's output. It is NOT
            modified; the previous in-place ``error *= ...`` changed the
            caller's array and failed for integer-dtype input.

    Returns:
        The error signal for the preceding layer, computed with the weights
        as they were before this update.
    """
    # activation_func(x, True) is presumably the derivative of the
    # activation at the cached pre-activation values -- confirm in utils.
    delta = error * self.activation_func(self.output_linear, True)
    # Must be computed before W is updated below.
    upstream = self.W @ delta
    self.W -= self.lr * np.outer(self.input, delta)
    self.B -= self.lr * delta
    return upstream
|
||||||
|
|
||||||
|
|
||||||
class DeepNNLayer:
|
class DeepNNLayer:
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
layers: list[int],
|
layers: list[int],
|
||||||
lr: float,
|
lr: float,
|
||||||
activation_func):
|
activation_func: types.FunctionType):
|
||||||
self.layers: list[NNLayer] = []
|
self.layers: list[NNLayer] = []
|
||||||
for i in range(len(layers) - 1):
|
for i in range(len(layers) - 1):
|
||||||
self.layers.append(
|
self.layers.append(
|
||||||
@@ -45,13 +51,11 @@ class DeepNNLayer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def forward(self, v: np.ndarray) -> np.ndarray:
    """Feed ``v`` through every layer in order.

    Args:
        v: Input to the first layer.

    Returns:
        The output of the last layer, or ``v`` itself when there are no
        layers.
    """
    signal = v
    for stage in self.layers:
        # Each layer consumes the previous layer's output.
        signal = stage.forward(signal)
    return signal
|
||||||
|
|
||||||
def backprop(self, error: np.ndarray) -> np.ndarray:
    """Propagate ``error`` backwards through the layers, last to first.

    Args:
        error: Error signal at the output of the last layer.

    Returns:
        Whatever the first layer's ``backprop`` returns, or ``error``
        itself when there are no layers.
    """
    signal = error
    for stage in reversed(self.layers):
        # Each layer returns the error for the layer before it.
        signal = stage.backprop(signal)
    return signal
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from autoencoder import Autoencoder
|
from autoencoder import Autoencoder
|
||||||
from utils import relu
|
from utils import relu, regularize
|
||||||
|
|
||||||
|
|
||||||
def load_mnist() -> list[np.ndarray]:
|
def load_mnist() -> list[np.ndarray]:
|
||||||
@@ -18,7 +18,7 @@ def load_mnist() -> list[np.ndarray]:
|
|||||||
|
|
||||||
|
|
||||||
def mnist_train(
|
def mnist_train(
|
||||||
bottleneck: int,
|
filename: str,
|
||||||
max_epoch: int,
|
max_epoch: int,
|
||||||
patience: int,
|
patience: int,
|
||||||
):
|
):
|
||||||
@@ -29,9 +29,9 @@ def mnist_train(
|
|||||||
x_train = x_train / 255
|
x_train = x_train / 255
|
||||||
x_test = x_test / 255
|
x_test = x_test / 255
|
||||||
autoencoder = Autoencoder(
|
autoencoder = Autoencoder(
|
||||||
[in_len, bottleneck],
|
[in_len, 64, 16],
|
||||||
[bottleneck, in_len],
|
[16, 64, in_len],
|
||||||
0.1,
|
0.01,
|
||||||
relu
|
relu
|
||||||
)
|
)
|
||||||
autoencoder.train_dataset(
|
autoencoder.train_dataset(
|
||||||
@@ -39,24 +39,39 @@ def mnist_train(
|
|||||||
max_epoch,
|
max_epoch,
|
||||||
patience,
|
patience,
|
||||||
display_loss=True)
|
display_loss=True)
|
||||||
autoencoder.save("autoencoder_mnist")
|
autoencoder.save(filename)
|
||||||
|
|
||||||
|
|
||||||
def mnist_test(filename: str):
    """Show a saved autoencoder's reconstruction of one random test digit.

    Loads the model from ``filename``, runs a randomly chosen MNIST test
    image through it and plots three panels: the input image, the
    reconstructed output and the bottleneck code laid out as a square grid.

    Args:
        filename: Model file to load, as accepted by ``Autoencoder.load``.
    """
    x_train, _, x_test, y_test = load_mnist()
    img_shape = x_train[0].shape
    # Flatten each test image to a vector and scale the pixels by 1/255.
    # reshape(-1) also handles non-square images (the old
    # shape[0] * shape[0] length did not) and does not resize in place.
    x_test = x_test.reshape(x_test.shape[0], -1) / 255

    autoencoder: Autoencoder = Autoencoder.load(filename)
    idx = np.random.randint(0, len(x_test))
    example: np.ndarray = x_test[idx]
    label = y_test[idx]
    output, code = autoencoder.forward(example.flatten())

    # Diagnostic: per-layer input/output sizes of the encoder. Printed
    # after the forward pass so the cached values belong to this run.
    for layer in autoencoder.encoder.layers:
        print(len(layer.input), len(layer.output))

    output = regularize(output)

    # Lay the code out on the smallest square grid that holds it, padding
    # with zeros. A fresh array is used so the array returned by the model
    # (which may be a layer's cached output) is never resized in place.
    side = int(np.ceil(np.sqrt(code.size)))
    code_grid = np.zeros(side * side, dtype=code.dtype)
    code_grid[:code.size] = code.ravel()
    code_grid = code_grid.reshape(side, side)

    panels = (
        ("Input", example.reshape(img_shape)),
        ("Output", output.reshape(img_shape)),
        ("Code", code_grid),
    )
    for position, (title, image) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        plt.matshow(image, fignum=False)
        plt.title(f"{title} ({label})")
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
@@ -65,13 +80,34 @@ if __name__ == "__main__":
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Command-line entry point: train a model (default) or only run a saved one.
DEFAULT_EPOCHS = 1000
DEFAULT_PATIENCE = 5
DEFAULT_MODEL = 'autoencoder_mnist.npy'

parser = argparse.ArgumentParser()
# nargs='?' lets a flag appear without a value. `const` makes that case
# fall back to the default instead of producing None, which would be
# passed on to the training / test functions.
parser.add_argument(
    '-e',
    type=int,
    nargs='?',
    default=DEFAULT_EPOCHS,
    const=DEFAULT_EPOCHS,
    help='Max epochs'
)
parser.add_argument(
    '-p',
    type=int,
    nargs='?',
    default=DEFAULT_PATIENCE,
    const=DEFAULT_PATIENCE,
    help='Patience'
)
parser.add_argument(
    '-m',
    type=str,
    nargs='?',
    default=DEFAULT_MODEL,
    const=DEFAULT_MODEL,
    # The model is saved after training and loaded in run mode.
    help='Model filename to save in training mode or load in run mode'
)
parser.add_argument(
    '-r',
    action='store_true',
    help='Run mode'
)
args = parser.parse_args(sys.argv[1:])
# Training mode trains and saves first; both modes then display a result.
if not args.r:
    mnist_train(args.m, args.e, args.p)
mnist_test(args.m)
|
||||||
|
|||||||
Reference in New Issue
Block a user