I present a comparison of the same algorithm implemented in two languages: Python and Scheme+, a language based on Scheme and written in Scheme.
The algorithm is fundamental deep learning: a multi-layer neural network trained by gradient back-propagation learns two boolean functions, NOT and XOR, and a mathematical function, sine.
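To make the two listings easier to follow, here is the update rule both versions implement: stochastic gradient descent on the squared error, where f′ is the activation derivative (the code passes it both the activation z and the pre-activation z̃), η is the learning rate and δ is the local gradient of a neuron. In LaTeX notation:

E = \tfrac{1}{2}\sum_k (y_k - z_k)^2, \qquad \delta_k^{\mathrm{out}} = y_k - z_k

\delta_j^{(l)} = \sum_k f'\!\left(z_k^{(l+1)}, \tilde z_k^{(l+1)}\right) w_{kj}^{(l)}\, \delta_k^{(l+1)}

w_{ji}^{(l)} \;\leftarrow\; w_{ji}^{(l)} + \eta\, z_i^{(l)}\, f'\!\left(z_j^{(l+1)}, \tilde z_j^{(l+1)}\right) \delta_j^{(l+1)}

The bias weight w_{j0}^{(l)} is updated the same way, with z_i^{(l)} replaced by the constant 1.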
First I present the original Python program and its output, then the Scheme+ version.
Here is the original Python code (note that the code occasionally uses some NumPy library features):
file: exo_retropropagationNhidden_layers_matrix_v2.py
# L'algorithme de rétro-propagation du gradient dans un
# réseau de neurones avec N couches cachées.
# D. Mattei
# python3.11 -O exo_retropropagationNhidden_layers_matrix_v2.py
from random import seed, uniform,randint
seed(1789) # si vous voulez avoir les mêmes tirages aléatoires à chaque exécution du fichier !
from math import exp, pow, pi, sin , tanh , atan
from Matrix import Matrix
from time import time
# sigmoïde
def Ï(zÌ):
try:
s = 1/(1+exp(-zÌ))
except OverflowError as e:
# Somehow no exception is caught here...
#print('OverflowError...')
#print("x=",x)
#sys.exit(1)
s = 0
except Exception as e:
print(e)
return s
# not used
def Ïà €(z):
return Ï(z)*(1-Ï(z))
# not used
def tanhà €(x):
return 1 - tanh(x)**2
def der_tanh(z,zÌ):
return 1 - z**2
def der_Ï(z,zÌ):
return z*(1-z)
def leaky_RELU(zÌ):
return max(0.01*zÌ,zÌ)
def der_leaky_RELU(z,zÌ):
return 1 if z>=0 else 0.01
def RELU(zÌ):
return max(0,zÌ)
def der_RELU(z,zÌ):
return 1 if z>=0 else 0
def swish(zÌ):
return zÌ * Ï(zÌ)
def der_swish(z,zÌ):
return z + Ï(zÌ) * (1 - z)
def der_atan(z,zÌ):
return 1 / (1 + pow(zÌ,2))
class ReseauRetroPropagation():
def __init__(self,nc=[2,3,1], nbiter=3, ηâ=1.0 , #ηâ=0.0001 ,
activation_function_hidden_layer=tanh,
activation_function_output_layer=tanh,
activation_function_hidden_layer_derivative=der_tanh,
activation_function_output_layer_derivative=der_tanh):
'''Construit un réseau de neurones avec plusieurs couches cachées. Il y a des entrées (+ biais),
des neurones dans les couches cachées (+ biais) et des neurones en sortie dont les nombres sont définies dans nc.'''
lnc = len(nc) # the total of all layer including input, output and hidden layers
# on crée le tableau des couches du réseau
self.z = [ [0] * n for n in nc ] # les entrées concrÚtes seront fournies avec la méthode accepte
self.zÌ = [ [0] * n for n in nc ] # zÌ[0] is not used as z[0] is x, the initial data
self.á = [ [0] * n for n in nc ] # gradients locaux des neurones cachĂ©s et gradient sur la couche de sortie
# á[0] is useless but keep same index with z
self.nbiter = nbiter
# "learning rate"
self.ηâ = ηâ
#self.ηâ = ηâ
self.error = 0
self.activation_function_hidden_layer = activation_function_hidden_layer
self.activation_function_output_layer = activation_function_output_layer
self.activation_function_hidden_layer_derivative = activation_function_hidden_layer_derivative
self.activation_function_output_layer_derivative = activation_function_output_layer_derivative
# forward propagation
# fusionne accept et propage
# z_* sans le coef. 1 constant pour le bias
def accepte_et_propage(self,x): # on entre des entrées et on les propage
# note: i just reference the variables for code readness (hide all the self keyword)
z = self.z
zÌ = self.zÌ
M = self.M
if len(x) != len(z[0]):
raise ValueError("Mauvais nombre d'entrées !")
#z[0] = x # on ne touche pas au biais
self.z[0] = z[0] = x
# propagation des entrées vers la sortie
n = len(z)
# hidden layers
for i in range(n-2) :
# calcul des stimuli reçus par la couche cachée d'indice i+1 à -partir de la précedente
# create a list with 1 in front for the bias coefficient
z_1 = [1] + z[i]
zÌ[i+1] = M[i] * z_1 # zÌ = matrix * iterable (list here) , return a list
# calcul des réponses des neurones cachés
#z[i+1] = list(map(Ï,zÌ))
#z[i+1] = list(map(tanh,zÌ))
z[i+1] = list(map(self.activation_function_hidden_layer,zÌ[i+1]))
# update the variable when necessary
self.z[i+1] = z[i+1]
self.zÌ[i+1] = zÌ[i+1]
# output layer
i = i + 1
# calcul des stimuli reçus par la couche cachée d'indice i+1 à -partir de la précedente
# create a list with 1 in front for the bias coefficient
z_1 = [1] + z[i]
zÌ[i+1] = M[i] * z_1 # zÌ = matrix * iterable (list here)
# calcul des réponses des neurones de la couche de sortie
z[i+1] = list(map(self.activation_function_output_layer,zÌ[i+1]))
# update the variable when necessary
self.z[i+1] = z[i+1]
self.zÌ[i+1] = zÌ[i+1]
#print("accepte_et_propage : self.z[i+1] ="); print(self.z[i+1])
#return self.z[i+1] # et retour des sorties
def print_matrix_elements(self,M):
for e in M:
print(e)
print()
# not used
def η(self,n):
ηâ = self.ηâ
N = self.nbiter
# if n < N/3:
# return ((0.1 - ηâ) / (N/3)) * n + ηâ
# else:
# return 1/10**(1+int(((n-N/3)/(N-N/3))*10))
return ηâ/10**(int((n/N)*10))
def apprentissage(self,Lexemples): # apprentissage des poids par une liste d'exemples
nbiter = self.nbiter
ip = 0 # numéro de l'exemple courant
# TODO: take in account the error as stop point
for it in range(nbiter): # le nombre d'itérations est fixé !
error = 0.0 # l'erreur totale pour cet exemple
(x,y) = Lexemples[ip] # un nouvel exemple Ă apprendre
# PROPAGATION VERS L'AVANT
self.accepte_et_propage(x) # sorties obtenues sur l'exemple courant, self.z_k et z_j sont mis Ă jour
# RETRO_PROPAGATION VERS L'ARRIERE, EN DEUX TEMPS
# note: i just use local reference for the variables for code readness (hide all the self keyword)
z = self.z
zÌ = self.zÌ
i = i_output_layer = len(z) - 1 # start at index i of the ouput layer
á = self.á
ns = len(z[i])
# TEMPS 1. calcul des gradients locaux sur la couche k de sortie (les erreurs commises)
for k in range(ns):
á[i][k] = y[k] - z[i][k] # gradient sur un neurone de sortie (erreur locale)
error += pow(á[i][k],2) # l'erreur quadratique totale
error *= 0.5
if it == nbiter-1 : self.error = error # mémorisation de l'erreur totale à la derniÚre itération
# modification des poids de la matrice de transition de la derniére couche de neurones cachés à la couche de sortie
M = self.M # read/write data
# because i dislike self keyword in my mathematical expressions i recopy the variables
η = self.ηâ
#η = self.η(it)
#print(η)
# (test fait: modifier la matrice apres le calcul du gradient de la couche j (maintenant i-1) , conclusion: ne change pas la convergence de l'algo)
ázâłázÌ = self.activation_function_output_layer_derivative
self.modification_des_poids(M[i-1],η,z[i-1],z[i],zÌ[i],á[i],ázâłázÌ)
#self.print_matrix_elements(M)
# TEMPS 2. calcul des gradients locaux sur les couches cachées (rétro-propagation), sauf pour le bias constant
ázâłázÌ = self.activation_function_hidden_layer_derivative
for i in reversed(range(1,i_output_layer)) :
nc = len(z[i])
ns = len(z[i+1])
for j in range(nc):
á[i][j] = sum(ázâłázÌ(z[i+1][k],zÌ[i+1][k]) * M[i][k][j+1] * á[i+1][k] for k in range(ns))
# modification des poids de la matrice de transition de la couche i-1 Ă i
self.modification_des_poids(M[i-1],η,z[i-1],z[i],zÌ[i],á[i],ázâłázÌ)
#self.print_matrix_elements(M)
# et l'on passe Ă l'exemple suivant
#ip = (ip + 1) % len(Lexemples) # parcours des exemples en ordre circulaire
ip = randint(0,len(Lexemples) - 1)
# modify coefficients layer
def modification_des_poids(self,M_i_o,η,z_input,z_output,zÌ_output,á_i_o,ázâłázÌ): # derivative of activation function of the layer
#print(zÌ_output)
# the length of output and input layer with coeff. used for bias update
(len_layer_output, len_layer_input_plus1forBias) = M_i_o.dim()
len_layer_input = len_layer_input_plus1forBias - 1
for j in range(len_layer_output): # line
for i in range(len_layer_input): # column , parcours les colonnes de la ligne sauf le bias
M_i_o[j][i+1] -= -η * z_input[i] * ázâłázÌ(z_output[j],zÌ_output[j]) * á_i_o[j]
# and update the bias
M_i_o[j][0] -= -η * 1.0 * ázâłázÌ(z_output[j],zÌ_output[j]) * á_i_o[j]
def test(self,Lexemples):
print('Test des exemples :')
error = 0
for (entree,sortie_attendue) in Lexemples:
self.accepte_et_propage(entree)
print(entree,'-->',self.z[len(self.z)-1],': on attendait',sortie_attendue)
á = sortie_attendue[0] - self.z[len(self.z)-1][0] # erreur sur un element
error += pow(á,2) # l'erreur quadratique totale
error *= 0.5
print("Error on examples=") ; print(error)
if __name__ == '__main__':
print('################## NOT ##################')
r1 = ReseauRetroPropagation([1,2,1],50000,10,Ï,Ï,der_Ï,der_Ï)
Lexemples1 = [[[1],[0]],[[0],[1]]]
START = time() ; r1.apprentissage(Lexemples1) ; END = time()
r1.test(Lexemples1)
print('APPRENTISSAGE sur {} itérations, time = {:.2f}s'.format(r1.nbiter,END-START))
print()
print("Error=") ; print(r1.error)
print('################## XOR ##################')
# 2 entrées (+ bias), 1 neurone en sortie
r2 = ReseauRetroPropagation([2,3,1],250000,10,Ï,Ï,der_Ï,der_Ï)
#r2 = ReseauRetroPropagation([2,3,1],50000,1,0.001,Ï,Ï,der_Ï,der_Ï)
#r2 = ReseauRetroPropagation([2,8,10,7,1],50000,0.1,0.001,Ï,Ï,der_Ï,der_Ï)
Lexemples2 = [[[1,0],[1]], [[0,0],[0]], [[0,1],[1]], [[1,1],[0]]]
START = time() ; r2.apprentissage(Lexemples2) ; END = time()
print('APPRENTISSAGE sur {} itérations, time = {:.2f}s'.format(r2.nbiter,END-START))
r2.test(Lexemples2)
print("Error=") ; print(r2.error)
#print("r2.M=",r2.M)
print('################## SINUS ##################')
#r3 = ReseauRetroPropagation([1,30,30,30,1],50000,0.1,tanh,tanh,der_tanh,der_tanh)
#r3 = ReseauRetroPropagation([1,30,30,30,1],50000,0.01,atan,tanh,der_atan,der_tanh)
#r3 = ReseauRetroPropagation([1,70,70,1],50000,0.01,tanh,tanh,der_tanh,der_tanh)
r3 = ReseauRetroPropagation([1,70,70,1],50000,0.01,atan,tanh,der_atan,der_tanh)
Llearning = [ [[x],[sin(x)]] for x in [ uniform(-pi,pi) for n in range(10000)] ]
Ltest = [ [[x],[sin(x)]] for x in [ uniform(-pi/2,pi/2) for n in range(10)] ]
START = time() ; r3.apprentissage(Llearning) ; END = time()
print('APPRENTISSAGE sur {} itérations, time = {:.2f}s'.format(r3.nbiter,END-START))
r3.test(Ltest)
print("Error=") ; print(r3.error)
The Python output (some output and code comments have been truncated because of the 32,000-character limit in Discourse):
(base) mattei@mbp-touch-bar AI_Deep_Learning % python3.11 -O exo_retropropagationNhidden_layers_matrix_v2.py
################## NOT ##################
Test des exemples :
[1] --> [0.00148701369562664] : on attendait [0]
[0] --> [0.9984886736566694] : on attendait [1]
Error on examples=
2.247658523513152e-06
APPRENTISSAGE sur 50000 itérations, time = 1.20s
Error=
1.1421795194838212e-06
################## XOR ##################
APPRENTISSAGE sur 250000 itérations, time = 7.02s
Test des exemples :
[1, 0] --> [0.998285295446957] : on attendait [1]
[0, 0] --> [0.00038355618841602525] : on attendait [0]
[0, 1] --> [0.9982994366618798] : on attendait [1]
[1, 1] --> [0.0021330419170872326] : on attendait [0]
Error on examples=
5.264555270454156e-06
Error=
1.4703763914601483e-06
################## SINUS ##################
APPRENTISSAGE sur 50000 itérations, time = 180.41s
Test des exemples :
[0.620848536169476] --> [0.5858217567721616] : on attendait [0.5817255565858611]
[0.5984362064181683] --> [0.5695010014480877] : on attendait [0.56335112898119]
[-0.13101865345613994] --> [-0.09942646583350077] : on attendait [-0.1306441331372787]
[0.30838292152563485] --> [0.3049571823968051] : on attendait [0.30351824014297935]
[0.3328861098219065] --> [0.3295845699745558] : on attendait [0.32677205712564156]
[0.03856460805884221] --> [0.04942242189696839] : on attendait [0.03855504970243054]
[-1.2545793545012092] --> [-0.9579666772778113] : on attendait [-0.9504186368706252]
[-0.8538799411228164] --> [-0.7419741117640238] : on attendait [-0.7538354396044733]
[-0.8818313716845816] --> [-0.7579757059169164] : on attendait [-0.7719044463122184]
[0.7045792110501594] --> [0.645393392206271] : on attendait [0.6477132944484959]
Error on examples=
0.0007771394342524107
Error=
4.223490468883356e-05
Now here is the Scheme+ version (note that the code includes some syntax specific to the Racket class system and to the external flomat library). A short, informal cheat sheet of the Scheme+ notation comes first:
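For readers who have never seen Scheme+, here is a rough, unofficial cheat sheet of the notation used in the listing (SRFI-105 curly-infix expressions plus the <+ and <- operators). The plain-Scheme equivalents in the comments are my approximation of how these forms behave as they are used in this file, not a formal definition:

; curly braces switch to infix syntax with the usual precedence:
; {a + b * c} reads as (+ a (* b c))
{x <+ 0}                       ; define a new binding,        roughly (define x 0)
{x <- x + 1}                   ; assign to an existing place, roughly (set! x (+ x 1))
{z[i + 1] <- M[i] * z_1}       ; [] indexes vectors; with the overloaded *, roughly
                               ; (vector-set! z (+ i 1)
                               ;   (multiply-flomat-vector (vector-ref M i) z_1))
{(a b) <+ (dim M)}             ; multiple values, roughly (define-values (a b) (dim M))
{z ** 2}                       ; infix exponentiation, roughly (expt z 2)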
file: exo_retropropagationNhidden_layers_matrix_v2+.rkt
#lang reader "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/SRFI/SRFI-105.rkt" ; SRFI-105 Curly-infix-expressions
; Deep Learning : back propagation, gradient descent, neural network with N hidden layers
; L'algorithme de rétro-propagation du gradient dans un
; réseau de neurones avec N couches cachées.
; D. Mattei
; MacOS users : use MacVim to show ALL the characters of this file (not Emacs, not Aquamacs)
;; jeu de couleurs: Torte ou Koehler
;; use in command line:
;; (base) mattei@pc-mattei:~/Dropbox/git/AI_Deep_Learning$ racket
;; Welcome to Racket v8.6 [cs].
;; > (require "exo_retropropagationNhidden_layers_matrix_v2+.rkt")
(provide (all-defined-out))
(require srfi/42) ; Eager Comprehensions
(require "matrix.rkt")
(include "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/Scheme+.rkt")
(require "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/overload.rkt")
(include "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/assignment.rkt") ; all sort of assignment with <-
(include "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/apply-square-brackets.rkt") ; all sort of indexing with []
; first stage overloading
(define-overload-existing-operator +)
(define-overload-existing-operator *)
(define-overload-procedure uniform)
; to take in account the new overloaded operators scheme-infix.rkt must be included
; after the overloading first stage definition of operators
(include "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/scheme-infix.rkt")
; second stage overloading
(overload-existing-operator + vector-append (vector? vector?))
(overload-existing-operator * multiply-flomat-vector (flomat? vector?))
;; return a number in ]-1,1[
;; the dummy parameter is needed by a flomat procedure
(define (uniform-dummy dummy) {(random) * (if {(random 2) = 0} 1 -1)}); we randomly choose the sign of the random number
; return a random number between [inf, sup]
(define (uniform-interval inf sup)
{gap <+ {sup - inf}}
{inf + gap * (random)})
(overload-procedure uniform uniform-dummy (number?))
(overload-procedure uniform uniform-interval (number? number?))
; sigmoïde
(define (Ï zÌ)
{1 / {1 + (exp (- zÌ))}})
; some derivatives
(define (der_tanh z zÌ)
{1 - z ** 2})
(define (der_Ï z zÌ)
{z * {1 - z}})
(define (der_atan z zÌ)
{1 / {1 + zÌ ** 2}})
#| this is a Scheme multi line comment,
but will it works with Scheme+ parser?
|#
;> (for ([x (reversed (in-range 0 3))]) (display x) (newline) )
;2
;1
;0
(define-syntax reversed ; same as Python : reversed(range(0,3))
(syntax-rules ()
((_ (name end)) (begin
(unless (equal? (quote in-range) (quote name))
(error "first argument is not in-range:" (quote name)))
(in-range {end - 1} -1 -1)))
((_ (name start end)) (begin
(unless (equal? (quote in-range) (quote name))
(error "first argument is not in-range:" (quote name)))
(in-range {end - 1} {start - 1} -1)))))
; (make-object ReseauRetroPropagation)
; (define net (new ReseauRetroPropagation (nc #(1 2 3 4))))
; (get-field z net)
; '#(#(0) #(0 0) #(0 0 0) #(0 0 0 0))
(define ReseauRetroPropagation
(class object%
(super-new)
; this is the initialisation parameters
(init-field (nc #(2 3 1)) ;; on crée le tableau des couches du réseau
(nbiter 10000)
(ηâ 1.0)
(activation_function_hidden_layer tanh)
(activation_function_output_layer tanh)
(activation_function_hidden_layer_derivative der_tanh)
(activation_function_output_layer_derivative der_tanh))
{lnc <+ (vector-length nc)}
; les entrées concrÚtes seront fournies avec la méthode accepte
;; (field (z (vector-ec (: i (vector-length nc)) (make-vector {nc[i]} 0))))
(field (z (vector-ec (:vector lg nc)
(make-vector lg 0))))
;; (field (z (for/vector ([lg nc])
;; (make-vector lg 0))))
(display "z=") (display z) (newline)
; zÌ[0] is not used as z[0] is x, the initial data
(field (zÌ (vector-ec (:vector lg nc)
(make-vector lg 0))))
(display "zÌ=") (display zÌ) (newline)
(define-pointwise-unary uniform) ;; flomat library feature
{M <+ (vector-ec (: n {lnc - 1}) ; vectors by eager comprehension (SRFI 42)
(.uniform! (zeros {nc[n + 1]} {nc[n] + 1})))} ;; flomat Matrix
(display "M=") (display M) (newline)
(field (á (for/vector ([lg nc])
(make-vector lg 0))))
(display "á=") (display á) (newline)
(display "nbiter=") (display nbiter) (newline)
(field (error 0))
; forward propagation
; z_* sans le coef. 1 constant pour le bias
(define (accepte_et_propage x) ; on entre des entrées et on les propage
(when {vector-length(x) â vector-length(z[0])}
(display "Mauvais nombre d'entrées !") (newline)
(exit #f))
{z[0] <- x} ; on ne touche pas au biais
;; propagation des entrées vers la sortie
{n <+ vector-length(z)}
;(display "n=") (display n) (newline)
;; hidden layers
(declare z_1)
(declare i) ; because the variable will be used outside the 'for' loop too
;(for-racket ([i (in-range {n - 2})]) ; warning : in Racket the variable 'i'
; is only seen inside the 'for-racket' but i need it ouside too
(for ({i <- 0} {i < n - 2} {i <- i + 1}) ; personnal 'for' definition as in Javascript,C,C++,Java
;; calcul des stimuli reçus par la couche cachée d'indice i+1 à -partir de la précedente
;; create an array with 1 in front for the bias coefficient
{z_1 <- #(1) + z[i]} ; + operator has been overloaded to append scheme vectors
{zÌ[i + 1] <- M[i] * z_1} ; zÌ = matrix * vector , return a vector
;(display "zÌ[i + 1] = ") (display {zÌ[i + 1]}) (newline)
{z[i + 1] <- vector-map(activation_function_hidden_layer zÌ[i + 1])}
;(display "z[i + 1] = ") (display {z[i + 1]}) (newline)
) ; end for
; output layer
;{i <- i + 1} ; was used with for-racket
;(display "i=") (display i) (newline)
; calcul des stimuli reçus par la couche cachée d'indice i+1 à -partir de la précedente
; create a list with 1 in front for the bias coefficient
{z_1 <- #(1) + z[i]}
{zÌ[i + 1] <- M[i] * z_1} ; zÌ = matrix * vector , return a vector
; calcul des réponses des neurones de la couche de sortie
{z[i + 1] <- vector-map(activation_function_output_layer zÌ[i + 1])}
;(display "z[i + 1] = ") (display {z[i + 1]}) (newline)
) ; end define/public
(define/public (apprentissage Lexemples) ; apprentissage des poids par une liste d'exemples
{ip <+ 0} ; numéro de l'exemple courant
(declare x y)
(for-racket ([it (in-range nbiter)]) ; le nombre d'itérations est fixé !
(when {it % 100 = 0}
(display it)(newline))
;(display it)(newline)
{err <+ 0.0} ; l'erreur totale pour cet exemple
{(x y) <- Lexemples[ip]} ; un nouvel exemple Ă apprendre
;; PROPAGATION VERS L'AVANT
(accepte_et_propage x) ; sorties obtenues sur l'exemple courant, self.z_k et z_j sont mis Ă jour
; RETRO_PROPAGATION VERS L'ARRIERE, EN DEUX TEMPS
{i <+ i_output_layer <+ {vector-length(z) - 1}} ; start at index i of the ouput layer
{ns <+ vector-length(z[i])}
;; TEMPS 1. calcul des gradients locaux sur la couche k de sortie (les erreurs commises)
(for-racket ([k (in-range ns)])
{á[i][k] <- y[k] - z[i][k]} ; gradient sur un neurone de sortie (erreur locale)
{err <- err + á[i][k] ** 2}) ; l'erreur quadratique totale
{err <- err * 0.5}
(when {it = nbiter - 1}
{error <- err}) ; mémorisation de l'erreur totale à la derniÚre itération
;; modification des poids de la matrice de transition de la derniére couche de neurones cachés à la couche de sortie
{ázâłázÌ <+ activation_function_output_layer_derivative}
{modification_des_poids(M[i - 1] ηâ z[i - 1] z[i] zÌ[i] á[i] ázâłázÌ)}
;; TEMPS 2. calcul des gradients locaux sur les couches cachées (rétro-propagation), sauf pour le bias constant
{ázâłázÌ <- activation_function_hidden_layer_derivative}
(for-racket ([i (reversed (in-range 1 i_output_layer))])
{nc <+ vector-length(z[i])}
{ns <+ vector-length(z[i + 1])}
(for-racket ([j (in-range nc)])
{k <+ 0}
{á[i][j] <- (for/sum ([k (in-range ns)])
{ázâłázÌ(z[i + 1][k] zÌ[i + 1][k]) * M[i][k {j + 1}] * á[i + 1][k]})})
; modification des poids de la matrice de transition de la couche i-1 Ă i
{modification_des_poids(M[i - 1] ηâ z[i - 1] z[i] zÌ[i] á[i] ázâłázÌ)})
; et l'on passe Ă l'exemple suivant
{ip <- random(vector-length(Lexemples))}
) ; end for it
) ; end define/public
; modify coefficients layer
(define (modification_des_poids M_i_o η z_input z_output zÌ_output á_i_o ázâłázÌ) ; derivative of activation function of the layer
; the length of output and input layer with coeff. used for bias update
{(len_layer_output len_layer_input_plus1forBias) <+ (dim M_i_o)} ; use values and define-values to create bindings
{len_layer_input <+ {len_layer_input_plus1forBias - 1}}
(for-racket ([j (in-range len_layer_output)]) ; line
(for-racket ([i (in-range len_layer_input)]) ; column , parcours les colonnes de la ligne sauf le bias
{M_i_o[j {i + 1}] <- M_i_o[j {i + 1}] - {(- η) * z_input[i] * ázâłázÌ(z_output[j] zÌ_output[j]) * á_i_o[j]}})
; and update the bias
{M_i_o[j 0] <- M_i_o[j 0] - {(- η) * 1.0 * ázâłázÌ(z_output[j] zÌ_output[j]) * á_i_o[j]}}))
(define/public (test Lexemples)
(display "Test des exemples :") (newline)
{err <+ 0}
(declare entree sortie_attendue á)
(for-racket ([entree-sortie_attendue Lexemples])
{(entree sortie_attendue) <- entree-sortie_attendue} ; use pairs in Scheme instead of tuples and vectors in Python
(accepte_et_propage entree)
(printf "~a --> ~a : on attendait ~a" entree {z[vector-length(z) - 1]} sortie_attendue) (newline)
{á <- sortie_attendue[0] - z[vector-length(z) - 1][0]} ; erreur sur un element
{error <- error + á ** 2}) ; l'erreur quadratique totale
{err <- err * 0.5}
(display "Error on examples=") (display error) (newline))
) ; end class
) ; end define
(printf "################## NOT ##################")
(newline)
{r1 <+ (new ReseauRetroPropagation (nc #(1 2 1))
(nbiter 50000)
(ηâ 10)
(activation_function_hidden_layer Ï)
(activation_function_output_layer Ï)
(activation_function_hidden_layer_derivative der_Ï)
(activation_function_output_layer_derivative der_Ï))}
{Lexemples1 <+ #((#(1) . #(0)) (#(0) . #(1)))} ; use pairs in Scheme instead of vectors in Python
(send r1 apprentissage Lexemples1)
(send r1 test Lexemples1)
(newline)
(printf "################## XOR ##################")
(newline)
{r2 <+ (new ReseauRetroPropagation (nc #(2 3 1))
(nbiter 250000)
(ηâ 10)
(activation_function_hidden_layer Ï)
(activation_function_output_layer Ï)
(activation_function_hidden_layer_derivative der_Ï)
(activation_function_output_layer_derivative der_Ï))}
{Lexemples2 <+ #( (#(1 0) . #(1)) (#(0 0) . #(0)) (#(0 1) . #(1)) (#(1 1) . #(0)))} ; use pairs in Scheme instead of vectors in Python
(send r2 apprentissage Lexemples2)
(send r2 test Lexemples2)
(printf "################## SINUS ##################")
(newline)
{r3 <+ (new ReseauRetroPropagation (nc #(1 70 70 1))
(nbiter 50000)
(ηâ 0.01)
(activation_function_hidden_layer atan)
(activation_function_output_layer tanh)
(activation_function_hidden_layer_derivative der_atan)
(activation_function_output_layer_derivative der_tanh))}
{Llearning <+ (vector-ec (:list x (list-ec (: n 10000)
(uniform (- pi) pi)))
(cons (vector x) (vector (sin x)))) ; vectors by eager comprehension (SRFI 42)
} ; use pairs in Scheme instead of vectors in Python
{Ltest <+ (vector-ec (:list x (list-ec (: n 10)
(uniform {(- pi) / 2} {pi / 2})))
(cons (vector x) (vector (sin x)))) ; vectors by eager comprehension (SRFI 42)
} ; use pairs in Scheme instead of vectors in Python
(send r3 apprentissage Llearning)
(send r3 test Ltest)
And here is the output of the Scheme+ code:
Welcome to DrRacket, version 8.6 [cs].
Language: reader "../Scheme-PLUS-for-Racket/main/Scheme-PLUS-for-Racket/SRFI/SRFI-105.rkt", with debugging; memory limit: 14000 MB.
################## NOT ##################
z=#(#(0) #(0 0) #(0))
zÌ=#(#(0) #(0 0) #(0))
M=#((flomat: ((0.7191038484623657 0.5220062447658971) (0.8590859714173438 0.7678423821253739))) (flomat: ((-0.3449301854114697 0.24027850827619662 0.4877697409726927))))
á=#(#(0) #(0 0) #(0))
nbiter=50000
(object:ReseauRetroPropagation ...)
'#((#(1) . #(0)) (#(0) . #(1)))
0
Test des exemples :
#(1) --> #(0.0020417819126145073) : on attendait #(0)
#(0) --> #(0.9977152849624217) : on attendait #(1)
Error on examples=1.1473766485056049e-5
################## XOR ##################
z=#(#(0 0) #(0 0 0) #(0))
zÌ=#(#(0 0) #(0 0 0) #(0))
M=#((flomat: ((0.870364658310043 -0.5300068995546167 0.9612426066162211) (0.4040654569504818 -0.5246283863956818 -0.5314425643393894) (-0.053038296297184574 -0.3547420573389037 0.3417644829226221))) (flomat: ((0.13672253732529652 -0.8094594372360882 -0.301966023354077 -0.18825622046303328))))
á=#(#(0 0) #(0 0 0) #(0))
nbiter=250000
(object:ReseauRetroPropagation ...)
'#((#(1 0) . #(1)) (#(0 0) . #(0)) (#(0 1) . #(1)) (#(1 1) . #(0)))
Test des exemples :
#(1 0) --> #(0.9982945690274773) : on attendait #(1)
#(0 0) --> #(0.00023046719114048114) : on attendait #(0)
#(0 1) --> #(0.9982757352419109) : on attendait #(1)
#(1 1) --> #(0.0021066729886745233) : on attendait #(0)
Error on examples=1.2592594532243627e-5
################## SINUS ##################
z=#(#(0) #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) #(0))
zÌ=#(#(0) #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) #(0))
M=#((flomat 70 2 ...) (flomat 70 71 ...) (flomat: ((-0.5860193953597999 -0.765712813071037 0.5842271019982261 0.54199322749269
....
0.8050058713278784 -0.4197467256587276 -0.9132786122993454 0.34333903957496403))))
á=#(#(0) #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0) #(0))
nbiter=50000
(object:ReseauRetroPropagation ...)
'#((#(-1.0289303409522037) . #(-0.8567478182100667))
(#(0.2537482835913112) . #(0.25103397131710614))
....
Test des exemples :
#(-0.703235483801749) --> #(-0.6733414643985518) : on attendait #(-0.6466889454810666)
#(1.3565912350169103) --> #(0.9751223006885534) : on attendait #(0.9771456771484469)
#(0.09227900071669493) --> #(0.07708815030306242) : on attendait #(0.0921480908180361)
#(0.4646445353940236) --> #(0.44552382224073234) : on attendait #(0.4481050511914433)
#(0.08622560706397664) --> #(0.07184379406533917) : on attendait #(0.08611880095729915)
#(1.2390914317654804) --> #(0.955626830482106) : on attendait #(0.9454885095328692)
#(0.08937877865035615) --> #(0.07456514227139113) : on attendait #(0.08925982479403971)
#(0.10344509142135827) --> #(0.0869960812142322) : on attendait #(0.10326069773652688)
#(0.34818977723209565) --> #(0.34910583332135947) : on attendait #(0.3411967726899541)
#(-0.08702153621045006) --> #(-0.08846634804100845) : on attendait #(-0.08691174576549757)
Error on examples=0.0020535693465753386
I also have a custom-matrix version (it does not use flomat), file: exo_retropropagationNhidden_layers_matrix_v2_by_vectors+.rkt
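For the curious, the idea of that variant is to replace the flomat matrices with plain Scheme vectors of vectors. A minimal sketch of such a matrix-times-vector product (my own illustration in plain Racket, not the contents of that file):

; M is a vector of row vectors, v a vector of the same length as each row;
; returns the product M·v as a new vector
(define (matrix*vector M v)
  (for/vector ([row (in-vector M)])
    (for/sum ([m (in-vector row)] [x (in-vector v)])
      (* m x))))

; example: (matrix*vector #(#(1 2) #(3 4)) #(1 1)) ; => #(3 7)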
All the example source code is available here:
Scheme+ is available here: