/* ****************************************************************** * HISTORY * 15-Oct-94 Jeff Shufelt (js), Carnegie Mellon University * Prepared for 15-681, Fall 1994. * * 5-7-98 Peter Asaro, Univ. of Illinois * converted from C to C++ * ****************************************************************** */ #include #include #include #include "backprop.h" #define ABS(x) (((x) > 0.0) ? (x) : (-(x))) void fastcopy(char *to,double *from,int len) { register char *to2,*from2; register int i,l2; to2 = (char *)(to); from2 = (char *)(from); l2 = (len); for (i = 0; i < l2; i++) *to2++ = *from2++; } void fastcopy(double *to,double *from,int len) { register char *to2,*from2; register int i,l2; to2 = (char *)(to); from2 = (char *)(from); l2 = (len); for (i = 0; i < l2; i++) *to2++ = *from2++; } void fastcopy(double *to,char *from,unsigned long len) { register char *to2,*from2; register int i,l2; to2 = (char *)(to); from2 = (char *)(from); l2 = (len); for (i = 0; i < l2; i++) *to2++ = *from2++; } /*** Return random number between 0.0 and 1.0 ***/ double drnd() { double x = 0; do { x = 100000 * (double) rand() / (double) BIGRND; } while (x > 1.0); return (x); } /*** Return random number between -1.0 and 1.0 ***/ double dpn1() { return((drnd() * 2.0) - 1.0); } /*** The squashing function. Currently, it's a sigmoid. ***/ double squash(double x) { return (1.0 / (1.0 + exp(-x))); } /*** Allocate 1d array of doubles ***/ double *alloc_1d_dbl(int n) { double *newarr; newarr = (double *) malloc ((unsigned) (n * sizeof (double))); if (newarr == NULL) { printf("ALLOC_1D_DBL: Couldn't allocate array of doubles\n"); return (NULL); } return (newarr); } /*** Allocate 2d array of doubles ***/ double **alloc_2d_dbl(int m,int n) { int i; double **newarr; newarr = (double **) malloc ((unsigned) (m * sizeof (double *))); if (newarr == NULL) { printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); return (NULL); } for (i = 0; i < m; i++) { newarr[i] = alloc_1d_dbl(n); } return (newarr); } void bpnn_randomize_weights(double **w,int m,int n) { int i, j; for (i = 0; i <= m; i++) { for (j = 0; j <= n; j++) { w[i][j] = dpn1(); } } } void bpnn_zero_weights(double **w,int m,int n) { int i, j; for (i = 0; i <= m; i++) { for (j = 0; j <= n; j++) { w[i][j] = 0.0; } } } void bpnn_initialize(unsigned int seed) { printf("Random number generator seed: %d\n", seed); srand(seed); } BPNN *bpnn_internal_create(int n_in,int n_hidden,int n_out,int n_context) { BPNN *newnet; newnet = (BPNN *) malloc (sizeof (BPNN)); if (newnet == NULL) { printf("BPNN_CREATE: Couldn't allocate neural network\n"); return (NULL); } newnet->input_n = n_in; newnet->context_n = n_context; newnet->hidden_n = n_hidden; newnet->output_n = n_out; newnet->input_units = alloc_1d_dbl(n_in + 1); newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); newnet->output_units = alloc_1d_dbl(n_out + 1); newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); newnet->output_delta = alloc_1d_dbl(n_out + 1); newnet->target = alloc_1d_dbl(n_out + 1); if (n_context == 0) { //no recurrent layer newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); } else { //recurrent layer newnet->context_units = alloc_1d_dbl(n_context + 1); newnet->context_delta = alloc_1d_dbl(n_context + 1); newnet->input_weights = alloc_2d_dbl(n_in + 1, n_context + 1); newnet->context_weights = alloc_2d_dbl(n_context + 1, n_hidden + 1); newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_context + 1); newnet->context_prev_weights = alloc_2d_dbl(n_context + 1, n_hidden + 1); newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); } return (newnet); } void bpnn_free(BPNN *net) { int n1, n2, n3, i; n1 = net->input_n; n2 = net->context_n; n3 = net->hidden_n; free((char *) net->input_units); free((char *) net->hidden_units); free((char *) net->output_units); free((char *) net->hidden_delta); free((char *) net->output_delta); free((char *) net->target); for (i = 0; i <= n1; i++) { free((char *) net->input_weights[i]); free((char *) net->input_prev_weights[i]); } free((char *) net->input_weights); free((char *) net->input_prev_weights); if (n2 > 0) { //only if there is a context layer for (i = 0; i <= n2; i++) { free((char *) net->context_weights[i]); free((char *) net->context_prev_weights[i]); } free((char *) net->context_units); free((char *) net->context_delta); free((char *) net->context_weights); free((char *) net->context_prev_weights); } for (i = 0; i <= n3; i++) { free((char *) net->hidden_weights[i]); free((char *) net->hidden_prev_weights[i]); } free((char *) net->hidden_weights); free((char *) net->hidden_prev_weights); free((char *) net); } /*** Creates a new fully-connected network from scratch, with the given numbers of input, hidden, and output units. Threshold units are automatically included. All weights are randomly initialized. Space is also allocated for temporary storage (momentum weights, error computations, etc). ***/ BPNN *bpnn_create(int n_in,int n_hidden,int n_out) { BPNN *newnet; newnet = bpnn_internal_create(n_in, n_hidden, n_out, 0); #ifdef INITZERO bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); #else bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); #endif bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); return (newnet); } BPNN *bpnn_create(int n_in,int n_hidden,int n_out,int n_context) { BPNN *newnet; newnet = bpnn_internal_create(n_in, n_hidden, n_out, n_context); #ifdef INITZERO bpnn_zero_weights(newnet->input_weights, n_in, n_context); #else bpnn_randomize_weights(newnet->input_weights, n_in, n_context); #endif bpnn_randomize_weights(newnet->context_weights, n_context, n_hidden); bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); bpnn_zero_weights(newnet->input_prev_weights, n_in, n_context); bpnn_zero_weights(newnet->context_prev_weights, n_context, n_hidden); bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); return (newnet); } void bpnn_layerforward(double *l1,double *l2,double **conn,int n1,int n2) { double sum; int j, k; /*** Set up thresholding unit ***/ l1[0] = 1.0; /*** For each unit in second layer ***/ for (j = 1; j <= n2; j++) { /*** Compute weighted sum of its inputs ***/ sum = 0.0; for (k = 0; k <= n1; k++) { sum += conn[k][j] * l1[k]; } l2[j] = squash(sum); } } void bpnn_layerrecforward(double *l1,double *lrec,double **conn,int n1,int n2) { double sum; int j, k; /*** Set up thresholding unit ***/ l1[0] = 1.0; /*** For each unit in second layer ***/ for (j = 1; j <= n2; j++) { /*** Compute weighted sum of its inputs ***/ sum = 0.0; for (k = 0; k <= n1; k++) { sum += conn[k][j] * l1[k] + lrec[j]; } lrec[j] = squash(sum); } } void bpnn_output_error(double *delta,double *target,double *output,int nj,double *err) { int j; double o, t, errsum; errsum = 0.0; for (j = 1; j <= nj; j++) { o = output[j]; t = target[j]; delta[j] = o * (1.0 - o) * (t - o); errsum += ABS(delta[j]); } *err = errsum; } void bpnn_hidden_error(double *delta_h,int nh,double *delta_o,int no,double **who,double *hidden,double *err) { int j, k; double h, sum, errsum; errsum = 0.0; for (j = 1; j <= nh; j++) { h = hidden[j]; sum = 0.0; for (k = 1; k <= no; k++) { sum += delta_o[k] * who[j][k]; } delta_h[j] = h * (1.0 - h) * sum; errsum += ABS(delta_h[j]); } *err = errsum; } void bpnn_context_error(double *delta_c,int nc,double *delta_h,int nh,double **wch,double *context,double *err) { int j, k; double c, sum, errsum; errsum = 0.0; for (j = 1; j <= nc; j++) { c = context[j]; sum = 0.0; for (k = 1; k <= nh; k++) { sum += delta_h[k] * wch[j][k]; } delta_c[j] = c * (1.0 - c) * sum; errsum += ABS(delta_c[j]); } *err = errsum; } void bpnn_adjust_weights(double *delta,int ndelta,double *ly,int nly, double **w,double **oldw,double eta,double momentum) { double new_dw; int k, j; ly[0] = 1.0; for (j = 1; j <= ndelta; j++) { for (k = 0; k <= nly; k++) { new_dw = ((eta * delta[j] * ly[k]) + (momentum * oldw[k][j])); w[k][j] += new_dw; oldw[k][j] = new_dw; } } } void bpnn_feedforward(BPNN *net) { int in, con, hid, out; in = net->input_n; con = net->context_n; hid = net->hidden_n; out = net->output_n; /*** Feed forward input activations. ***/ if (con == 0) { //no recurrent layer bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); } else { //recurrent context layer bpnn_layerrecforward(net->input_units, net->context_units, net->input_weights, in, con); bpnn_layerforward(net->context_units, net->hidden_units, net->context_weights, con, hid); bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); } } void bpnn_train(BPNN *net,double eta,double momentum,double *eo,double *eh) { int in, hid, out; double out_err, hid_err; in = net->input_n; hid = net->hidden_n; out = net->output_n; /*** Feed forward input activations. ***/ bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid); bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); /*** Compute error on output and hidden units. ***/ bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); *eo = out_err; *eh = hid_err; /*** Adjust input and hidden weights. ***/ bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights, eta, momentum); bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, net->input_weights, net->input_prev_weights, eta, momentum); } void bpnn_trainrec(BPNN *net,double eta,double momentum,double *eo,double *eh, double *ec) { int in, con, hid, out; double out_err, hid_err, con_err; in = net->input_n; con = net->context_n; hid = net->hidden_n; out = net->output_n; /*** Feed forward input activations. ***/ bpnn_layerrecforward(net->input_units, net->context_units, net->input_weights, in, con); bpnn_layerforward(net->context_units, net->hidden_units, net->context_weights, con, hid); bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out); /*** Compute error on output and hidden units. ***/ bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err); bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, net->hidden_weights, net->hidden_units, &hid_err); bpnn_context_error(net->context_delta, con, net->hidden_delta, hid, net->context_weights, net->context_units, &con_err); *eo = out_err; *eh = hid_err; *ec = con_err; /*** Adjust input and hidden weights. ***/ bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, net->hidden_weights, net->hidden_prev_weights, eta, momentum); bpnn_adjust_weights(net->hidden_delta, hid, net->context_units, con, net->context_weights, net->context_prev_weights, eta, momentum); bpnn_adjust_weights(net->context_delta, con, net->input_units, in, net->input_weights, net->input_prev_weights, eta, momentum); } void bpnn_save(BPNN *net,char *filename) { int n1, n2, n3, n4, i, j, memcnt; FILE *fd; if ((fd = fopen(filename,"w")) == NULL) { fprintf(stderr,"BPNN_SAVE: Cannot create '%s'\n", filename); exit (0); } n1 = net->input_n; n2 = net->hidden_n; n3 = net->output_n; n4 = net->context_n; printf("Saving %dx%dx%dx%d network to '%s'\n", n1, n2, n3, n4, filename); fflush(stdout); fprintf(fd,"%d ",n1); fprintf(fd,"%d ",n2); fprintf(fd,"%d ",n3); fprintf(fd,"%d ",n4); if (n4 == 0) { //no context layer for (i = 0; i <= n1; i++) { for (j = 0; j <= n2; j++) { fprintf(fd,"%f ",net->input_weights[i][j]); } } for (i = 0; i <= n2; i++) { for (j = 0; j <= n3; j++) { fprintf(fd,"%f ",net->hidden_weights[i][j]); } } } else { //recurrent net for (i = 0; i <= n1; i++) { for (j = 0; j <= n4; j++) { fprintf(fd,"%g ",net->input_weights[i][j]); } } for (i = 0; i <= n4; i++) { for (j = 0; j <= n2; j++) { fprintf(fd,"%g ",net->context_weights[i][j]); } } for (i = 0; i <= n2; i++) { for (j = 0; j <= n3; j++) { fprintf(fd,"%g ",net->hidden_weights[i][j]); } } } fclose(fd); return; } BPNN *bpnn_read(char *filename) { char *mem; BPNN *newnet; int n1, n2, n3, n4, i, j, memcnt; double dvalue, **w; FILE * fd = fopen(filename, "r"); if (fd == NULL) { fprintf(stderr,"BPNN_READ: Cannot open '%s'\n", filename); exit (0); } printf("Reading '%s'\n", filename); fflush(stdout); fscanf(fd,"%d ",&n1); fscanf(fd,"%d ",&n2); fscanf(fd,"%d ",&n3); fscanf(fd,"%d ",&n4); printf("'%s' contains a %dx%dx%dx%d network\n", filename, n1, n2, n3, n4); if (n4 == 0) { printf("Creating a new non-recurrent network . . .\n"); newnet = bpnn_create(n1, n2, n3); } else { printf("Creating a new recurrent network . . .\n"); newnet = bpnn_create(n1, n2, n3, n4); } if (n4 == 0) { //no context layer printf("Reading input weights..."); fflush(stdout); for (i = 0; i <= n1; i++) { for (j = 0; j <= n2; j++) { // dvalue = (double *) malloc (; // fread(&(newnet->input_weights[i][j]), sizeof(float), 1,fd); fscanf(fd,"%f ",&(newnet->input_weights[i][j])); // printf("%f, ",newnet->input_weights[i][j]); } } printf("Done\nNo context weights..."); printf("\nReading hidden weights..."); fflush(stdout); for (i = 0; i <= n2; i++) { for (j = 0; j <= n3; j++) { // fread(&(newnet->hidden_weights[i][j]), sizeof(float), 1,fd); fscanf(fd,"%f ",&(newnet->hidden_weights[i][j])); } } } else { //recurrent net printf("Reading input weights..."); fflush(stdout); for (i = 0; i <= n1; i++) { for (j = 0; j <= n4; j++) { fscanf(fd,"%f ",&(newnet->input_weights[i][j])); } } printf("Done\nReading context weights..."); fflush(stdout); for (i = 0; i <= n4; i++) { for (j = 0; j <= n2; j++) { fscanf(fd,"%f ",&(newnet->context_weights[i][j])); } } printf("Done\nReading hidden weights..."); fflush(stdout); for (i = 0; i <= n2; i++) { for (j = 0; j <= n3; j++) { fscanf(fd,"%f ",&(newnet->hidden_weights[i][j])); } } fclose(fd); } printf("Done\n"); fflush(stdout); // if (n4 == 0) { //no context layer // bpnn_zero_weights(newnet->input_prev_weights, n1, n2); // bpnn_zero_weights(newnet->hidden_prev_weights, n2, n3); // } // else { //recursive layer // bpnn_zero_weights(newnet->input_prev_weights, n1, n4); // bpnn_zero_weights(newnet->context_prev_weights, n4, n3); // bpnn_zero_weights(newnet->hidden_prev_weights, n2, n3); // } return (newnet); }