// Neural Network archetype in C++ (page title, as published on the 王朝网络 wide-screen edition)

/*--------------------------------------------------------------------------------------------

* Neural Network Prototype

* Perceptron Learning (See Russell & Norvig PP 742)

* Terry 10 Nov. 2004

*------------------------------------------------------------------------------------------*/

#include <iostream>

#include <fstream>

#include <string>

#include <math.h>

//#include < cmath >

//#include <conio.h>

#include <time.h>

#include <stdlib.h>

using namespace std;

////////////////////////////////////////////////////////////////////////////////

/**
 * NeuralNet - a small, fixed-size feed-forward network with one hidden layer,
 * trained with back-propagation on sigmoid units.
 *
 * The task it learns is synthetic: every sample is NUM_INPUT random bits, and the
 * two target outputs encode whether the number of set bits is below RULE
 * (output 0 high) or at least RULE (output 1 high).
 *
 * Weight layout: weights1[j][0] / weights2[i][0] are the bias weights (they are
 * applied to a constant input of -1); weights1[j][k] with k >= 1 connects input
 * k-1 to hidden unit j, and weights2[i][j] with j >= 1 connects hidden unit j-1
 * to output i.
 *
 * Typical use: construct, CreateTrainingPatterns(), CreateTestData(), Train(n),
 * Test(). The pattern arrays hold indeterminate values until the Create*
 * methods have been called.
 */
class NeuralNet {

public:

    int RULE;            // samples with at least RULE set bits belong to the second class
    double UPPERBOUND;   // target value of the "active" output
    double LOWERBOUND;   // target value of the "inactive" output

    static const int NUM_INPUT = 10;
    static const int NUM_HIDDEN = 6;
    static const int NUM_OUTPUT = 2;

    static const int NUM_PATTERNS = 200;
    int xp[NUM_PATTERNS][NUM_INPUT];       // training inputs
    double yp[NUM_PATTERNS][NUM_OUTPUT];   // training targets

    static const int NUM_DATA = 1000;
    int xt[NUM_DATA][NUM_INPUT];           // test inputs
    double yt[NUM_DATA][NUM_OUTPUT];       // test targets

    double weights1[NUM_HIDDEN][NUM_INPUT + 1];    // input  -> hidden (slot 0 = bias)
    double weights2[NUM_OUTPUT][NUM_HIDDEN + 1];   // hidden -> output (slot 0 = bias)

    // Learning rate. Defined right after the class: a non-integral static const
    // member cannot portably be initialised inside the class body.
    static const double alpha;

    bool fullytrained;   // reserved for an early-stopping criterion; currently never set to true

    ////////////////////////////////////////////////////////////////////////////////

    /**
     * Sets the task parameters and the starting weights.
     * The generator is seeded with a fixed value, so every instance starts from
     * the same weights and subsequently generates the same data sets.
     * Bias weights start at 0; every other weight is drawn from [-0.25, 0.25).
     */
    NeuralNet() {
        UPPERBOUND = 1;
        LOWERBOUND = 0;
        RULE = 6;
        fullytrained = false;

        srand(100);

        for (int j = 0; j < NUM_HIDDEN; j++) {
            weights1[j][0] = 0.0;
            for (int k = 1; k <= NUM_INPUT; k++) {
                weights1[j][k] = RandomWeight();
            }
        }

        for (int i = 0; i < NUM_OUTPUT; i++) {
            weights2[i][0] = 0.0;
            // Start at 1 so the zero bias assigned above is not overwritten.
            for (int j = 1; j <= NUM_HIDDEN; j++) {
                weights2[i][j] = RandomWeight();
            }
        }
    }

    ~NeuralNet() {}

    ////////////////////////////////////////////////////////////////////////////////

    /** Prints every weight of both layers to standard output, one per line. */
    void PrintWeights() {
        for (int j = 0; j < NUM_HIDDEN; j++) {
            for (int k = 0; k <= NUM_INPUT; k++) {
                std::cout << " w1[" << j << "][" << k << "]:" << weights1[j][k] << std::endl;
            }
            std::cout << std::endl;
        }

        for (int i = 0; i < NUM_OUTPUT; i++) {
            for (int j = 0; j <= NUM_HIDDEN; j++) {
                std::cout << " w2[" << i << "][" << j << "]:" << weights2[i][j] << std::endl;
            }
            std::cout << std::endl;
        }
    }

    ////////////////////////////////////////////////////////////////////////////////

    /**
     * Weighted input of hidden unit j for the input vector vct
     * (NUM_INPUT elements). The bias weight is applied to a constant -1.
     */
    double CalInput2Hidden(int j, int vct[NUM_INPUT]) {
        double in_to_hidden = (-1.0) * weights1[j][0];
        for (int k = 1; k <= NUM_INPUT; k++) {
            in_to_hidden += weights1[j][k] * vct[k - 1];
        }
        return in_to_hidden;
    }

    /**
     * Weighted input of output unit i for the hidden activations vct
     * (NUM_HIDDEN elements). The bias weight is applied to a constant -1.
     */
    double CalInput2Output(int i, double vct[NUM_HIDDEN]) {
        double in_to_output = (-1.0) * weights2[i][0];
        for (int j = 1; j <= NUM_HIDDEN; j++) {
            in_to_output += weights2[i][j] * vct[j - 1];
        }
        return in_to_output;
    }

    ////////////////////////////////////////////////////////////////////////////////

    /** Logistic activation function g(x) = 1 / (1 + e^-x). */
    double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }

    /** Derivative of the logistic function, g'(x) = g(x) * (1 - g(x)). */
    double derivative(double x) {
        const double sig = sigmoid(x);
        return sig * (1.0 - sig);
    }

    ////////////////////////////////////////////////////////////////////////////////

    /**
     * Converts a pair of output values into a class label:
     * "10" when output 0 is strictly larger than output 1, otherwise "01".
     */
    std::string FormattedOutput(double input[NUM_OUTPUT]) {
        if (input[0] > input[1])
            return "10";
        else
            return "01";
    }

    ///////////////////////////////////////////////////////

    /** Fills xt / yt with NUM_DATA random labelled samples. */
    void CreateTestData() {
        std::cout << "Generating " << NUM_DATA << " random testing data ...";
        for (int p = 0; p < NUM_DATA; p++) {
            FillRandomSample(xt[p], yt[p]);
        }
        std::cout << " Done!" << std::endl;
    }

    ///////////////////////////////////////////////////////

    /** Fills xp / yp with NUM_PATTERNS random labelled samples. */
    void CreateTrainingPatterns() {
        std::cout << "\nGenerating " << NUM_PATTERNS << " training patterns...";
        for (int p = 0; p < NUM_PATTERNS; p++) {
            FillRandomSample(xp[p], yp[p]);
        }
        std::cout << " Done!" << std::endl;
    }

    ////////////////////////////////////////////////////////

    /**
     * Trains the network with back-propagation for a fixed number of epochs.
     *
     * @param number_of_pattern_used  how many training patterns (taken from the
     *        start of xp / yp) to present in every epoch. Values outside
     *        [0, NUM_PATTERNS] are clamped to that range.
     */
    void Train(int number_of_pattern_used) {
        const int times = 50;   // number of epochs

        int pattern_count = number_of_pattern_used;
        if (pattern_count > NUM_PATTERNS) pattern_count = NUM_PATTERNS;
        if (pattern_count < 0) pattern_count = 0;

        std::cout << "Begin training (epoch=" << times << ", using the first "
                  << pattern_count << " patterns) ... ";

        double in_to_hidden[NUM_HIDDEN];
        double a_hidden[NUM_HIDDEN];
        double delta_hidden[NUM_HIDDEN];
        double in_to_output[NUM_OUTPUT];
        double a_output[NUM_OUTPUT];
        double delta_output[NUM_OUTPUT];

        for (int epoch = 0; epoch < times; epoch++) {
            for (int p = 0; p < pattern_count; p++) {

                // Forward pass.
                CalAll(xp[p], in_to_hidden, a_hidden, in_to_output, a_output);

                // Output layer: delta = (target - prediction) * g'(in).
                for (int i = 0; i < NUM_OUTPUT; i++) {
                    const double err_output = yp[p][i] - a_output[i];
                    delta_output[i] = err_output * derivative(in_to_output[i]);
                }

                // Hidden layer: propagate the output deltas back through the
                // weights that were used in the forward pass. Hidden unit j feeds
                // the outputs through weights2[i][j + 1]; slot 0 is the bias.
                // This has to happen before weights2 is modified below.
                for (int j = 0; j < NUM_HIDDEN; j++) {
                    double tmp = 0.0;
                    for (int i = 0; i < NUM_OUTPUT; i++) {
                        tmp += delta_output[i] * weights2[i][j + 1];
                    }
                    delta_hidden[j] = derivative(in_to_hidden[j]) * tmp;
                }

                // Adjust hidden -> output weights (the bias sees a constant -1).
                for (int i = 0; i < NUM_OUTPUT; i++) {
                    weights2[i][0] = weights2[i][0] + alpha * (-1.0) * delta_output[i];
                    for (int j = 1; j <= NUM_HIDDEN; j++) {
                        weights2[i][j] = weights2[i][j] + alpha * a_hidden[j - 1] * delta_output[i];
                    }
                }

                // Adjust input -> hidden weights (the bias sees a constant -1).
                for (int j = 0; j < NUM_HIDDEN; j++) {
                    weights1[j][0] = weights1[j][0] + alpha * (-1.0) * delta_hidden[j];
                    for (int k = 1; k <= NUM_INPUT; k++) {
                        weights1[j][k] = weights1[j][k] + alpha * xp[p][k - 1] * delta_hidden[j];
                    }
                }

            } // end of pattern loop
        } // end of epoch loop

        std::cout << " Done!" << std::endl;
    }

    ///////////////////////////////////////////////////////////

    /**
     * Forward pass for one input vector.
     *
     * @param in     input vector (NUM_INPUT elements)
     * @param i_hid  receives the weighted input of each hidden unit (NUM_HIDDEN)
     * @param o_hid  receives the activation of each hidden unit (NUM_HIDDEN)
     * @param i_out  receives the weighted input of each output unit (NUM_OUTPUT)
     * @param o_out  receives the activation of each output unit (NUM_OUTPUT)
     */
    void CalAll(int in[NUM_INPUT], double * i_hid, double *o_hid, double * i_out, double *o_out) {
        for (int j = 0; j < NUM_HIDDEN; j++) {
            const double in_to_hidden = CalInput2Hidden(j, in);
            i_hid[j] = in_to_hidden;
            o_hid[j] = sigmoid(in_to_hidden);
        }

        for (int i = 0; i < NUM_OUTPUT; i++) {
            const double in_to_output = CalInput2Output(i, o_hid);
            i_out[i] = in_to_output;
            o_out[i] = sigmoid(in_to_output);
        }
    }

    //////////////////////////////////////////////////////////

    /**
     * Classifies every sample in xt and prints the percentage whose predicted
     * label matches the label derived from yt.
     */
    void Test() {
        std::cout << "Testing using the randomly generated " << NUM_DATA << " testing data...";

        int num = 0;   // number of correctly classified samples

        for (int data_idx = 0; data_idx < NUM_DATA; data_idx++) {
            double youtputpre[NUM_OUTPUT];
            double yhiddenpre[NUM_HIDDEN];
            double in_to_hidden[NUM_HIDDEN];
            double in_to_output[NUM_OUTPUT];

            CalAll(xt[data_idx], in_to_hidden, yhiddenpre, in_to_output, youtputpre);

            if (FormattedOutput(yt[data_idx]) == FormattedOutput(youtputpre)) num += 1;
        }

        std::cout << " Done! ( Rate of Correction : "
                  << 100.0 * static_cast<double>(num) / static_cast<double>(NUM_DATA)
                  << "% )\n" << std::endl;
    }

private:

    /** Draws one starting weight from [-0.25, 0.25) in steps of 1/2000. */
    static double RandomWeight() {
        return static_cast<double>(rand() % 1000) / 2000 - 0.25;
    }

    /**
     * Writes NUM_INPUT random bits into x and the matching one-hot target into y:
     * y[0] is UPPERBOUND when fewer than RULE bits are set, y[1] is UPPERBOUND
     * otherwise; the other element is LOWERBOUND.
     */
    void FillRandomSample(int *x, double *y) {
        int sum = 0;
        for (int i = 0; i < NUM_INPUT; i++) {
            x[i] = rand() % 2;
            sum += x[i];
        }
        y[0] = (sum < RULE ? UPPERBOUND : LOWERBOUND);
        y[1] = (sum >= RULE ? UPPERBOUND : LOWERBOUND);
    }

}; // end of class

const double NeuralNet::alpha = 0.1; // learning rate

//////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////

int main( int argc, char * argv[] ){

for( int number_of_patterns_4_training=0;number_of_patterns_4_training<=200 ; number_of_patterns_4_training+=20)

{

NeuralNet *ass2net = new NeuralNet();

ass2net->CreateTrainingPatterns();

ass2net->CreateTestData();

ass2net->Train(number_of_patterns_4_training);

// ass2net->PrintWeights();

ass2net->Test();

}

/*-------------------------------------------------------------------------------------------

Development Notes:

This is deliberately designed to be close to the form (identifiers etc.) that Russell & Norvig use. See their perceptron pseudo-code,

Neural Networks section.

-------------------------------------------------------------------------------------------*/