/*--------------------------------------------------------------------------------------------
* Neural Network Prototype
*
* Perceptron Learning (See Russell & Norvig PP 742)
*
* Terry 10 Nov. 2004
*------------------------------------------------------------------------------------------*/
#include <math.h>
//#include < cmath >
//#include <conio.h>
#include <stdlib.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <memory>
#include <string>
using namespace std;
////////////////////////////////////////////////////////////////////////////////
/**
 * Feed-forward neural network with one hidden layer, trained by online
 * (per-pattern) back-propagation.
 *
 * Layout of the weight matrices: index 0 of every row is the bias weight,
 * which is always multiplied by a constant input of -1. Indices 1..N hold the
 * weights for the N real inputs of that unit, so input/hidden unit `u`
 * (0-based) is paired with weight index `u + 1`.
 *
 * The task generated by CreateTrainingPatterns()/CreateTestData() is: given
 * NUM_INPUT random bits, output class "10" when fewer than RULE bits are set
 * and class "01" otherwise.
 */
class NeuralNet {
public:
    // Number of set input bits at which the target switches from "10" to "01".
    int RULE = 6;
    // Target values used for the "on" and "off" output unit respectively.
    double UPPERBOUND = 1.0;
    double LOWERBOUND = 0.0;

    static constexpr int NUM_INPUT = 10;
    static constexpr int NUM_HIDDEN = 6;
    static constexpr int NUM_OUTPUT = 2;

    // Training set: inputs and target outputs.
    static constexpr int NUM_PATTERNS = 200;
    int xp[NUM_PATTERNS][NUM_INPUT] = {};
    double yp[NUM_PATTERNS][NUM_OUTPUT] = {};

    // Test set: inputs and target outputs.
    static constexpr int NUM_DATA = 1000;
    int xt[NUM_DATA][NUM_INPUT] = {};
    double yt[NUM_DATA][NUM_OUTPUT] = {};

    // weights1[j][k]: input k-1 -> hidden unit j (k == 0 is the bias weight).
    double weights1[NUM_HIDDEN][NUM_INPUT + 1];
    // weights2[i][j]: hidden unit j-1 -> output unit i (j == 0 is the bias weight).
    double weights2[NUM_OUTPUT][NUM_HIDDEN + 1];

    // Learning rate. `constexpr` is required: a non-integral static data
    // member cannot be initialised in-class as plain `static const`.
    static constexpr double alpha = 0.1;

    // Not used by the current training loop; kept for interface compatibility.
    bool fullytrained = false;

    /**
     * Seeds the global C random generator with a fixed value (so every
     * instance starts from the same weights and generates the same data) and
     * initialises all non-bias weights to random values in [-0.25, 0.25).
     * Bias weights of both layers start at 0.
     */
    NeuralNet() {
        srand(100);
        for (int j = 0; j < NUM_HIDDEN; ++j) {
            weights1[j][0] = 0.0;
            for (int k = 1; k <= NUM_INPUT; ++k) {
                weights1[j][k] = RandomInitialWeight();
            }
        }
        for (int i = 0; i < NUM_OUTPUT; ++i) {
            weights2[i][0] = 0.0;
            // Start at 1 so the zero bias set above is not overwritten,
            // matching the hidden-layer initialisation.
            for (int j = 1; j <= NUM_HIDDEN; ++j) {
                weights2[i][j] = RandomInitialWeight();
            }
        }
    }

    /** Prints every weight of both layers to standard output, one per line. */
    void PrintWeights() const {
        for (int j = 0; j < NUM_HIDDEN; ++j) {
            for (int k = 0; k <= NUM_INPUT; ++k) {
                std::cout << " w1[" << j << "][" << k << "]:" << weights1[j][k] << std::endl;
            }
            std::cout << std::endl;
        }
        for (int i = 0; i < NUM_OUTPUT; ++i) {
            for (int j = 0; j <= NUM_HIDDEN; ++j) {
                std::cout << " w2[" << i << "][" << j << "]:" << weights2[i][j] << std::endl;
            }
            std::cout << std::endl;
        }
    }

    /**
     * Weighted input of hidden unit `j` for the input vector `vct`
     * (NUM_INPUT elements), including the bias term (-1 * bias weight).
     */
    double CalInput2Hidden(int j, const int vct[NUM_INPUT]) const {
        double weighted_sum = -weights1[j][0];
        for (int k = 1; k <= NUM_INPUT; ++k) {
            weighted_sum += weights1[j][k] * vct[k - 1];
        }
        return weighted_sum;
    }

    /**
     * Weighted input of output unit `i` for the hidden activations `vct`
     * (NUM_HIDDEN elements), including the bias term (-1 * bias weight).
     */
    double CalInput2Output(int i, const double vct[NUM_HIDDEN]) const {
        double weighted_sum = -weights2[i][0];
        for (int j = 1; j <= NUM_HIDDEN; ++j) {
            weighted_sum += weights2[i][j] * vct[j - 1];
        }
        return weighted_sum;
    }

    /** Logistic activation function g(x) = 1 / (1 + e^-x). */
    double sigmoid(double x) const { return 1.0 / (1.0 + exp(-x)); }

    /** Derivative of the logistic function, g'(x) = g(x) * (1 - g(x)). */
    double derivative(double x) const {
        const double sig = sigmoid(x);
        return sig * (1.0 - sig);
    }

    /**
     * Maps a pair of output activations to a class label: "10" when the first
     * output is strictly larger than the second, "01" otherwise.
     */
    std::string FormattedOutput(const double input[NUM_OUTPUT]) const {
        return (input[0] > input[1]) ? "10" : "01";
    }

    /** Fills xt/yt with NUM_DATA random bit patterns and their target outputs. */
    void CreateTestData() {
        std::cout << "Generating " << NUM_DATA << " random testing data ...";
        for (int p = 0; p < NUM_DATA; ++p) {
            FillRandomPattern(xt[p], yt[p]);
        }
        std::cout << " Done!" << std::endl;
    }

    /** Fills xp/yp with NUM_PATTERNS random bit patterns and their target outputs. */
    void CreateTrainingPatterns() {
        std::cout << "\nCenerating " << NUM_PATTERNS << " training patterns...";
        for (int p = 0; p < NUM_PATTERNS; ++p) {
            FillRandomPattern(xp[p], yp[p]);
        }
        std::cout << " Done!" << std::endl;
    }

    /**
     * Trains the network with online back-propagation.
     *
     * @param number_of_pattern_used  How many patterns from the start of the
     *        training set to use. Clamped to [0, NUM_PATTERNS] so the training
     *        arrays are never indexed out of bounds.
     * @param times  Number of epochs (full passes over the selected patterns).
     */
    void Train(int number_of_pattern_used, int times = 50) {
        if (number_of_pattern_used > NUM_PATTERNS) {
            number_of_pattern_used = NUM_PATTERNS;
        }
        if (number_of_pattern_used < 0) {
            number_of_pattern_used = 0;
        }
        std::cout << "Begin training (epoch=" << times << ", using the first "
                  << number_of_pattern_used << " patterns) ... ";
        for (int epoch = 0; epoch < times; ++epoch) {
            for (int p = 0; p < number_of_pattern_used; ++p) {
                TrainOnPattern(p);
            }
        }
        std::cout << " Done!" << std::endl;
    }

    /**
     * Forward pass for one input vector.
     *
     * @param in     Input vector (NUM_INPUT elements).
     * @param i_hid  Receives the weighted input of each hidden unit (NUM_HIDDEN).
     * @param o_hid  Receives the activation of each hidden unit (NUM_HIDDEN).
     * @param i_out  Receives the weighted input of each output unit (NUM_OUTPUT).
     * @param o_out  Receives the activation of each output unit (NUM_OUTPUT).
     */
    void CalAll(const int in[NUM_INPUT], double* i_hid, double* o_hid,
                double* i_out, double* o_out) const {
        for (int j = 0; j < NUM_HIDDEN; ++j) {
            i_hid[j] = CalInput2Hidden(j, in);
            o_hid[j] = sigmoid(i_hid[j]);
        }
        // The output layer reads the hidden activations computed just above.
        for (int i = 0; i < NUM_OUTPUT; ++i) {
            i_out[i] = CalInput2Output(i, o_hid);
            o_out[i] = sigmoid(i_out[i]);
        }
    }

    /**
     * Runs the network over the whole test set and prints the percentage of
     * test items whose predicted class label equals the target label.
     */
    void Test() const {
        std::cout << "Testing using the randomly generated " << NUM_DATA << " testing data...";
        int num = 0;
        for (int data_idx = 0; data_idx < NUM_DATA; ++data_idx) {
            double in_to_hidden[NUM_HIDDEN];
            double yhiddenpre[NUM_HIDDEN];
            double in_to_output[NUM_OUTPUT];
            double youtputpre[NUM_OUTPUT];
            CalAll(xt[data_idx], in_to_hidden, yhiddenpre, in_to_output, youtputpre);
            if (FormattedOutput(yt[data_idx]) == FormattedOutput(youtputpre)) {
                ++num;
            }
        }
        std::cout << " Done! ( Rate of Correction : "
                  << 100.0 * static_cast<float>(num) / static_cast<float>(NUM_DATA)
                  << "% )\n" << std::endl;
    }

private:
    /** Random starting weight in [-0.25, 0.25), in steps of 0.0005. */
    static double RandomInitialWeight() {
        return static_cast<double>(rand() % 1000) / 2000 - 0.25;
    }

    /**
     * Writes NUM_INPUT random bits to `x` and the matching two-element target
     * to `y`: (UPPERBOUND, LOWERBOUND) when fewer than RULE bits are set,
     * (LOWERBOUND, UPPERBOUND) otherwise.
     */
    void FillRandomPattern(int* x, double* y) const {
        int sum = 0;
        for (int i = 0; i < NUM_INPUT; ++i) {
            x[i] = rand() % 2;
            sum += x[i];
        }
        y[0] = (sum < RULE) ? UPPERBOUND : LOWERBOUND;
        y[1] = (sum >= RULE) ? UPPERBOUND : LOWERBOUND;
    }

    /** One back-propagation step (forward pass + weight update) on training pattern `p`. */
    void TrainOnPattern(int p) {
        double in_to_hidden[NUM_HIDDEN];
        double a_hidden[NUM_HIDDEN];
        double in_to_output[NUM_OUTPUT];
        double a_output[NUM_OUTPUT];
        CalAll(xp[p], in_to_hidden, a_hidden, in_to_output, a_output);

        // Output deltas: prediction error scaled by the activation slope.
        double delta_output[NUM_OUTPUT];
        for (int i = 0; i < NUM_OUTPUT; ++i) {
            const double err = yp[p][i] - a_output[i];
            delta_output[i] = err * derivative(in_to_output[i]);
        }

        // Hidden deltas must be computed with the weights from BEFORE the
        // update below. Hidden unit j feeds the outputs through
        // weights2[i][j + 1]; index 0 is the bias weight, not a hidden unit.
        double delta_hidden[NUM_HIDDEN];
        for (int j = 0; j < NUM_HIDDEN; ++j) {
            double back_error = 0.0;
            for (int i = 0; i < NUM_OUTPUT; ++i) {
                back_error += delta_output[i] * weights2[i][j + 1];
            }
            delta_hidden[j] = derivative(in_to_hidden[j]) * back_error;
        }

        // Update hidden -> output weights (bias input is the constant -1).
        for (int i = 0; i < NUM_OUTPUT; ++i) {
            weights2[i][0] += alpha * -1.0 * delta_output[i];
            for (int j = 1; j <= NUM_HIDDEN; ++j) {
                weights2[i][j] += alpha * a_hidden[j - 1] * delta_output[i];
            }
        }

        // Update input -> hidden weights (bias input is the constant -1).
        for (int j = 0; j < NUM_HIDDEN; ++j) {
            weights1[j][0] += alpha * -1.0 * delta_hidden[j];
            for (int k = 1; k <= NUM_INPUT; ++k) {
                weights1[j][k] += alpha * xp[p][k - 1] * delta_hidden[j];
            }
        }
    }
}; // end of class
//////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
int main( int argc, char * argv[] ){
for( int number_of_patterns_4_training=0;number_of_patterns_4_training<=200 ; number_of_patterns_4_training+=20)
{
NeuralNet *ass2net = new NeuralNet();
ass2net->CreateTrainingPatterns();
ass2net->CreateTestData();
ass2net->Train(number_of_patterns_4_training);
// ass2net->PrintWeights();
ass2net->Test();
}
}
/*-------------------------------------------------------------------------------------------
Development Notes:
This is deliberately designed to be close to the form (identifiers etc.) that Russell & Norvig use. See their perceptron pseudo-code,
Neural Networks section.
-------------------------------------------------------------------------------------------*/