#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Forward declarations for the helpers that are defined after main(). */

/* Copy the characters of a into the cell b[u][*][w]. */
void copy(char a[50],char b[100][10][50],int u,int w);
/* Convert the text stored in the cell M[i][*][k] to a float. */
float toi(char M[100][10][50],int i,int k);
/* Return 0 when the first character of M[i][*][k] equals the first
   character of str[f], 1 otherwise. */
int stcmp(char M[100][10][50],int i,int k,char (*str)[20],int f);
/* Replace the -1 entries of one attribute of d, using the complete tuples
   collected in new_d. */
int missing_handle(float d[100][50],float new_d[100][50],int w,int u,int missing_attribute[],int
missing_tuple[],int datatype[]);
/* Determinant of the leading part of a 25x25 buffer; the int is its order. */
float detrm(float[25][25],float [25][25],int);
/* Build the matrix of cofactors and pass it on to trans(). */
void cofact(float[25][25],float [25][25],int);
/* Transpose the cofactors, divide by the determinant and store the result
   in the third argument. */
void trans(float[25][25],float[25][25],float [25][25],int);
/* Compute the determinant and, when it is non-zero, the inverse in ver. */
void inverse(float [25][25],float ver[25][25],int);
/* Combine the rows of a matrix with a vector; results go to column 0 of
   the third argument. */
void multiply(float [25][25],float [25],float [25][25],int);
/* Min-max rescale the columns of d that hold values outside [0,1], then
   print d. */
void normalization(float d[100][50],int w,int u,int datatype[50]);
/* Presumably meant to return the most frequent value of column v in
   new_d -- confirm against the definition. */
float mode(float new_d[100][50],int w,int v);
/* Report attributes/tuples that contain -1 and either list the complete
   tuples or call missing_handle().
   NOTE(review): datatype is declared as a plain int here, yet main() passes
   an int array and the definition forwards it to missing_handle()'s int[]
   parameter -- TODO confirm and change declaration and definition together. */
void missing_check(float d[100][50],int w,int u,int datatype);
/*
 * Interactive driver.
 *
 * Steps, in order:
 *   1. Ask how many attributes there are and, for each, whether it is
 *      numeric (0) or nominal (any other number); a nominal attribute is
 *      followed by its list of allowed values.
 *   2. Ask for a data file and read it character by character into M, where
 *      M[attribute][char][tuple] holds one NUL-terminated field.
 *   3. Convert M into the float table d[tuple][attribute]: numeric fields
 *      through toi(), nominal fields to the index of the matching value,
 *      -1 when nothing matches.
 *   4. Run missing_check() and, on request, normalization().
 *
 * File format as implemented by the reader: fields are separated by one
 * space, tuples by a newline.  A space found where a field should start
 * marks that field as missing; the character after it is consumed too
 * (presumably the regular separator -- TODO confirm against real data).
 *
 * Returns 0 on success, 1 when input cannot be read or does not fit the
 * fixed-size tables.
 */
int main()
{
    /* Limits implied by the table shapes: d is [tuple][attribute] with 50
       columns, M is [attribute][char][tuple] with 10 chars and 50 tuples,
       and each nominal table is allocated with 15 entries. */
    enum { MAX_ATTRIBUTES = 50, MAX_TUPLES = 50, MAX_FIELD = 9, MAX_NOMINAL = 15 };

    FILE *fp = NULL;
    int rc = 1;
    int i = 0, j = 0, k = 0, u = 0, w = 0, f = 0, norm = 0;
    int ch;                       /* int so that EOF stays distinguishable */
    int last_u = 0;               /* field count of the last finished line */
    int r = 0, n_nominal = 0;
    char name[50], a[100], M[100][10][50];
    char (*NomiValue[50])[20];    /* one value table per nominal attribute */
    int NomiCount[50];            /* number of entries filled in each table */
    int Nattributes, datatype[50], Nomnmber;
    char (*str)[20];
    float d[100][50];

    /* Cells that are never written must read as empty / zero. */
    memset(M, 0, sizeof M);
    memset(d, 0, sizeof d);

    printf("Enter the total number of attrributes=");
    if (scanf("%d", &Nattributes) != 1 || Nattributes < 1 || Nattributes > MAX_ATTRIBUTES) {
        fprintf(stderr, "\n\tNumber of attributes must be between 1 and %d\n", MAX_ATTRIBUTES);
        goto cleanup;
    }

    for (i = 0; i < Nattributes; i++) {
        printf("Enter the datatype of %dth attribute", i + 1);
        printf("\n\tNumerial=0 Nominal=1\t=");
        if (scanf("%d", &datatype[i]) != 1) {
            fprintf(stderr, "\n\tInvalid datatype\n");
            goto cleanup;
        }
        if (datatype[i] == 0) {
            continue;
        }

        printf("\n\n\tEnter the total number of possible values of %dth attribute=", i + 1);
        if (scanf("%d", &Nomnmber) != 1 || Nomnmber < 0 || Nomnmber > MAX_NOMINAL) {
            fprintf(stderr, "\n\tNumber of values must be between 0 and %d\n", MAX_NOMINAL);
            goto cleanup;
        }
        printf("Enter all the possible attribute values ");
        str = malloc(MAX_NOMINAL * sizeof(*str));
        if (str == NULL) {
            fprintf(stderr, "\n\tOut of memory\n");
            goto cleanup;
        }
        NomiValue[n_nominal] = str;
        NomiCount[n_nominal] = Nomnmber;
        n_nominal++;
        for (j = 0; j < Nomnmber; j++) {
            printf("\n%d Attribute value=", j + 1);
            if (scanf("%19s", str[j]) != 1) {   /* 19 chars + NUL fit char[20] */
                fprintf(stderr, "\n\tInvalid attribute value\n");
                goto cleanup;
            }
        }
    }

    /* scanf leaves the remainder of the last input line unread; drop it so
       the path prompt below does not receive an empty line. */
    while ((ch = getchar()) != '\n' && ch != EOF) {
    }

    //SEARCHING FOR TEXT FILE
    do {
        puts("\n\tEnter the path of the file(EXTENSION SHOULD BE .txt): ");
        if (fgets(name, sizeof name, stdin) == NULL) {
            goto cleanup;                       /* stdin closed */
        }
        name[strcspn(name, "\n")] = '\0';       /* fgets keeps the newline */
        //OPENING FILE IN READ MODE
        fp = fopen(name, "r");
        if (fp == NULL)
            puts("\n\tFile not found... Re-enter:");
        else
            puts("\n\tFile found");
    } while (fp == NULL);

    //READING FROM FILE CHARACTER BY CHARACTER
    i = 0;
    while ((ch = fgetc(fp)) != EOF) {
        if (w >= MAX_TUPLES) {
            fprintf(stderr, "\n\tToo many tuples (limit %d)\n", MAX_TUPLES);
            goto cleanup;
        }
        if (ch == ' ' && i == 0) {
            /* A space where a value should start: mark the field missing. */
            if (u >= MAX_ATTRIBUTES) {
                fprintf(stderr, "\n\tToo many attributes (limit %d)\n", MAX_ATTRIBUTES);
                goto cleanup;
            }
            M[u][0][w] = EOF;
            u++;
            ch = fgetc(fp);     /* skip the character after the marker */
        } else if (ch == ' ' || ch == '\n') {
            int had_text = (i > 0);

            a[i] = '\0';
            copy(a, M, u, w);
            i = 0;
            if (ch == '\n') {
                last_u = u + had_text;
                u = 0;
                w++;
            } else {
                u++;
            }
        } else {
            if (u >= MAX_ATTRIBUTES || i >= MAX_FIELD) {
                fprintf(stderr, "\n\tField too long or too many attributes\n");
                goto cleanup;
            }
            a[i] = (char)ch;
            i++;
        }
    }
    printf("world");

    if (i > 0) {
        /* Last field was not followed by a separator: store it as well. */
        a[i] = '\0';
        copy(a, M, u, w);
        u++;
        i = 0;
    } else if (u == 0 && w > 0) {
        /* The file ended with a newline: the line just opened is empty, so
           do not count it as a tuple and restore the previous field count. */
        w--;
        u = last_u;
    }

    printf("\ntotal no. of tuples=%d", w + 1);
    printf("\ntotal no. of attributes=%d\n", u);
    for (k = 0; k <= w; k++) {
        for (i = 0; i < u; i++) {
            for (j = 0; j <= MAX_FIELD && M[i][j][k] != '\0'; j++) {
                printf("%c", M[i][j][k]);
            }
            printf("\t");
        }
        printf("\n");
    }

    //Conversion of 3D Array to float array 2D
    for (k = 0; k <= w; k++) {
        r = 0;                      /* cursor into the nominal tables */
        printf("\n");
        for (i = 0; i < Nattributes; i++) {
            if (datatype[i] == 0) {
                d[k][i] = toi(M, i, k);
                continue;
            }
            str = NomiValue[r];
            d[k][i] = -1;           /* stays -1 (missing) when nothing matches */
            for (f = 0; f < NomiCount[r]; f++) {
                if (stcmp(M, i, k, str, f) == 0) {
                    d[k][i] = f;
                    break;
                }
            }
            r++;
        }
    }
    fclose(fp);
    fp = NULL;

    for (i = 0; i <= w; i++) {
        for (j = 0; j < Nattributes; j++)
            printf("%f\t", d[i][j]);
        printf("\n");
    }

    /* NOTE(review): missing_check() is declared with a scalar `int datatype`
       although an array is passed here -- TODO fix the declaration and the
       definition together. */
    missing_check(d, w, u, datatype);

    printf("Do you want to normalise the data");
    /* Only a successfully read, non-zero answer counts as "yes". */
    if (scanf("%d", &norm) == 1 && norm != 0) {
        normalization(d, w, u, datatype);
    }
    rc = 0;

cleanup:
    if (fp != NULL) {
        fclose(fp);
    }
    for (i = 0; i < n_nominal; i++) {
        free(NomiValue[i]);
    }
    return rc;
}

/*
 * Convert the decimal text stored in cell M[i][*][k] to a float.
 *
 * Accepts an optional leading sign, digits, and an optional fractional part
 * after '.'.  The cell is read up to its NUL terminator or the end of the
 * 10-character cell, whichever comes first.
 *
 * Returns -1 when the integer part contains the missing-value marker
 * ((char)EOF, written by main()); -1 is the value the rest of the program
 * treats as "missing".  Characters that are not digits are not rejected.
 */
float toi(char M[100][10][50],int i,int k)
{
    int j = 0;
    int negative = 0;
    float sum = 0;
    float scale = 10;

    if (M[i][0][k] == '-' || M[i][0][k] == '+') {
        negative = (M[i][0][k] == '-');
        j++;
    }

    /* Integer part. */
    while (j < 10 && M[i][j][k] != '\0' && M[i][j][k] != '.') {
        /* Compare as char so the test works whether char is signed or not. */
        if (M[i][j][k] == (char)EOF)
            return -1;
        sum = sum * 10 + (M[i][j][k] - '0');
        j++;
    }

    /* Fractional part. */
    if (j < 10 && M[i][j][k] == '.') {
        j++;
        while (j < 10 && M[i][j][k] != '\0') {
            sum = sum + (M[i][j][k] - '0') / scale;
            scale = scale * 10;
            j++;
        }
    }

    return negative ? -sum : sum;
}

/*
 * Copy the NUL-terminated string a into the cell b[u][*][w].
 *
 * A cell holds 10 characters, so at most 9 are copied and the result is
 * always NUL-terminated; longer input is truncated instead of spilling into
 * the neighbouring cells.
 */
void copy(char a[50],char b[100][10][50],int u,int w)
{
    int j = 0;

    while (j < 9 && a[j] != '\0') {
        b[u][j][w] = a[j];
        j++;
    }
    b[u][j][w] = '\0';
}

/*
 * Compare the leading character of cell M[i][*][k] with the leading
 * character of the candidate string str[f].  Only that one character is
 * looked at.
 *
 * Returns 0 when the two characters are equal, 1 when they differ.
 */
int stcmp(char M[100][10][50],int i,int k,char (*str)[20],int f)
{
    const char cell_first = M[i][0][k];
    const char candidate_first = str[f][0];

    return (cell_first == candidate_first) ? 0 : 1;
}
/*
 * Look for missing values (stored as -1) in the first u columns of rows
 * 0..w of d.
 *
 * Prints the table dimensions and every attribute that contains a missing
 * value.  When at least one is found the user chooses between ignoring the
 * incomplete tuples (0) and imputing them (1):
 *   - ignore: the complete tuples are printed;
 *   - impute: missing_handle() is called with the complete tuples.
 * Without missing values, or when the answer cannot be read, the "ignore"
 * path is taken.
 *
 * NOTE(review): datatype is declared as a plain int (matching the prototype)
 * but callers pass an int array and it is forwarded to missing_handle()'s
 * int[] parameter -- TODO change prototype and definition to int datatype[].
 */
void missing_check(float d[100][50],int w,int u,int datatype)
{
    int missing_attribute[50], missing_tuple[100];
    int i, j;
    int handle = 0;         /* default: ignore incomplete tuples */
    int any_missing = 0;
    int m = 0;              /* number of complete tuples copied to new_d */
    float new_d[100][50];

    printf("\nNumber of attributes=%d\n", u);
    printf("Number of tuples=%d\n", w + 1);   //note total no. of tuples in data is w+1

    /* One pass flags both the attributes and the tuples with a -1. */
    for (j = 0; j < u; j++) {
        missing_attribute[j] = 0;
    }
    for (i = 0; i <= w; i++) {
        missing_tuple[i] = 0;
        for (j = 0; j < u; j++) {
            if (d[i][j] == -1) {
                missing_tuple[i] = 1;
                missing_attribute[j] = 1;
            }
        }
    }

    for (i = 0; i < u; i++) {
        if (missing_attribute[i] == 1) {
            printf("%d attribute contains missing data\n\n ", i + 1);
            any_missing = 1;
        }
    }
    if (any_missing) {
        printf("Want to ignore missing values or handle using imputation method\n");
        printf("Enter 0 for ignoring and 1 for handling=");
        if (scanf("%d", &handle) != 1) {
            handle = 0;
        }
    }

    /* Collect the tuples that have no missing value. */
    for (i = 0; i <= w; i++) {
        if (missing_tuple[i] == 1) {
            continue;
        }
        for (j = 0; j < u; j++) {
            new_d[m][j] = d[i][j];
        }
        m++;
    }

    if (handle == 1) {
        missing_handle(d, new_d, w, u, missing_attribute, missing_tuple, datatype);
        return;
    }

    /* Every complete tuple has u columns, whichever tuple came last. */
    for (i = 0; i < m; i++) {
        for (j = 0; j < u; j++) {
            printf("%f\t", new_d[i][j]);
        }
        printf("\n");
    }
}
/*
 * Impute the missing (-1) entries of ONE attribute of d.
 *
 * The attribute handled is the first one flagged in missing_attribute.  The
 * attributes without missing values act as predictors: using the complete
 * tuples in new_d, the sums of cross products are assembled into N,
 * array1 (predictors plus intercept) is inverted with inverse() and
 * combined with array2 (cross products with the target) by multiply() to
 * obtain the coefficients in column 0 of mul.
 *
 * For every row of d whose target entry is -1:
 *   - datatype[v] != 1: the entry becomes intercept + sum(coef * predictor);
 *   - datatype[v] == 1: the entry becomes mode() of the complete tuples.
 * Afterwards missing_check() is called again so that remaining attributes
 * can be handled in the same way.
 *
 * Parameters: d is the full table (rows 0..w, u columns); new_d holds the
 * complete tuples in order; missing_attribute[j] / missing_tuple[i] are 1
 * for attributes / tuples that contain a -1.
 *
 * Returns 0 when imputation ran (or nothing was flagged) and -1 when it
 * cannot run: no complete tuple, more than 24 predictors (the solver works
 * on 25x25 buffers), or out of memory.
 */
int missing_handle(float d[100][50],float new_d[100][50],int w,int u,int missing_attribute[],int missing_tuple[],int datatype[])
{
    enum { MAX_ORDER = 25 };
    int i, j, k, g, s, q, v, f, c;
    int m = 0, n = 0, count = 0;
    float sum, variable;
    float N[MAX_ORDER + 1][MAX_ORDER + 1] = {{0}};
    float array1[25][25] = {{0}}, array2[25] = {0};
    float ver[25][25] = {{0}}, mul[25][25] = {{0}};
    float (*p)[50];
    int dependent[50], independent[50];

    //calculation of no. of tuples after ignoring missing tuples
    for (i = 0; i <= w; i++) {
        if (missing_tuple[i] != 1)
            count++;
    }
    printf("\nno. of tuples after ignoring missing tuples=%d\n", count);

    for (i = 0; i < u; i++) {
        if (missing_attribute[i] == 1) {
            dependent[m] = i;
            m++;
        } else {
            independent[n] = i;
            n++;
        }
    }
    printf("\nm=%d n=%d", m, n);

    if (m == 0) {
        return 0;               /* nothing is flagged as missing */
    }
    if (count == 0 || n + 1 > MAX_ORDER) {
        fprintf(stderr, "\nCannot impute: no complete tuple or too many attributes\n");
        return -1;
    }

    /* Only the first attribute with missing data is handled per call; the
       call to missing_check() at the end takes care of the others. */
    v = dependent[0];
    printf("\nv=%d", v);

    p = malloc(100 * sizeof(*p));
    if (p == NULL) {
        fprintf(stderr, "\nOut of memory\n");
        return -1;
    }
    printf("vatesh developer");

    //formation of matrix with all independent variables and only one dependent variable
    for (k = 0; k < count; k++) {
        c = 0;
        for (j = 0; j < n; j++) {
            f = independent[j];
            p[k][c] = new_d[k][f];
            c++;
        }
        p[k][c] = new_d[k][v];
    }

    printf("\ntuples after removal\n");
    for (k = 0; k < count; k++) {   //no. of tuple without any missing value
        for (s = 0; s <= n; s++) {
            printf("%f\t", p[k][s]);    //p[][] contains independent variable and one dependent variable
        }
        printf("\n");
    }

    /* Upper triangle of the cross-product sums, shifted by one row/column
       to leave room for the intercept. */
    for (g = 0; g <= n; g++) {
        for (s = g; s <= n; s++) {
            sum = 0;
            for (i = 0; i < count; i++) {
                sum = sum + (p[i][g]) * (p[i][s]);
            }
            printf("%f ", sum);
            N[g + 1][s + 1] = sum;
        }
    }

    /* Row 0: tuple count followed by the plain column sums. */
    N[0][0] = count;
    for (j = 0; j <= n; j++) {
        sum = 0;
        for (k = 0; k < count; k++) {
            sum = sum + p[k][j];
        }
        N[0][j + 1] = sum;
    }
    printf("\n");

    /* Mirror the upper triangle so that N is symmetric. */
    for (i = 0; i <= n + 1; i++) {
        for (j = i; j <= n + 1; j++) {
            N[j][i] = N[i][j];
        }
        printf("\n");
    }

    for (i = 0; i <= n + 1; i++) {
        for (j = 0; j <= n + 1; j++) {
            printf("%f\t", N[i][j]);
        }
        printf("\n");
    }

    //array1 is the final matrix, take its inverse and multiply by array2 to get final coefficients of equations.
    for (i = 0; i <= n; i++) {
        for (j = 0; j <= n; j++) {
            array1[i][j] = N[i][j];
        }
    }
    printf("\nArray1\n");
    for (i = 0; i <= n; i++) {
        for (j = 0; j <= n; j++) {
            printf("%f\t", array1[i][j]);
        }
        printf("\n");
    }

    printf("\nArray2\n");
    for (j = 0; j < n + 1; j++) {
        array2[j] = N[n + 1][j];
        printf("%f\t\n", array2[j]);
    }
    printf("\n");

    /* ver was zero-filled above, so a singular array1 (inverse() then only
       prints a message) yields zero coefficients instead of garbage. */
    inverse(array1, ver, n + 1);

    printf("**********************************\n");
    for (i = 0; i <= n; i++) {
        for (j = 0; j <= n; j++) {
            printf("%f\t", ver[i][j]);
        }
        printf("\n");
    }

    multiply(ver, array2, mul, n + 1);      //note:-order of matrix is n+1

    for (i = 0; i < n + 1; i++) {
        printf("%f\t", mul[i][0]);
        printf("\n");
    }

    for (i = 0; i < w + 1; i++) {
        if (d[i][v] != -1) {
            continue;
        }
        if (datatype[v] != 1) {
            sum = 0;
            for (j = 0; j < n; j++) {
                q = independent[j];
                sum = sum + d[i][q] * mul[j + 1][0];
            }
            sum = sum + mul[0][0];          /* intercept */
            d[i][v] = sum;
            printf("\nd[%d][%d]=%f", i, v, sum);
        } else {
            /* new_d only holds `count` rows, so its last row is count-1. */
            variable = mode(new_d, count - 1, v);
            d[i][v] = variable;
            printf("\nd[%d][%d]=%f", i, v, d[i][v]);
        }
    }

    /* p is a single allocation; its rows must not be freed one by one. */
    free(p);

    /* NOTE(review): missing_check() is declared with a scalar `int datatype`
       while an array is passed here -- TODO fix its declaration. */
    missing_check(d, w, u, datatype);

    return 0;
}
/*
 * Inverse of a k by k matrix.
 *
 * a   : input matrix; only its leading k x k part is used.
 * inv : receives the inverse when the determinant is non-zero and is left
 *       untouched otherwise.
 * k   : order of the matrix.
 *
 * Prints the determinant, and a message when the matrix cannot be inverted.
 * The inverse itself is produced by cofact(), which hands over to trans().
 */
void inverse(float a[25][25],float inv[25][25],int k)
{
    float d = detrm(a, inv, k);

    printf("THE DETERMINANT IS=%f", d);
    if (d == 0) {
        printf("\nMATRIX IS NOT INVERSIBLE\n");
        return;
    }
    cofact(a, inv, k);
}
/******************FUNCTION TO FIND THE DETERMINANT OF THE
MATRIX************************/



/*
 * Determinant of the leading k x k part of a, computed by cofactor
 * expansion along row 0 (recursive; the amount of work grows factorially
 * with k).
 *
 * a   : matrix buffer.
 * inv : not used here; it is only passed down the recursion.
 * k   : order.  k == 0 yields 1 (the empty determinant), which is what the
 *       cofactor of a 1 x 1 matrix has to be; k == 1 yields a[0][0]; a
 *       negative k yields 0.
 */
float detrm(float a[25][25],float inv[25][25],int k)
{
    float sign = 1, det = 0, minor[25][25];
    int row, col, c, mc;

    if (k == 0) {
        return 1;
    }
    if (k == 1) {
        return (a[0][0]);
    }

    for (c = 0; c < k; c++) {
        /* Minor of element (0, c): drop row 0 and column c. */
        for (row = 1; row < k; row++) {
            mc = 0;
            for (col = 0; col < k; col++) {
                if (col == c) {
                    continue;
                }
                minor[row - 1][mc] = a[row][col];
                mc++;
            }
        }
        det = det + sign * (a[0][c] * detrm(minor, inv, k - 1));
        sign = -1 * sign;
    }
    return (det);
}




/*******************FUNCTION TO FIND COFACTOR*********************************/



/*
 * Build the cofactor matrix of the leading f x f part of num and pass it,
 * together with num and inv, to trans().
 *
 * For every position (row, col) the minor obtained by deleting that row and
 * that column is assembled, its determinant is taken with detrm(), and the
 * sign (-1)^(row+col) is applied.
 */
void cofact(float num[25][25],float inv[25][25],int f)
{
    float minor[25][25], fac[25][25];
    float minor_det;
    int row, col, src_row, src_col, dst_row, dst_col;

    for (row = 0; row < f; row++) {
        for (col = 0; col < f; col++) {
            /* Copy everything except row `row` and column `col`. */
            dst_row = 0;
            for (src_row = 0; src_row < f; src_row++) {
                if (src_row == row) {
                    continue;
                }
                dst_col = 0;
                for (src_col = 0; src_col < f; src_col++) {
                    if (src_col == col) {
                        continue;
                    }
                    minor[dst_row][dst_col] = num[src_row][src_col];
                    dst_col++;
                }
                dst_row++;
            }

            minor_det = detrm(minor, inv, f - 1);
            /* Even row+col keeps the sign, odd row+col flips it. */
            if ((row + col) % 2 == 0) {
                fac[row][col] = minor_det;
            } else {
                fac[row][col] = -minor_det;
            }
        }
    }
    trans(num, fac, inv, f);
}



/*************FUNCTION TO FIND TRANSPOSE AND INVERSE OF A
MATRIX**************************/




/*
 * Finish the inversion started by cofact(): transpose the cofactor matrix
 * fac, divide every element by the determinant of num and store the result
 * in inv, then print it.
 *
 * num : original matrix (leading r x r part), used for the determinant.
 * fac : matrix of cofactors of num.
 * inv : receives the r x r inverse; nothing outside that part is written.
 * r   : order of the matrices.
 *
 * The determinant is not checked for zero here; inverse() does that before
 * this function is reached.
 */
void trans(float num[25][25],float fac[25][25],float inv[25][25],int r)
{
    int i, j;
    float adj[25][25], d;

    printf("In Inverse");
    for (i = 0; i < r; i++) {
        for (j = 0; j < r; j++) {
            adj[i][j] = fac[j][i];
        }
    }

    d = detrm(num, inv, r);
    printf("\n**************j =%d\n", r);
    for (i = 0; i < r; i++) {
        for (j = 0; j < r; j++) {
            inv[i][j] = adj[i][j] / d;
        }
    }

    printf("\nTHE INVERSE OF THE MATRIX:\n");
    for (i = 0; i < r; i++) {
        for (j = 0; j < r; j++) {
            printf("\t%f", inv[i][j]);
        }
        printf("\n");
    }
}
/*
 * Matrix-vector product: mul[i][0] = sum over m of ver[i][m] * array2[m],
 * for i = 0 .. k-1.
 *
 * ver    : k x k matrix (leading part of a 25x25 buffer).
 * array2 : vector with k entries.
 * mul    : result; only column 0 of the first k rows is written.
 * k      : order.
 */
void multiply(float ver[25][25],float array2[25],float mul[25][25],int k)
{
    int i, m;

    for (i = 0; i < k; i++) {
        float sum = 0;      /* restarted for every row */

        for (m = 0; m < k; m++) {
            sum = sum + ver[i][m] * array2[m];
        }
        mul[i][0] = sum;
    }
}

/*
 * Min-max normalisation of the first u columns of rows 0..w of d, followed
 * by a print-out of the table.
 *
 * A column is rescaled only when at least one of its values lies outside
 * [0, 1]; each value then becomes (value - min) / (max - min).  A column
 * whose values are all equal has no range and is set to 0 instead of
 * dividing by zero.
 *
 * datatype is accepted for interface compatibility and is not used.
 */
void normalization(float d[100][50],int w,int u,int datatype[50])
{
    float maxA, minA, range;
    int i, j, norm;

    (void)datatype;

    for (j = 0; j < u; j++) {
        /* Does this column need rescaling at all? */
        norm = 0;
        for (i = 0; i <= w; i++) {
            if (d[i][j] > 1 || d[i][j] < 0) {
                norm = 1;
                break;
            }
        }
        if (norm == 0) {
            continue;
        }

        minA = d[0][j];
        maxA = d[0][j];
        for (i = 1; i <= w; i++) {
            if (d[i][j] < minA) {
                minA = d[i][j];
            }
            if (d[i][j] > maxA) {
                maxA = d[i][j];
            }
        }

        range = maxA - minA;
        for (i = 0; i <= w; i++) {
            d[i][j] = (range != 0) ? (d[i][j] - minA) / range : 0;
        }
    }

    for (i = 0; i <= w; i++) {
        for (j = 0; j < u; j++)
            printf("%f\t", d[i][j]);
        printf("\n");
    }
}

 /*
  * Most frequent value in column v of rows 0..w of new_d.
  *
  * When several values share the highest frequency, the one that appears
  * first (lowest row) is returned.  With no rows at all (w < 0) the function
  * returns -1, the value this program uses for "missing".
  */
 float mode(float new_d[100][50],int w,int v)
 {
     int i, j;
     int occurrences;
     int best_row = 0, best_count = 0;

     if (w < 0) {
         return -1;
     }

     for (i = 0; i <= w; i++) {
         /* Count how often the value of row i occurs in the column. */
         occurrences = 0;
         for (j = 0; j <= w; j++) {
             if (new_d[j][v] == new_d[i][v]) {
                 occurrences++;
             }
         }
         /* Strictly greater keeps the earliest row on ties. */
         if (occurrences > best_count) {
             best_count = occurrences;
             best_row = i;
         }
     }
     return new_d[best_row][v];
 }

More Related Content

DOC
Ngon ngu lap trinh
PDF
Yohan jacobi gaussseidel_analisis
PDF
Ejercicios resueltos Practica 4 informatica II
DOC
Cg lab cse-vii
PDF
Programs
DOCX
Programs
PDF
Javascript i wydajność - czy to się spina?
Ngon ngu lap trinh
Yohan jacobi gaussseidel_analisis
Ejercicios resueltos Practica 4 informatica II
Cg lab cse-vii
Programs
Programs
Javascript i wydajność - czy to się spina?

What's hot (20)

DOCX
KEY
Sbaw090630
PDF
Programación funcional en Haskell
DOCX
Sequential
TXT
C Program : Sorting : Bubble,
DOCX
Info clasa
PDF
Lab2 sdmp
PDF
An introduction to functional programming with Go [redux]
PDF
Bai tap tham khao CSPE
DOCX
Prueba de montecarlo
DOCX
PDF
openFrameworks 動きを生みだす様々なアルゴリズム - 多摩美メディアアートII
DOCX
c ++ informe Nº5 ucsm
DOCX
Contoh uas soal praktek
TXT
Ugd9 c 7644
DOCX
Daniel snake
PDF
Chuong10
DOCX
Simulacion - Algoritmo congruencial cuadratico
Sbaw090630
Programación funcional en Haskell
Sequential
C Program : Sorting : Bubble,
Info clasa
Lab2 sdmp
An introduction to functional programming with Go [redux]
Bai tap tham khao CSPE
Prueba de montecarlo
openFrameworks 動きを生みだす様々なアルゴリズム - 多摩美メディアアートII
c ++ informe Nº5 ucsm
Contoh uas soal praktek
Ugd9 c 7644
Daniel snake
Chuong10
Simulacion - Algoritmo congruencial cuadratico
Ad

Viewers also liked (20)

ODS
Lucaci bianca 2010-11_esercizio4
PDF
125 cau hoi_tra_loi_khi_phong_van_xin_viec_bang_tieng_anh-2
PPTX
Strategeek Intro Deck v2
PDF
Nguyễn Nho Vĩnh-Facebookmarketting
PDF
Quine Relativita Ontologica
PPTX
Bengkel dskp
PDF
Thuật ngữ xây dựng cầu việt - pháp - anh
PDF
Nguyễn Nho Vĩnh-Sổ Tay Tiếng Anh Kỹ Thuật
PDF
Nguyễn Nho Vĩnh-Em phai den_havard
PPTX
Leadership & emotional intelligence
PDF
Tiếng anh trong xây dựng và kiến trúc-Nguyễn Nho Vĩnh
DOC
Thuật ngữ dự thầu
PDF
Pride and prejudice
PDF
PDF
Tiếng anh trong kiến trúc và xây dựng-Nguyễn Nho Vĩnh
DOC
Kem kepimpinan paperwork
PDF
Nguyễn Nho Vĩnh-Tiếng anh trong xây dựng
PPTX
Environmental quality act 1974
PDF
Criterios examen 30 mayo
Lucaci bianca 2010-11_esercizio4
125 cau hoi_tra_loi_khi_phong_van_xin_viec_bang_tieng_anh-2
Strategeek Intro Deck v2
Nguyễn Nho Vĩnh-Facebookmarketting
Quine Relativita Ontologica
Bengkel dskp
Thuật ngữ xây dựng cầu việt - pháp - anh
Nguyễn Nho Vĩnh-Sổ Tay Tiếng Anh Kỹ Thuật
Nguyễn Nho Vĩnh-Em phai den_havard
Leadership & emotional intelligence
Tiếng anh trong xây dựng và kiến trúc-Nguyễn Nho Vĩnh
Thuật ngữ dự thầu
Pride and prejudice
Tiếng anh trong kiến trúc và xây dựng-Nguyễn Nho Vĩnh
Kem kepimpinan paperwork
Nguyễn Nho Vĩnh-Tiếng anh trong xây dựng
Environmental quality act 1974
Criterios examen 30 mayo
Ad

Vatesh

  • 1. #include<stdio.h> #include<math.h> #include<string.h> void copy(char a[50],char b[100][10][50],int u,int w); float toi(char M[100][10][50],int i,int k); int stcmp(char M[100][10][50],int i,int k,char (*str)[20],int f); int missing_handle(float d[100][50],float new_d[100][50],int w,int u,int missing_attribute[],int missing_tuple[],int datatype[]); float detrm(float[25][25],float [25][25],int); void cofact(float[25][25],float [25][25],int); void trans(float[25][25],float[25][25],float [25][25],int); void inverse(float [25][25],float ver[25][25],int); void multiply(float [25][25],float [25],float [25][25],int); void normalization(float d[100][50],int w,int u,int datatype[50]); float mode(float new_d[100][50],int w,int v); void missing_check(float d[100][50],int w,int u,int datatype); int main() { FILE *fp; int m,i=0,j=0,k=0,u=0,v=0,w=0,loc=0,f=0,norm; char ch,name[50],a[100],M[100][10][50],*NomiValue[50]; int Nattributes,datatype[50],Nomnmber; char (*str)[20],r=0; float d[100][50],value; printf("Enter the total number of attrributes="); scanf("%d",&Nattributes); for(i=0;i<Nattributes;i++) { printf("Enter the datatype of %dth attribute",i+1); printf("ntNumerial=0 Nominal=1t="); scanf("%d",&datatype[i]); if(datatype[i]==0) { continue; } else { printf("nntEnter the total number of possible values of %dth attribute=",i+1); scanf("%d",&Nomnmber); printf("Enter all the possible attribute values "); str=(char (*)[20])malloc(15*sizeof(*str)); NomiValue[r]=str;
  • 2. r++; for(j=0;j<Nomnmber;j++) { printf("n%d Attribute value=",j+1); scanf("%s",str[j]); } } } /*for(k=0;k<2;k++) { str=NomiValue[k]; for(i=0;i<2;i++) { printf("%st",str[i]); } printf("n"); }*/ i=0; //SEARCHING FOR TEXT FILE do { puts("ntEnter the path of the file(EXTENSION SHOULD BE .txt): "); gets(name); //OPENING FILE IN READ MODE fp=fopen(name,"r"); if(fp=='0') puts("ntFile not found... Re-enter:"); else puts("ntFile found"); }while(fp=='0'); //READING FROM FILE CHARACTER BY CHARACTER while(1) { //WHILE BEGINS ch=fgetc(fp); if(ch==EOF) { printf("world"); break; } if(ch==32&&i==0)
  • 3. { //printf("i m reachable"); M[u][v][w]=EOF; v=0; u++; ch=fgetc(fp); } else if(ch==32||ch=='n') { a[i]='0'; copy(a,M,u,w); if(ch=='n') { i=0; u=0; v=0; w++; } else{ i=0; u++; v=0; continue; } } else{ a[i]=ch; i++; } }//end of while printf("ntotal no. of tuples=%d",w+1); printf("ntotal no. of attributes=%dn",u); //printf("hellon"); for(k=0;k<=w;k++) { for(i=0;i<u;i++) { j=0; while(M[i][j][k]!='0') { printf("%c",M[i][j][k]);
  • 4. j++; } printf("t"); } printf("n"); } //printf("*******%c",M[0][0][0]); //Conversion of 3D Array to integer array 2D for(k=0;k<=w;k++) { j=0; r=0; loc=0; printf("n"); for(i=0;i<Nattributes;i++) { j=0; loc=0; if(datatype[i]==0) { value=toi(M,i,k); d[k][i]=value; //printf("%f",value); } else { str=NomiValue[r]; r++; for(f=0;f<15;f++) { // printf("n########%s###### %d#######n",str[f],stcmp(M,i,k,str,f)); if(stcmp(M,i,k,str,f)==0) { //printf("t%d",f); d[k][i]=f; loc=1; j++; break; } } if(loc==0) {
  • 5. d[k][i]=-1; } } } } fclose(fp); //printf("nnvatesh"); for(i=0;i<=w;i++) { for(j=0;j<Nattributes;j++) printf("%ft",d[i][j]); printf("n"); } missing_check(d,w,u,datatype); printf("Do you want to normalise the data"); if(scanf("%d",&norm)) { normalization(d,w,u,datatype); } return 0; } float toi(char M[100][10][50],int i,int k) { int j=0,e=0; int count=0,countf=0; float sum=0; int setNum=0,c; while(M[i][j][k]!='0') { if(M[i][j][k]=='.') { setNum=1; break; //means numeric is float } j++; } j=0; if(setNum==0) { while(M[i][j][k]!='0') { count++;
  • 6. j++; } } else { while(M[i][j][k]!='.') { countf++; j++; } } //printf("count=%d",count); j=0; while(M[i][j][k]!='0') { e=0; if(M[i][j][k]==EOF) return -1; if(setNum==1) { if(M[i][j][k]!='.'){ c=M[i][j][k]-48; sum=sum+c*(pow(10,countf-1-j)); j++; } else{ e++; j++; while(M[i][j][k]!='0') { c=M[i][j][k]-48; sum=sum+c/(float)(pow(10,e)); e++; j++; } break; } } else{ if(M[i][j][k]!='.'){
  • 7. c=M[i][j][k]-48; sum=sum+c*(pow(10,count-1-j)); j++; } } } //printf("n%d",sum); return sum; } void copy(char a[50],char b[100][10][50],int u,int w) { //printf("copy"); int j=0; while(a[j]!='0') { // printf("n%c",a[j]); b[u][j][w]=a[j]; j++; } b[u][j][w]='0'; } int stcmp(char M[100][10][50],int i,int k,char (*str)[20],int f) { if(M[i][0][k]==str[f][0]) { return 0; } else return 1; } void missing_check(float d[100][50],int w,int u,int datatype) { int missing_attribute[50],missing_tuple[100],i,j,handle,loc=0; float new_d[100][50]; int m=0,n=0; //finding missing attributes printf("nNumber of attributes=%dn",u); printf("Number of tuples=%dn",w+1); //note total no. of tuples in data is w+1 for(j=0;j<u;j++) { loc=0;
  • 8. for(i=0;i<=w;i++) { if(d[i][j]==-1) { loc=1; break; } } if(loc==1) { missing_attribute[j]=1; //printf("****%d",missing_attribute[j]); } else{ missing_attribute[j]=0; } } //finding missing tuples for(i=0;i<=w;i++) { loc=0; for(j=0;j<u;j++) { if(d[i][j]==-1) { loc=1; break; } } if(loc==1){ missing_tuple[i]=1; } else { missing_tuple[i]=0; } } loc=0; for(i=0;i<u;i++) { if(missing_attribute[i]==1)
  • 9. { printf("%d attribute contains missing datann ",i+1); loc=1; //it is correct there is no need to change the value of loc=0 } } if(loc==1) { printf("Want to ignore missing values or handle using imputation methodn"); printf("Enter 0 for ignoring and 1 for handling="); scanf("%d",&handle); } for(i=0;i<=w;i++) { n=0; if(missing_tuple[i]!=1) { for(j=0;j<u;j++) { new_d[m][n]=d[i][j]; n++; } m++; } } //printf("*******m=%d*********n=%d*********",m,n); if(handle==1) { missing_handle(d,new_d,w,u,missing_attribute,missing_tuple,datatype); } else { for(i=0;i<m;i++) { for(j=0;j<n;j++) { printf("%ft",new_d[i][j]); } printf("n"); } } }
  • 10. int missing_handle(float d[100][50],float new_d[100][50],int w,int u,int missing_attribute[],int missing_tuple[],int datatype[]) //yth attribute contain missing data { int i=0,j=0,add=0,m=0,n=0,k=0,c=0,loc=0,v,f,g,z=0; float sum,N[100][50],A[100][50],variable; //program can handle maximum 50 attributes float (*p)[50]; int dependent[50],independent[50]; int count=0,r,s,q; float (*array1)[25],*array2,ver[25][25],mul[25][25]; //calculation of no. of tuples after ignoring missing tuples for(i=0;i<=w;i++) { if(missing_tuple[i]!=1) count++; } printf("nno. of tuples after ignoring missing tuples=%dn",count); for(i=0;i<u;i++) { if(missing_attribute[i]==1){ dependent[m]=i; m++; } else{ independent[n]=i; n++; } } /* for(i=0;i<n;i++) printf("%dt",independent[i]); printf("n");*/ /* for(i=0;i<n;i++) printf("%dt",independent[i]); printf("n");*/ //formation of matrix with all independent variables and only one dependent variable printf("nm=%d n=%d",m,n); //for(z=0;z<m;z++) //will work depend on number of dependent variable // { v=dependent[z]; printf("nv=%d",v); p=(float (*)[50])malloc(100*sizeof(*p)); printf("vatesh developer");
  • 11. for(k=0;k<count;k++) { c=0; for(j=0;j<n;j++) { f=independent[j]; //printf("#####%d",f); p[k][c]=new_d[k][f]; //printf("%ft",p[k][c]); c++; } p[k][c]=new_d[k][v]; //printf("%ft",p[k][c]); c++; //printf("n"); } printf("ntuples after removaln"); for(r=0;r<count;r++) //no. of tuple without any missing value { for(s=0;s<=n;s++) { printf("%ft",p[r][s]); //p[][] contains independent variable and one dependent variable } printf("n"); } //concept to be checked 2mrw for(g=0;g<=n;g++) { for(s=g;s<=n;s++) { sum=0; for(i=0;i<count;i++) { sum=sum+(p[i][g])*(p[i][s]); } printf("%f ",sum); A[g+1][s+1]=sum; } } N[0][0]=count; for(j=0;j<=n;j++)
  • 12. { sum=0; for(k=0;k<count;k++) { sum=sum+p[k][j]; } N[0][j+1]=sum; } for(i=1;i<=n+1;i++) { for(j=i;j<=n+1;j++) { N[i][j]=A[i][j]; } } printf("n"); /*for(i=0;i<=n+1;i++) { for(j=0;j<=n+1;j++) { if(i<=j) //printf("%ft",N[i][j]); else //printf("0.000000t"); } printf("n"); }*/ //taking transpose of matrix of N for(i=0;i<=n+1;i++) { for(j=i;j<=n+1;j++) { N[j][i]=N[i][j]; } printf("n"); } for(i=0;i<=n+1;i++) { for(j=0;j<=n+1;j++)
  • 13. { printf("%ft",N[i][j]); } printf("n"); } //Array is the final array1,take its inverse and multiply by array2 to get final coefficients of equations. array1=(float (*)[25])malloc(sizeof(*array1)*n); array2=(float *)malloc(sizeof(float)*n); for(i=0;i<=n;i++) { for(j=0;j<=n;j++) { array1[i][j]=N[i][j]; } } printf("nArray1n"); for(i=0;i<=n;i++) { for(j=0;j<=n;j++) { printf("%ft",array1[i][j]); } printf("n"); } printf("nArray2n"); for(i=n+1;i<=n+1;i++) //will work only once { for(j=0;j<n+1;j++) { array2[j]=N[i][j]; printf("%ftn",array2[j]); } printf("n"); } inverse(array1,ver,n+1); printf("**********************************n"); for(i=0;i<=n;i++) { for(j=0;j<=n;j++) {
  • 14. printf("%ft",ver[i][j]); } printf("n"); } multiply(ver,array2,mul,n+1); //note:-order of matrix is n+1 for(i=0;i<n+1;i++) { for(j=0;j<1;j++) //will work only once hence can be eliminated { printf("%ft",mul[i][j]); } printf("n"); } for(i=0;i<w+1;i++) { sum=0; if(d[i][v]==-1) { for(j=0;j<n;j++) { q=independent[j]; //printf("q=%") sum=sum+d[i][q]*mul[j+1][0]; } sum=sum+mul[0][0]; if(datatype[v]!=1) { d[i][v]=sum; printf("nd[%d][%d]=%f",i,v,sum); } else { variable=mode(new_d,w,v); d[i][v]=variable; printf("nd[%d][%d]=%f",i,v,d[i][v]); } }
  • 15. } for(i=0;i<25;i++) { free((void *)array1[i]); } free(array1); free(array2); for(i=0;i<100;i++) {free(p[i]);} free(p); missing_check(d,w,u,datatype); //} } /* Inverse of a n by n matrix */ void inverse(float a[25][25],float inv[25][25],int k) { float d; int i,j; /*printf("in inversen"); for(i=0;i<k;i++) { for(j=0;j<k;j++) { printf("%ft",a[i][j]); } printf("n"); } printf("k=%d",k);*/ d=detrm(a,inv,k); printf("THE DETERMINANT IS=%f",d); if(d==0) printf("nMATRIX IS NOT INVERSIBLEn"); else
  • 16. cofact(a,inv,k); } /******************FUNCTION TO FIND THE DETERMINANT OF THE MATRIX************************/ float detrm(float a[25][25],float inv[25][25],int k) { float s=1,det=0,b[25][25]; int i,j,m,n,c; /* printf("nnIn determinantn"); for(i=0;i<k;i++) { for(j=0;j<k;j++) { printf("%ft",a[i][j]); } printf("n"); }*/ if(k==1) { return(a[0][0]); } else { det=0; for(c=0;c<k;c++) { m=0; n=0; for(i=0;i<k;i++) { for(j=0;j<k;j++) { b[i][j]=0; if(i!=0&&j!=c) { b[m][n]=a[i][j]; if(n<(k-2)) n++; else
  • 17. { n=0; m++; } } } } det=det+s*(a[0][c]*detrm(b,inv,k-1)); s=-1*s; } } return(det); } /*******************FUNCTION TO FIND COFACTOR*********************************/ void cofact(float num[25][25],float inv[25][25],int f) { float b[25][25],fac[25][25]; int p,q,m,n,i,j; for(q=0;q<f;q++) { for(p=0;p<f;p++) { m=0; n=0; for(i=0;i<f;i++) { for(j=0;j<f;j++) { b[i][j]=0; if(i!=q&&j!=p) { b[m][n]=num[i][j]; if(n<(f-2)) n++; else { n=0;
  • 18. m++; } } } } fac[q][p]=pow(-1,q+p)*detrm(b,inv,f-1); } } trans(num,fac,inv,f); } /*************FUNCTION TO FIND TRANSPOSE AND INVERSE OF A MATRIX**************************/ void trans(float num[25][25],float fac[25][25],float inv[25][25],int r) { int i,j; float b[25][25],d; printf("In Inverse"); for(i=0;i<r;i++) { for(j=0;j<r;j++) { b[i][j]=fac[j][i]; } } d=detrm(num,inv,r); printf("n**************j =%dn",j); inv[i][j]=0; for(i=0;i<r;i++) { for(j=0;j<r;j++) { inv[i][j]=b[i][j]/d; } } printf("nTHE INVERSE OF THE MATRIX:n");
  • 19. for(i=0;i<r;i++) { for(j=0;j<r;j++) { printf("t%f",inv[i][j]); } printf("n"); } } void multiply(float ver[25][25],float array2[25],float mul[25][25],int k) { int i,j,m=0; float sum=0; for(i=0;i<k;i++) { for(j=0;j<1;j++) //will work only once hence can be eliminated { for(m=0;m<k;m++) { sum=sum+ver[i][m]*array2[m]; } mul[i][j]=sum; } } } normalization(float d[100][50],int w,int u,int datatype[50]) { float min,maxA,minA; int i,j,norm=0,loc=0; for(j=0;j<u;j++) { norm=0; // loc=0; /* if(datatype[j]==0); { for(i=0;i<=w;i++) { if(d[i][j]<0) {loc=1; break; }
  • 20. } } if(loc==1) { min=d[0][j]; for(i=0;i<=w;i++) { if(d[i][j]<min) { min=d[i][j]; } } for(i=0;i<=w;i++) { d[i][j]=d[i][j]-min; } } */ for(i=0;i<=w;i++) { if(d[i][j]>1||d[i][j]<0) { norm=1; break; } } if(norm==1) { minA=d[0][j]; for(i=0;i<=w;i++) { if(d[i][j]<minA) { minA=d[i][j]; } } maxA=d[0][j]; for(i=0;i<=w;i++) { if(d[i][j]>maxA) { maxA=d[i][j];
  • 21. } } for(i=0;i<=w;i++) { d[i][j]=(d[i][j]-minA)/(maxA-minA); } } } for(i=0;i<=w;i++) { for(j=0;j<u;j++) printf("%ft",d[i][j]); printf("n"); } } float mode(float new_d[100][50],int w,int v) { float my[100],occurence[100],max; int e=0,i,j,number,ret; for(i=0;i<=w;i++) { for(j=0;j<e;j++) { if(my[j]==new_d[i][v]) { continue; } my[e]=new_d[i][v]; e++; } } for(i=0;i<e;i++) { for(j=0;j<=w;j++) { if(my[i]==new_d[j][v]) {
  • 22. number++; } } occurence[i]=number; } //now calculate max occurence max=occurence[0]; ret=0; for(j=0;j<e;j++) { if(max<occurence[j]) { max=occurence[j]; ret=j; } } return new_d[ret][v]; }