I THINK ∴ I'M DANGEROUS

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
n-depth_mean_compare [2016/03/11 00:27]
zashi
n-depth_mean_compare [2017/05/12 13:00]
zashi
Line 25: Line 25:
 To account for differences in length, the length of each set is appended to their respective sets. To account for differences in length, the length of each set is appended to their respective sets.
  
-==== Rolling Average NMC ==== +There is an additional operation--it's not just the average of the differences. The final result is the square root of the average of the square of the differences. This is similar to how standard deviation is calculated and I cannot offer any better rationale than I find using this calculation produces better results in my testing.
- +
-By doing a rolling average NMC, NMC_n = ( NMC(n) + NMC_(n-1) )/ 2, some occasional poor comparisons can be avoided. +
  
  
 ===== Proof of Concept Implementation ===== ===== Proof of Concept Implementation =====
- 
-Use the function nmc_ra (n-Depth Mean Compare Rolling Average) for best results. 
  
 nmc.c can be compiled to a shared library with gcc -O2 -shared -fPIC nmc.c -o libnmc.so -lm nmc.c can be compiled to a shared library with gcc -O2 -shared -fPIC nmc.c -o libnmc.so -lm
  
 <file c nmc.h> <file c nmc.h>
-#include <​stdio.h>​ 
-#include <​errno.h>​ 
-#include <​stdlib.h>​ 
-#include <​math.h>​ 
- 
 double nmc_mean (double *l, unsigned int len); double nmc_mean (double *l, unsigned int len);
 double nmc_mean_c (unsigned char *l, unsigned int len); double nmc_mean_c (unsigned char *l, unsigned int len);
 double nmc (unsigned int n, unsigned char *s1, unsigned int s1_len, unsigned char *s2, unsigned int s2_len); double nmc (unsigned int n, unsigned char *s1, unsigned int s1_len, unsigned char *s2, unsigned int s2_len);
-double nmc_ra (unsigned char *s1, unsigned int s1_len, unsigned char *s2, unsigned int s2_len); 
- 
 </​file>​ </​file>​
  
 <file c nmc.c> <file c nmc.c>
 +#include <​string.h>​
 +#include <​stdio.h>​
 +#include <​errno.h>​
 +#include <​stdlib.h>​
 +#include <​math.h>​
 +
 #include "​nmc.h"​ #include "​nmc.h"​
  
Line 56: Line 50:
 { {
     /* given an array of doubles (l) of length len, cacluate their mean */     /* given an array of doubles (l) of length len, cacluate their mean */
-    ​+
     double avg = 0;     double avg = 0;
     unsigned int i=len;     unsigned int i=len;
-    ​+    ​if (len == 1) return (double) *l;
     while (i--) avg += *(l++);     while (i--) avg += *(l++);
-    ​ 
     return avg / len;     return avg / len;
 } }
Line 69: Line 62:
     double avg = 0;     double avg = 0;
     unsigned int i = len;     unsigned int i = len;
-    ​+    ​if (len == 1) return (double) *l;
     while (i--) avg += (double) *(l++);     while (i--) avg += (double) *(l++);
-    ​ 
     return avg / len;     return avg / len;
 } }
Line 81: Line 73:
     unsigned int i;     unsigned int i;
     double *diffs, mean;     double *diffs, mean;
-     +
-    /* n must be less than or equal to both s1 and s2 */+
     if ( n > s1_len || n > s2_len )     if ( n > s1_len || n > s2_len )
     {     {
         errno = EINVAL;         errno = EINVAL;
-        return -1;+        return -1.0;
     }     }
-    ​ + 
-    /* allocate memory for the difference array. We need n+1, n pieces ​+    if ( 0 == n ) 
 + n = (s1_len < s2_len) ? s1_len : s2_len; 
 + 
 +    /* allocate memory for the difference array. We need n+1, n pieces
      * plus one final slot for the difference in length */      * plus one final slot for the difference in length */
-    diffs = calloc(n+2, sizeof(double));​ +    diffs = calloc(n+1, sizeof(double));​ 
-    +
     /* breaking s1 and s2 apart into n-pieces gives chunk-sized pieces */     /* breaking s1 and s2 apart into n-pieces gives chunk-sized pieces */
     s1_chunk = s1_len / n;     s1_chunk = s1_len / n;
Line 103: Line 97:
     for (i = 0; i < n-1; i++)     for (i = 0; i < n-1; i++)
     {     {
-        diffs[i] = fabs (  +        diffs[i] = 
-                nmc_mean_c ( s1 + i*s1_chunk, s1_chunk ) + fabs ( 
-                + nmc_mean_c ( s1 + i*s1_chunk, s1_chunk ) 
-                nmc_mean_c ( s2 + i*s2_chunk, s2_chunk ) +
-            );+ nmc_mean_c ( s2 + i*s2_chunk, s2_chunk ) 
 + )
 +        diffs[i] = diffs[i] < 128 ? diffs[i] : 256 - diffs[i]; 
 +        diffs[i] *= diffs[i];
     }     }
  
     /* the final chunk */     /* the final chunk */
-    diffs[n-1] = fabs(  +    diffs[n-1] = 
-            nmc_mean_c( s1 + (n-1)*s1_chunk,​ s1_chunk_last ) + fabs ( 
-            + nmc_mean_c ( s1 + (n-1)*s1_chunk,​ s1_chunk_last ) 
-            nmc_mean_c( s2 + (n-1)*s2_chunk,​ s2_chunk_last ) +
-    );+ nmc_mean_c ( s2 + (n-1)*s2_chunk,​ s2_chunk_last ) 
 + )
 +    diffs[n-1] = diffs[n-1] < 128 ? diffs[n-1] : 256 - diffs[n-1];​ 
 +    diffs[n-1] *= diffs[n-1];
  
     /* final diff: size difference */     /* final diff: size difference */
-    diffs[n] = (double) ​abs( s1_len - s2_len );+    diffs[n] = fabs ( (double) s1_len - s2_len ​); 
 + diffs[n] *= diffs[n]; 
 + 
 +    mean = sqrt(nmc_mean(diffs,​n+1));
  
-    ​ 
-    mean = nmc_mean(diffs,​n+1);​ 
-    ​ 
     free(diffs);​     free(diffs);​
-    ​+
     return mean;     return mean;
 } }
  
- 
-double nmc_ra (unsigned char *s1, unsigned int s1_len, unsigned char *s2, unsigned int s2_len) 
-{ 
-    /* nmc rolling average */ 
-    double ra = 0; 
-    unsigned int maxn = s1_len < s2_len ? s1_len : s2_len; 
-    unsigned int i; 
-    ​ 
-    for ( i = 1; i <= maxn; i++ ) 
-        ra = ( ra + nmc (i, s1, s1_len, s2, s2_len) ) / 2; 
-    ​ 
-    return ra; 
-} 
 </​file>​ </​file>​
 ===== Examples ===== ===== Examples =====