/*-------------------------------------------------------------------------*/
/* This file contains code to calculate Kendall's Tau in O(N log N) time in
 * a manner similar to the following reference:
 *
 * A Computer Method for Calculating Kendall's Tau with Ungrouped Data
 * William R. Knight Journal of the American Statistical Association,
 * Vol. 61, No. 314, Part 1 (Jun., 1966), pp. 436-439
 *
 * Copyright 2010 David Simcha
 *
 * License:
 * Boost Software License - Version 1.0 - August 17th, 2003
 *
 * Permission is hereby granted, free of charge, to any person or organization
 * obtaining a copy of the software and accompanying documentation covered by
 * this license (the "Software") to use, reproduce, display, distribute,
 * execute, and transmit the Software, and to prepare derivative works of the
 * Software, and to permit third-parties to whom the Software is furnished to
 * do so, all subject to the following:
 *
 * The copyright notices in the Software and this entire statement, including
 * the above license grant, this restriction and the following disclaimer,
 * must be included in all copies of the Software, in whole or in part, and
 * all derivative works of the Software, unless such copies or derivative
 * works are solely in the form of machine-executable object code generated by
 * a source language processor.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
/*-------------------------------------------------------------------------*/
/**---------------- Adapted for AFNI by RWCox - April 2010 ---------------**/
/*-------------------------------------------------------------------------*/

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>

#ifdef SOLARIS_OLD      /* 'if' might be overkill, but just to be minimal... */
                        /*                               10 May 2010 [rickr] */
#include "machdep.h"
#else
#include <stdint.h>
#endif


/*-------------------------------------------------------------------------*/
/* Sorts in place, returns the bubble sort distance between the input array
 * and the sorted array.
 */

static int insertionSort(float *arr, int len)
{
    int maxJ, i,j , swapCount = 0;

/* printf("enter insertionSort len=%d\n",len) ; */

    if(len < 2) { return 0; }

    maxJ = len - 1;
    for(i = len - 2; i >= 0; --i) {
        float  val = arr[i];
        for(j=i; j < maxJ && arr[j + 1] < val; ++j) {
            arr[j] = arr[j + 1];
        }

        arr[j] = val;
        swapCount += (j - i);
    }

    return swapCount;
}

/*-------------------------------------------------------------------------*/

static int merge(float *from, float *to, int middle, int len)
{
    int bufIndex, leftLen, rightLen , swaps ;
    float *left , *right;

/* printf("enter merge\n") ; */

    bufIndex = 0;
    swaps = 0;

    left = from;
    right = from + middle;
    rightLen = len - middle;
    leftLen = middle;

    while(leftLen && rightLen) {
        if(right[0] < left[0]) {
            to[bufIndex] = right[0];
            swaps += leftLen;
            rightLen--;
            right++;
        } else {
            to[bufIndex] = left[0];
            leftLen--;
            left++;
        }
        bufIndex++;
    }

    if(leftLen) {
#pragma omp critical (MEMCPY)
        memcpy(to + bufIndex, left, leftLen * sizeof(float));
    } else if(rightLen) {
#pragma omp critical (MEMCPY)
        memcpy(to + bufIndex, right, rightLen * sizeof(float));
    }

    return swaps;
}

/*-------------------------------------------------------------------------*/
/* Sorts in place, returns the bubble sort distance between the input array
 * and the sorted array.
 */

static int mergeSort(float *x, float *buf, int len)
{
    int swaps , half ;

/* printf("enter mergeSort\n") ; */

    if(len < 10) {
        return insertionSort(x, len);
    }

    swaps = 0;

    if(len < 2) { return 0; }

    half = len / 2;
    swaps += mergeSort(x, buf, half);
    swaps += mergeSort(x + half, buf + half, len - half);
    swaps += merge(x, buf, half, len);

#pragma omp critical (MEMCPY)
    memcpy(x, buf, len * sizeof(float));
    return swaps;
}

/*-------------------------------------------------------------------------*/

static int getMs(float *data, int len)  /* Assumes data is sorted */
{
    int Ms = 0, tieCount = 0 , i ;

/* printf("enter getMs\n") ; */

    for(i = 1; i < len; i++) {
        if(data[i] == data[i-1]) {
            tieCount++;
        } else if(tieCount) {
            Ms += (tieCount * (tieCount + 1)) / 2;
            tieCount = 0;
        }
    }
    if(tieCount) {
        Ms += (tieCount * (tieCount + 1)) / 2;
    }
    return Ms;
}

/*-------------------------------------------------------------------------*/
/* This function calculates the Kendall correlation tau_b.
 * The arrays arr1 should be sorted before this call, and arr2 should be
 * re-ordered in lockstep.  This can be done by calling
 *   qsort_floatfloat(len,arr1,arr2)
 * for example.
 * Note also that arr1 and arr2 will be modified, so if they need to
 * be preserved, do so before calling this function.
 */

float kendallNlogN( float *arr1, float *arr2, int len )
{
    int m1 = 0, m2 = 0, tieCount, swapCount, nPair, s,i ;
    float cor ;

/* printf("enter kendallNlogN\n") ; */

    if( len < 2 ) return (float)0 ;

    nPair = len * (len - 1) / 2;
    s = nPair;

    tieCount = 0;
    for(i = 1; i < len; i++) {
        if(arr1[i - 1] == arr1[i]) {
            tieCount++;
        } else if(tieCount > 0) {
            insertionSort(arr2 + i - tieCount - 1, tieCount + 1);
            m1 += tieCount * (tieCount + 1) / 2;
            s += getMs(arr2 + i - tieCount - 1, tieCount + 1);
            tieCount = 0;
        }
    }
    if(tieCount > 0) {
        insertionSort(arr2 + i - tieCount - 1, tieCount + 1);
        m1 += tieCount * (tieCount + 1) / 2;
        s += getMs(arr2 + i - tieCount - 1, tieCount + 1);
    }

    swapCount = mergeSort(arr2, arr1, len);

    m2 = getMs(arr2, len);
    s -= (m1 + m2) + 2 * swapCount;

    if( m1 < nPair && m2 < nPair )
      cor = s / ( sqrtf((float)(nPair-m1)) * sqrtf((float)(nPair-m2)) ) ;
    else
      cor = 0.0f ;

    return cor ;
}

/*-------------------------------------------------------------------------*/
/* This function uses a simple O(N^2) implementation.  It probably has a
 * smaller constant and therefore is useful in the small N case, and is also
 * useful for testing the relatively complex O(N log N) implementation.
 */

float kendallSmallN( float *arr1, float *arr2, int len )
{
    int m1 = 0, m2 = 0, s = 0, nPair , i,j ;
    float cor ;

/* printf("enter kendallSmallN\n") ; */

    for(i = 0; i < len; i++) {
        for(j = i + 1; j < len; j++) {
            if(arr2[i] > arr2[j]) {
                if (arr1[i] > arr1[j]) {
                    s++;
                } else if(arr1[i] < arr1[j]) {
                    s--;
                } else {
                    m1++;
                }
            } else if(arr2[i] < arr2[j]) {
                if (arr1[i] > arr1[j]) {
                    s--;
                } else if(arr1[i] < arr1[j]) {
                    s++;
                } else {
                    m1++;
                }
            } else {
                m2++;

                if(arr1[i] == arr1[j]) {
                    m1++;
                }
            }
        }
    }

    nPair = len * (len - 1) / 2;

    if( m1 < nPair && m2 < nPair )
      cor = s / ( sqrtf((float)(nPair-m1)) * sqrtf((float)(nPair-m2)) ) ;
    else
      cor = 0.0f ;

    return cor ;
}

#if 0

int main( int argc , char *argv[] )
{
    float  a[100], b[100];
    float  smallNCor, smallNCov, largeNCor, largeNCov;
    int i;
    int N ;
#define NBOT  10
#define NTOP  2000
#define NSTEP 10

    /* Test the small N version against a few values obtained from the old
     * version in R.  Only exercising the small N version because the large
     * N version requires the first array to be sorted and the second to be
     * reordered in lockstep before it's called.*/
    {
        float  a1[] = {1,2,3,5,4};
        float  a2[] = {1,2,3,3,5};
        float  b1[] = {8,6,7,5,3,0,9};
        float  b2[] = {3,1,4,1,5,9,2};
        float  c1[] = {1,1,1,2,3,3,4,4};
        float  c2[] = {1,2,1,3,3,5,5,5};

        printf("kSmall 5 = %g\n", kendallSmallN(a1, a2, 5) ) ;
        printf("kSmall 7 = %g\n", kendallSmallN(b1, b2, 7) ) ;
        printf("kSmall 8 = %g\n", kendallSmallN(c1, c2, 8) ) ;
    }

    /* Now that we're confident that the simple, small N version works,
     * extensively test it against the much more complex and bug-prone
     * O(N log N) version.
     */
    for(i = 0; i < 100; i++) {
        int j, len;
        for(j = 0; j < 100; j++) {
            a[j] = rand() % 30;
            b[j] = rand() % 30;
        }

        len = rand() % 50 + 50;

        /* The large N version assumes that the first array is sorted.  This
         * will usually be made true in R before passing the arrays to the
         * C functions.
         */
        insertionSort(a, len);

        smallNCor = kendallSmallN(a, b, len);
        largeNCor = kendallNlogN (a, b, len);
        printf("Cor: kSmall = %g  kNlogN = %g\n",smallNCor,largeNCor) ;
    }

    printf("-- short tests done --\n") ;

    /* Speed test.  Compare the O(N^2) version, which is very similar to
     * R's current impl, to my O(N log N) version.
     */
    {
        float  *foo, *bar, *buf;
        int i;
        float  startTime, stopTime;

        foo = (float *) malloc(NTOP * sizeof(float));
        bar = (float *) malloc(NTOP * sizeof(float));
        buf = (float *) malloc(NTOP * sizeof(float));

     for( N=NBOT ; N <= NTOP ; N += NSTEP ){
        for(i = 0; i < N; i++) {
            foo[i] = rand();
            bar[i] = rand();
        }

        startTime = clock();
        smallNCor = kendallSmallN(foo, bar, N);
        stopTime = clock();
        printf("N=%d: slow = %f ms  val = %g\n", N,stopTime - startTime,smallNCor);

        startTime = clock();

        /* Only sorting first array.  Normally the second one would be
         * reordered in lockstep.
         */
#if 1
        qsort_floatfloat( N , foo , bar ) ;
#else
        mergeSort(foo, buf, N);
#endif
        largeNCor = kendallNlogN(foo, bar, N);
        stopTime = clock();
        printf("N=%d: fast = %f ms  val = %g\n", N,stopTime - startTime,largeNCor);
     }
    }

    return 0;
}

#endif