/*
 *	matrix.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#ifdef USE_SPL

#define nsp_UsesVector
#include <nsp.h>
#endif

#include <sp/sp.h>
#include <sp/base.h>
#include <sp/memory.h>
#include <sp/vector.h>
#include <sp/voperate.h>
#include <sp/matrix.h>
#include <sp/fileio.h>

static double spPreciseCplxAbs(double re, double im)
{
    double are, aim;
    double ratio;
    
    are = FABS(re);
    aim = FABS(im);
    
    if (are > aim) {
        ratio = aim / are;
        return are * sqrt(1.0 + ratio * ratio);
    } else {
        if (aim == 0.0) {
            return 0.0;
        } else {
            ratio = are / aim;
            return aim * sqrt(1.0 + ratio * ratio);
        }
    }
}

static double spPreciseVectorNorm(double *vec, long length)
{
    long k;
    long amaxi;
    double avalue;
    double amaxv;
    double sqsumv;
    double ratio;

    amaxv = FABS(vec[0]);
    amaxi = 0;
    
    for (k = 1; k < length; k++) {
        avalue = FABS(vec[k]);
        if (amaxv < avalue) {
            amaxv = avalue;
            amaxi = k;
        }
    }

    for (k = 0, sqsumv = 0.0; k < length; k++) {
        if (k == amaxi) {
            sqsumv += 1.0;
        } else {
            ratio = vec[k] / amaxv;
            sqsumv += ratio * ratio;
        }
    }
    
    return amaxv * sqrt(sqsumv);
}

/*
 *	Euclidean norm of a complex vector given as separate real and
 *	imaginary arrays, scaled by the largest element magnitude.
 *
 *	vre:    real parts
 *	vim:    imaginary parts, or NULL for a purely real vector
 *	length: number of elements
 *
 *	Returns 0.0 for a NULL/empty vector and for an all-zero vector
 *	(the latter would otherwise evaluate 0/0).
 */
static double spPreciseCplxVectorNorm(double *vre, double *vim, long length)
{
    long k;
    long amaxi;
    double avalue;
    double amaxv;
    double sqsumv;
    double ratio;

    if (vre == NULL || length <= 0) {
        return 0.0;
    }

    /* locate the element with the largest magnitude */
    amaxv = (vim != NULL) ? spPreciseCplxAbs(vre[0], vim[0]) : fabs(vre[0]);
    amaxi = 0;
    for (k = 1; k < length; k++) {
        avalue = (vim != NULL) ? spPreciseCplxAbs(vre[k], vim[k]) : fabs(vre[k]);
        if (amaxv < avalue) {
            amaxv = avalue;
            amaxi = k;
        }
    }

    if (amaxv == 0.0) {
        /* every element is zero; dividing by amaxv would give NaN */
        return 0.0;
    }

    /* accumulate (|v[k]| / amaxv)^2; the maximum itself contributes 1 */
    sqsumv = 0.0;
    for (k = 0; k < length; k++) {
        if (k == amaxi) {
            sqsumv += 1.0;
        } else {
            avalue = (vim != NULL) ? spPreciseCplxAbs(vre[k], vim[k]) : fabs(vre[k]);
            ratio = avalue / amaxv;
            sqsumv += ratio * ratio;
        }
    }

    return amaxv * sqrt(sqsumv);
}

/*
 *	allocate and free memory
 */
/*
 *	Create a short matrix header.  Real-part storage is allocated only
 *	when both dimensions are at least 1; the imaginary part is left
 *	unallocated.  The requested dimensions are stored as given.
 */
spSMatrix xsmalloc(long row, long col)
{
    spSMatrix m = xalloc(1, struct _spSMatrix);

    m->row = row;
    m->col = col;
    m->imag = NULL;
    m->data = NULL;
    if (row >= 1 && col >= 1) {
        m->data = xsmatalloc(row, col);
    }

    return m;
}

/*
 *	Create a long matrix header.  Real-part storage is allocated only
 *	when both dimensions are at least 1; the imaginary part is left
 *	unallocated.  The requested dimensions are stored as given.
 */
spLMatrix xlmalloc(long row, long col)
{
    spLMatrix m = xalloc(1, struct _spLMatrix);

    m->row = row;
    m->col = col;
    m->imag = NULL;
    m->data = NULL;
    if (row >= 1 && col >= 1) {
        m->data = xlmatalloc(row, col);
    }

    return m;
}

/*
 *	Create a float matrix header.  Real-part storage is allocated only
 *	when both dimensions are at least 1; the imaginary part is left
 *	unallocated.  The requested dimensions are stored as given.
 */
spFMatrix xfmalloc(long row, long col)
{
    spFMatrix m = xalloc(1, struct _spFMatrix);

    m->row = row;
    m->col = col;
    m->imag = NULL;
    m->data = NULL;
    if (row >= 1 && col >= 1) {
        m->data = xfmatalloc(row, col);
    }

    return m;
}

/*
 *	Create a double matrix header.  Real-part storage is allocated only
 *	when both dimensions are at least 1; the imaginary part is left
 *	unallocated.  The requested dimensions are stored as given.
 */
spDMatrix xdmalloc(long row, long col)
{
    spDMatrix m = xalloc(1, struct _spDMatrix);

    m->row = row;
    m->col = col;
    m->imag = NULL;
    m->data = NULL;
    if (row >= 1 && col >= 1) {
        m->data = xdmatalloc(row, col);
    }

    return m;
}

/*
 *	Release a short matrix: real storage, imaginary storage (each only
 *	if present) and finally the header.  A NULL matrix is ignored.
 */
void xsmfree(spSMatrix matrix)
{
    if (matrix == NULL) {
        return;
    }

    if (matrix->data != NULL) {
        xsmatfree(matrix->data, matrix->row);
    }
    if (matrix->imag != NULL) {
        xsmatfree(matrix->imag, matrix->row);
    }
    xfree(matrix);
}

/*
 *	Release a long matrix: real storage, imaginary storage (each only
 *	if present) and finally the header.  A NULL matrix is ignored.
 */
void xlmfree(spLMatrix matrix)
{
    if (matrix == NULL) {
        return;
    }

    if (matrix->data != NULL) {
        xlmatfree(matrix->data, matrix->row);
    }
    if (matrix->imag != NULL) {
        xlmatfree(matrix->imag, matrix->row);
    }
    xfree(matrix);
}

/*
 *	Release a float matrix: real storage, imaginary storage (each only
 *	if present) and finally the header.  A NULL matrix is ignored.
 */
void xfmfree(spFMatrix matrix)
{
    if (matrix == NULL) {
        return;
    }

    if (matrix->data != NULL) {
        xfmatfree(matrix->data, matrix->row);
    }
    if (matrix->imag != NULL) {
        xfmatfree(matrix->imag, matrix->row);
    }
    xfree(matrix);
}

/*
 *	Release a double matrix: real storage, imaginary storage (each only
 *	if present) and finally the header.  A NULL matrix is ignored.
 */
void xdmfree(spDMatrix matrix)
{
    if (matrix == NULL) {
        return;
    }

    if (matrix->data != NULL) {
        xdmatfree(matrix->data, matrix->row);
    }
    if (matrix->imag != NULL) {
        xdmatfree(matrix->imag, matrix->row);
    }
    xfree(matrix);
}

/*
 *	(Re)allocate the imaginary part of a short matrix with the matrix's
 *	current dimensions.  An existing imaginary part is released first.
 */
void smialloc(spSMatrix x)
{
    if (x->imag != NULL) {
        /* drop the previous imaginary storage */
        xsmatfree(x->imag, x->row);
    }

    x->imag = xsmatalloc(x->row, x->col);
}

/*
 *	(Re)allocate the imaginary part of a long matrix with the matrix's
 *	current dimensions.  An existing imaginary part is released first.
 */
void lmialloc(spLMatrix x)
{
    if (x->imag != NULL) {
        /* drop the previous imaginary storage */
        xlmatfree(x->imag, x->row);
    }

    x->imag = xlmatalloc(x->row, x->col);
}

/*
 *	(Re)allocate the imaginary part of a float matrix with the matrix's
 *	current dimensions.  An existing imaginary part is released first.
 */
void fmialloc(spFMatrix x)
{
    if (x->imag != NULL) {
        /* drop the previous imaginary storage */
        xfmatfree(x->imag, x->row);
    }

    x->imag = xfmatalloc(x->row, x->col);
}

/*
 *	(Re)allocate the imaginary part of a double matrix with the matrix's
 *	current dimensions.  An existing imaginary part is released first.
 */
void dmialloc(spDMatrix x)
{
    if (x->imag != NULL) {
        /* drop the previous imaginary storage */
        xdmatfree(x->imag, x->row);
    }

    x->imag = xdmatalloc(x->row, x->col);
}

/*
 *	Release the imaginary part of a short matrix, if any, and mark it
 *	as absent.
 */
void smifree(spSMatrix x)
{
    if (x->imag == NULL) {
        return;
    }

    xsmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Release the imaginary part of a long matrix, if any, and mark it
 *	as absent.
 */
void lmifree(spLMatrix x)
{
    if (x->imag == NULL) {
        return;
    }

    xlmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Release the imaginary part of a float matrix, if any, and mark it
 *	as absent.
 */
void fmifree(spFMatrix x)
{
    if (x->imag == NULL) {
        return;
    }

    xfmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Release the imaginary part of a double matrix, if any, and mark it
 *	as absent.
 */
void dmifree(spDMatrix x)
{
    if (x->imag == NULL) {
        return;
    }

    xdmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Allocate a short matrix together with its imaginary part:
 *	xsmalloc() followed by smialloc().
 */
spSMatrix xsmrialloc(long row, long col)
{
    spSMatrix cplx = xsmalloc(row, col);

    smialloc(cplx);

    return cplx;
}

/*
 *	Allocate a long matrix together with its imaginary part:
 *	xlmalloc() followed by lmialloc().
 */
spLMatrix xlmrialloc(long row, long col)
{
    spLMatrix cplx = xlmalloc(row, col);

    lmialloc(cplx);

    return cplx;
}

/*
 *	Allocate a float matrix together with its imaginary part:
 *	xfmalloc() followed by fmialloc().
 */
spFMatrix xfmrialloc(long row, long col)
{
    spFMatrix cplx = xfmalloc(row, col);

    fmialloc(cplx);

    return cplx;
}

/*
 *	Allocate a double matrix together with its imaginary part:
 *	xdmalloc() followed by dmialloc().
 */
spDMatrix xdmrialloc(long row, long col)
{
    spDMatrix cplx = xdmalloc(row, col);

    dmialloc(cplx);

    return cplx;
}

/*
 *	Allocate a container holding num short-matrix slots, every slot
 *	initialised to NODATA.  At least one slot is allocated even when
 *	num is smaller than 1; num itself is stored as the slot count.
 */
spSMatrices xsmsalloc(long num)
{
    spSMatrices set = xalloc(1, struct _spSMatrices);
    long idx;

    set->num_matrix = num;
    set->matrix = xalloc(MAX(num, 1), spSMatrix);

    idx = 0;
    while (idx < set->num_matrix) {
        set->matrix[idx] = NODATA;
        idx++;
    }

    return set;
}

/*
 *	Allocate a container holding num long-matrix slots, every slot
 *	initialised to NODATA.  At least one slot is allocated even when
 *	num is smaller than 1; num itself is stored as the slot count.
 */
spLMatrices xlmsalloc(long num)
{
    spLMatrices set = xalloc(1, struct _spLMatrices);
    long idx;

    set->num_matrix = num;
    set->matrix = xalloc(MAX(num, 1), spLMatrix);

    idx = 0;
    while (idx < set->num_matrix) {
        set->matrix[idx] = NODATA;
        idx++;
    }

    return set;
}

/*
 *	Allocate a container holding num float-matrix slots, every slot
 *	initialised to NODATA.  At least one slot is allocated even when
 *	num is smaller than 1; num itself is stored as the slot count.
 */
spFMatrices xfmsalloc(long num)
{
    spFMatrices set = xalloc(1, struct _spFMatrices);
    long idx;

    set->num_matrix = num;
    set->matrix = xalloc(MAX(num, 1), spFMatrix);

    idx = 0;
    while (idx < set->num_matrix) {
        set->matrix[idx] = NODATA;
        idx++;
    }

    return set;
}

/*
 *	Allocate a container holding num double-matrix slots, every slot
 *	initialised to NODATA.  At least one slot is allocated even when
 *	num is smaller than 1; num itself is stored as the slot count.
 */
spDMatrices xdmsalloc(long num)
{
    spDMatrices set = xalloc(1, struct _spDMatrices);
    long idx;

    set->num_matrix = num;
    set->matrix = xalloc(MAX(num, 1), spDMatrix);

    idx = 0;
    while (idx < set->num_matrix) {
        set->matrix[idx] = NODATA;
        idx++;
    }

    return set;
}

/*
 *	Release a short-matrix container: every slot that is not NODATA is
 *	freed with xsmfree(), then the slot array and the container itself.
 *	A NULL container is ignored.
 */
void xsmsfree(spSMatrices xs)
{
    long idx;

    if (xs == NULL) {
        return;
    }

    if (xs->matrix != NULL) {
        for (idx = 0; idx < xs->num_matrix; idx++) {
            if (xs->matrix[idx] == NODATA) {
                continue;	/* empty slot */
            }
            xsmfree(xs->matrix[idx]);
        }
        xfree(xs->matrix);
    }
    xfree(xs);
}

/*
 *	Release a long-matrix container: every slot that is not NODATA is
 *	freed with xlmfree(), then the slot array and the container itself.
 *	A NULL container is ignored.
 */
void xlmsfree(spLMatrices xs)
{
    long idx;

    if (xs == NULL) {
        return;
    }

    if (xs->matrix != NULL) {
        for (idx = 0; idx < xs->num_matrix; idx++) {
            if (xs->matrix[idx] == NODATA) {
                continue;	/* empty slot */
            }
            xlmfree(xs->matrix[idx]);
        }
        xfree(xs->matrix);
    }
    xfree(xs);
}

/*
 *	Release a float-matrix container: every slot that is not NODATA is
 *	freed with xfmfree(), then the slot array and the container itself.
 *	A NULL container is ignored.
 */
void xfmsfree(spFMatrices xs)
{
    long idx;

    if (xs == NULL) {
        return;
    }

    if (xs->matrix != NULL) {
        for (idx = 0; idx < xs->num_matrix; idx++) {
            if (xs->matrix[idx] == NODATA) {
                continue;	/* empty slot */
            }
            xfmfree(xs->matrix[idx]);
        }
        xfree(xs->matrix);
    }
    xfree(xs);
}

/*
 *	Release a double-matrix container: every slot that is not NODATA is
 *	freed with xdmfree(), then the slot array and the container itself.
 *	A NULL container is ignored.
 */
void xdmsfree(spDMatrices xs)
{
    long idx;

    if (xs == NULL) {
        return;
    }

    if (xs->matrix != NULL) {
        for (idx = 0; idx < xs->num_matrix; idx++) {
            if (xs->matrix[idx] == NODATA) {
                continue;	/* empty slot */
            }
            xdmfree(xs->matrix[idx]);
        }
        xfree(xs->matrix);
    }
    xfree(xs);
}

/*
 *	Point a short matrix header at caller-supplied real storage.
 *	Negative dimensions are stored as 0 and the imaginary pointer is
 *	reset to NULL.  The previous data/imag pointers are overwritten,
 *	not freed.  A NODATA matrix is ignored.
 */
void smset(spSMatrix matrix, short **data, long row, long col)
{
    if (matrix != NODATA) {
        matrix->data = data;
        matrix->imag = NULL;
        matrix->row = MAX(row, 0);
        matrix->col = MAX(col, 0);
    }
}

/*
 *	Point a long matrix header at caller-supplied real storage.
 *	Negative dimensions are stored as 0 and the imaginary pointer is
 *	reset to NULL.  The previous data/imag pointers are overwritten,
 *	not freed.  A NODATA matrix is ignored.
 */
void lmset(spLMatrix matrix, long **data, long row, long col)
{
    if (matrix != NODATA) {
        matrix->data = data;
        matrix->imag = NULL;
        matrix->row = MAX(row, 0);
        matrix->col = MAX(col, 0);
    }
}

/*
 *	Point a float matrix header at caller-supplied real storage.
 *	Negative dimensions are stored as 0 and the imaginary pointer is
 *	reset to NULL.  The previous data/imag pointers are overwritten,
 *	not freed.  A NODATA matrix is ignored.
 */
void fmset(spFMatrix matrix, float **data, long row, long col)
{
    if (matrix != NODATA) {
        matrix->data = data;
        matrix->imag = NULL;
        matrix->row = MAX(row, 0);
        matrix->col = MAX(col, 0);
    }
}

/*
 *	Point a double matrix header at caller-supplied real storage.
 *	Negative dimensions are stored as 0 and the imaginary pointer is
 *	reset to NULL.  The previous data/imag pointers are overwritten,
 *	not freed.  A NODATA matrix is ignored.
 */
void dmset(spDMatrix matrix, double **data, long row, long col)
{
    if (matrix != NODATA) {
        matrix->data = data;
        matrix->imag = NULL;
        matrix->row = MAX(row, 0);
        matrix->col = MAX(col, 0);
    }
}

/*
 *	Make a short matrix purely real by releasing its imaginary part,
 *	if one is present.
 */
void smreal(spSMatrix x)
{
    if (x->imag == NULL) {
        return;		/* already real */
    }

    xsmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Make a long matrix purely real by releasing its imaginary part,
 *	if one is present.
 */
void lmreal(spLMatrix x)
{
    if (x->imag == NULL) {
        return;		/* already real */
    }

    xlmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Make a float matrix purely real by releasing its imaginary part,
 *	if one is present.
 */
void fmreal(spFMatrix x)
{
    if (x->imag == NULL) {
        return;		/* already real */
    }

    xfmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Make a double matrix purely real by releasing its imaginary part,
 *	if one is present.
 */
void dmreal(spDMatrix x)
{
    if (x->imag == NULL) {
        return;		/* already real */
    }

    xdmatfree(x->imag, x->row);
    x->imag = NULL;
}

/*
 *	Replace a short matrix by its imaginary part.
 *
 *	Without an imaginary part, smzeros(x, 0, 0) is applied to the matrix
 *	(presumably zero-filling the real part -- defined outside this file).
 *	Otherwise the old real storage is released and the imaginary storage
 *	becomes the real part.
 */
void smimag(spSMatrix x)
{
    if (x->imag == NULL) {
        smzeros(x, 0, 0);
        return;
    }

    /*
     * Release the real part with the matrix deallocator, as every other
     * release path in this file does; a plain xfree() of the row-pointer
     * array would not release the element storage.
     */
    if (x->data != NULL) {
        xsmatfree(x->data, x->row);
    }
    x->data = x->imag;
    x->imag = NULL;

    return;
}

/*
 *	Replace a long matrix by its imaginary part.
 *
 *	Without an imaginary part, lmzeros(x, 0, 0) is applied to the matrix
 *	(presumably zero-filling the real part -- defined outside this file).
 *	Otherwise the old real storage is released and the imaginary storage
 *	becomes the real part.
 */
void lmimag(spLMatrix x)
{
    if (x->imag == NULL) {
        lmzeros(x, 0, 0);
        return;
    }

    /*
     * Release the real part with the matrix deallocator, as every other
     * release path in this file does; a plain xfree() of the row-pointer
     * array would not release the element storage.
     */
    if (x->data != NULL) {
        xlmatfree(x->data, x->row);
    }
    x->data = x->imag;
    x->imag = NULL;

    return;
}

/*
 *	Replace a float matrix by its imaginary part.
 *
 *	Without an imaginary part, fmzeros(x, 0, 0) is applied to the matrix
 *	(presumably zero-filling the real part -- defined outside this file).
 *	Otherwise the old real storage is released and the imaginary storage
 *	becomes the real part.
 */
void fmimag(spFMatrix x)
{
    if (x->imag == NULL) {
        fmzeros(x, 0, 0);
        return;
    }

    /*
     * Release the real part with the matrix deallocator, as every other
     * release path in this file does; a plain xfree() of the row-pointer
     * array would not release the element storage.
     */
    if (x->data != NULL) {
        xfmatfree(x->data, x->row);
    }
    x->data = x->imag;
    x->imag = NULL;

    return;
}

/*
 *	Replace a double matrix by its imaginary part.
 *
 *	Without an imaginary part, dmzeros(x, 0, 0) is applied to the matrix
 *	(presumably zero-filling the real part -- defined outside this file).
 *	Otherwise the old real storage is released and the imaginary storage
 *	becomes the real part.
 */
void dmimag(spDMatrix x)
{
    if (x->imag == NULL) {
        dmzeros(x, 0, 0);
        return;
    }

    /*
     * Release the real part with the matrix deallocator, as every other
     * release path in this file does; a plain xfree() of the row-pointer
     * array would not release the element storage.
     */
    if (x->data != NULL) {
        xdmatfree(x->data, x->row);
    }
    x->data = x->imag;
    x->imag = NULL;

    return;
}

/*
 *	Complex-conjugate a short matrix in place by negating every element
 *	of the imaginary part.  A real matrix is left untouched.
 */
void smconj(spSMatrix x)
{
    long r, c;
    short *line;

    if (x->imag == NULL) {
        return;
    }

    for (r = 0; r < x->row; r++) {
        line = x->imag[r];
        for (c = 0; c < x->col; c++) {
            line[c] = -line[c];
        }
    }
}

/*
 *	Complex-conjugate a long matrix in place by negating every element
 *	of the imaginary part.  A real matrix is left untouched.
 */
void lmconj(spLMatrix x)
{
    long r, c;
    long *line;

    if (x->imag == NULL) {
        return;
    }

    for (r = 0; r < x->row; r++) {
        line = x->imag[r];
        for (c = 0; c < x->col; c++) {
            line[c] = -line[c];
        }
    }
}

/*
 *	Complex-conjugate a float matrix in place by negating every element
 *	of the imaginary part.  A real matrix is left untouched.
 */
void fmconj(spFMatrix x)
{
    long r, c;
    float *line;

    if (x->imag == NULL) {
        return;
    }

    for (r = 0; r < x->row; r++) {
        line = x->imag[r];
        for (c = 0; c < x->col; c++) {
            line[c] = -line[c];
        }
    }
}

/*
 *	Complex-conjugate a double matrix in place by negating every element
 *	of the imaginary part.  A real matrix is left untouched.
 */
void dmconj(spDMatrix x)
{
    long r, c;
    double *line;

    if (x->imag == NULL) {
        return;
    }

    for (r = 0; r < x->row; r++) {
        line = x->imag[r];
        for (c = 0; c < x->col; c++) {
            line[c] = -line[c];
        }
    }
}

/*
 *	Exchange the real and imaginary parts of a short matrix.
 *	When no imaginary part exists, smizeros(x, 0, 0) is called first
 *	(presumably creating a zero imaginary part -- defined elsewhere).
 */
void smriswap(spSMatrix x)
{
    short **real_part;

    if (x->imag == NULL) {
        smizeros(x, 0, 0);
    }

    real_part = x->data;
    x->data = x->imag;
    x->imag = real_part;
}

/*
 *	Exchange the real and imaginary parts of a long matrix.
 *	When no imaginary part exists, lmizeros(x, 0, 0) is called first
 *	(presumably creating a zero imaginary part -- defined elsewhere).
 */
void lmriswap(spLMatrix x)
{
    long **real_part;

    if (x->imag == NULL) {
        lmizeros(x, 0, 0);
    }

    real_part = x->data;
    x->data = x->imag;
    x->imag = real_part;
}

/*
 *	Exchange the real and imaginary parts of a float matrix.
 *	When no imaginary part exists, fmizeros(x, 0, 0) is called first
 *	(presumably creating a zero imaginary part -- defined elsewhere).
 */
void fmriswap(spFMatrix x)
{
    float **real_part;

    if (x->imag == NULL) {
        fmizeros(x, 0, 0);
    }

    real_part = x->data;
    x->data = x->imag;
    x->imag = real_part;
}

/*
 *	Exchange the real and imaginary parts of a double matrix.
 *	When no imaginary part exists, dmizeros(x, 0, 0) is called first
 *	(presumably creating a zero imaginary part -- defined elsewhere).
 */
void dmriswap(spDMatrix x)
{
    double **real_part;

    if (x->imag == NULL) {
        dmizeros(x, 0, 0);
    }

    real_part = x->data;
    x->data = x->imag;
    x->imag = real_part;
}

/*
 *	Copy the overlapping top-left region of x into y (short matrices).
 *
 *	The region is MIN(rows) x MIN(cols).  If y has an imaginary part it
 *	receives x's imaginary part, or zeros when x is purely real (the
 *	original dereferenced x->imag unconditionally here).  If only x is
 *	complex, its imaginary part is not copied.  NODATA arguments, empty
 *	regions and matrices without storage are ignored.
 */
void smcopy(spSMatrix y, spSMatrix x)
{
    long i;
    long row, col;
    size_t nbytes;

    if (x == NODATA || y == NODATA) return;

    row = MIN(x->row, y->row);
    col = MIN(x->col, y->col);

    if (row <= 0 || col <= 0) return;
    if (x->data == NULL || y->data == NULL) return;

    nbytes = (size_t)col * sizeof(short);

    for (i = 0; i < row; i++) {
        memcpy(y->data[i], x->data[i], nbytes);
        if (y->imag != NULL) {
            if (x->imag != NULL) {
                memcpy(y->imag[i], x->imag[i], nbytes);
            } else {
                /* real source: its imaginary part is zero */
                memset(y->imag[i], 0, nbytes);
            }
        }
    }

    return;
}

/*
 *	Copy the overlapping top-left region of x into y (long matrices).
 *
 *	The region is MIN(rows) x MIN(cols).  If y has an imaginary part it
 *	receives x's imaginary part, or zeros when x is purely real (the
 *	original dereferenced x->imag unconditionally here).  If only x is
 *	complex, its imaginary part is not copied.  NODATA arguments, empty
 *	regions and matrices without storage are ignored.
 */
void lmcopy(spLMatrix y, spLMatrix x)
{
    long i;
    long row, col;
    size_t nbytes;

    if (x == NODATA || y == NODATA) return;

    row = MIN(x->row, y->row);
    col = MIN(x->col, y->col);

    if (row <= 0 || col <= 0) return;
    if (x->data == NULL || y->data == NULL) return;

    nbytes = (size_t)col * sizeof(long);

    for (i = 0; i < row; i++) {
        memcpy(y->data[i], x->data[i], nbytes);
        if (y->imag != NULL) {
            if (x->imag != NULL) {
                memcpy(y->imag[i], x->imag[i], nbytes);
            } else {
                /* real source: its imaginary part is zero */
                memset(y->imag[i], 0, nbytes);
            }
        }
    }

    return;
}

/*
 *	Copy the overlapping top-left region of x into y (float matrices).
 *
 *	The region is MIN(rows) x MIN(cols).  If y has an imaginary part it
 *	receives x's imaginary part, or zeros when x is purely real (the
 *	original dereferenced x->imag unconditionally here).  If only x is
 *	complex, its imaginary part is not copied.  NODATA arguments, empty
 *	regions and matrices without storage are ignored.
 */
void fmcopy(spFMatrix y, spFMatrix x)
{
    long i;
    long row, col;
    size_t nbytes;

    if (x == NODATA || y == NODATA) return;

    row = MIN(x->row, y->row);
    col = MIN(x->col, y->col);

    if (row <= 0 || col <= 0) return;
    if (x->data == NULL || y->data == NULL) return;

    nbytes = (size_t)col * sizeof(float);

    for (i = 0; i < row; i++) {
        memcpy(y->data[i], x->data[i], nbytes);
        if (y->imag != NULL) {
            if (x->imag != NULL) {
                memcpy(y->imag[i], x->imag[i], nbytes);
            } else {
                /* real source: its imaginary part is zero */
                memset(y->imag[i], 0, nbytes);
            }
        }
    }

    return;
}

/*
 *	Copy the overlapping top-left region of x into y (double matrices).
 *
 *	The region is MIN(rows) x MIN(cols).  If y has an imaginary part it
 *	receives x's imaginary part, or zeros when x is purely real (the
 *	original dereferenced x->imag unconditionally here).  If only x is
 *	complex, its imaginary part is not copied.  NODATA arguments, empty
 *	regions and matrices without storage are ignored.
 */
void dmcopy(spDMatrix y, spDMatrix x)
{
    long i;
    long row, col;
    size_t nbytes;

    if (x == NODATA || y == NODATA) return;

    row = MIN(x->row, y->row);
    col = MIN(x->col, y->col);

    if (row <= 0 || col <= 0) return;
    if (x->data == NULL || y->data == NULL) return;

    nbytes = (size_t)col * sizeof(double);

    for (i = 0; i < row; i++) {
        memcpy(y->data[i], x->data[i], nbytes);
        if (y->imag != NULL) {
            if (x->imag != NULL) {
                memcpy(y->imag[i], x->imag[i], nbytes);
            } else {
                /* real source: its imaginary part is zero */
                memset(y->imag[i], 0, nbytes);
            }
        }
    }

    return;
}

/*
 *	Return a newly allocated copy of a short matrix.  The copy has an
 *	imaginary part exactly when the source has one.  NODATA in gives
 *	NODATA out.
 */
spSMatrix xsmclone(spSMatrix x)
{
    spSMatrix copy;

    if (x == NODATA) {
        return NODATA;
    }

    if (x->imag == NULL) {
        copy = xsmalloc(x->row, x->col);
    } else {
        copy = xsmrialloc(x->row, x->col);
    }
    smcopy(copy, x);

    return copy;
}

/*
 *	Return a newly allocated copy of a long matrix.  The copy has an
 *	imaginary part exactly when the source has one.  NODATA in gives
 *	NODATA out.
 */
spLMatrix xlmclone(spLMatrix x)
{
    spLMatrix copy;

    if (x == NODATA) {
        return NODATA;
    }

    if (x->imag == NULL) {
        copy = xlmalloc(x->row, x->col);
    } else {
        copy = xlmrialloc(x->row, x->col);
    }
    lmcopy(copy, x);

    return copy;
}

/*
 *	Return a newly allocated copy of a float matrix.  The copy has an
 *	imaginary part exactly when the source has one.  NODATA in gives
 *	NODATA out.
 */
spFMatrix xfmclone(spFMatrix x)
{
    spFMatrix copy;

    if (x == NODATA) {
        return NODATA;
    }

    if (x->imag == NULL) {
        copy = xfmalloc(x->row, x->col);
    } else {
        copy = xfmrialloc(x->row, x->col);
    }
    fmcopy(copy, x);

    return copy;
}

/*
 *	Return a newly allocated copy of a double matrix.  The copy has an
 *	imaginary part exactly when the source has one.  NODATA in gives
 *	NODATA out.
 */
spDMatrix xdmclone(spDMatrix x)
{
    spDMatrix copy;

    if (x == NODATA) {
        return NODATA;
    }

    if (x->imag == NULL) {
        copy = xdmalloc(x->row, x->col);
    } else {
        copy = xdmrialloc(x->row, x->col);
    }
    dmcopy(copy, x);

    return copy;
}

/*
 *	Allocate a row x col short matrix with every element set to value.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spSMatrix xsmnums(long row, long col, short value)
{
    spSMatrix filled;

    if (row > 0 && col > 0) {
        filled = xsmalloc(row, col);
        smnums(filled, row, col, value);
        return filled;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Allocate a row x col long matrix with every element set to value.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spLMatrix xlmnums(long row, long col, long value)
{
    spLMatrix filled;

    if (row > 0 && col > 0) {
        filled = xlmalloc(row, col);
        lmnums(filled, row, col, value);
        return filled;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Allocate a row x col float matrix with every element set to value.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spFMatrix xfmnums(long row, long col, float value)
{
    spFMatrix filled;

    if (row > 0 && col > 0) {
        filled = xfmalloc(row, col);
        fmnums(filled, row, col, value);
        return filled;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Allocate a row x col double matrix with every element set to value.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spDMatrix xdmnums(long row, long col, double value)
{
    spDMatrix filled;

    if (row > 0 && col > 0) {
        filled = xdmalloc(row, col);
        dmnums(filled, row, col, value);
        return filled;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Set the top-left row x col region of the real part of a short matrix
 *	to value.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at data[0] clears the wrong elements whenever col is
 *	smaller than mat->col.  Matrices without storage are left untouched.
 */
void smnums(spSMatrix mat, long row, long col, short value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->data == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0) {
        for (k = 0; k < row; k++) {
            memset(mat->data[k], 0, (size_t)col * sizeof(short));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->data[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the real part of a long matrix
 *	to value.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at data[0] clears the wrong elements whenever col is
 *	smaller than mat->col.  Matrices without storage are left untouched.
 */
void lmnums(spLMatrix mat, long row, long col, long value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->data == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0) {
        for (k = 0; k < row; k++) {
            memset(mat->data[k], 0, (size_t)col * sizeof(long));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->data[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the real part of a float matrix
 *	to value.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at data[0] clears the wrong elements whenever col is
 *	smaller than mat->col.  Matrices without storage are left untouched.
 */
void fmnums(spFMatrix mat, long row, long col, float value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->data == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0.0f) {
        for (k = 0; k < row; k++) {
            memset(mat->data[k], 0, (size_t)col * sizeof(float));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->data[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the real part of a double
 *	matrix to value.  A non-positive or too-large row/col selects the
 *	matrix's full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at data[0] clears the wrong elements whenever col is
 *	smaller than mat->col.  Matrices without storage are left untouched.
 */
void dmnums(spDMatrix mat, long row, long col, double value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->data == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0.0) {
        for (k = 0; k < row; k++) {
            memset(mat->data[k], 0, (size_t)col * sizeof(double));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->data[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the imaginary part of a short
 *	matrix to value, allocating the imaginary part first if it is
 *	missing.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at imag[0] clears the wrong elements whenever col is
 *	smaller than mat->col.
 */
void sminums(spSMatrix mat, long row, long col, short value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->imag == NULL) {
        /* memory allocate */
        smialloc(mat);
    }

    if (mat->imag == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0) {
        for (k = 0; k < row; k++) {
            memset(mat->imag[k], 0, (size_t)col * sizeof(short));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->imag[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the imaginary part of a long
 *	matrix to value, allocating the imaginary part first if it is
 *	missing.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at imag[0] clears the wrong elements whenever col is
 *	smaller than mat->col.
 */
void lminums(spLMatrix mat, long row, long col, long value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->imag == NULL) {
        /* memory allocate */
        lmialloc(mat);
    }

    if (mat->imag == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0) {
        for (k = 0; k < row; k++) {
            memset(mat->imag[k], 0, (size_t)col * sizeof(long));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->imag[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the imaginary part of a float
 *	matrix to value, allocating the imaginary part first if it is
 *	missing.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at imag[0] clears the wrong elements whenever col is
 *	smaller than mat->col.
 */
void fminums(spFMatrix mat, long row, long col, float value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->imag == NULL) {
        /* memory allocate */
        fmialloc(mat);
    }

    if (mat->imag == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0.0f) {
        for (k = 0; k < row; k++) {
            memset(mat->imag[k], 0, (size_t)col * sizeof(float));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->imag[k][l] = value;
            }
        }
    }

    return;
}

/*
 *	Set the top-left row x col region of the imaginary part of a double
 *	matrix to value, allocating the imaginary part first if it is
 *	missing.  A non-positive or too-large row/col selects the matrix's
 *	full extent in that dimension.
 *
 *	Zero fill is done row by row: one flat memset of row * col elements
 *	starting at imag[0] clears the wrong elements whenever col is
 *	smaller than mat->col.
 */
void dminums(spDMatrix mat, long row, long col, double value)
{
    long k, l;

    if (row <= 0 || row > mat->row) {
        row = mat->row;
    }
    if (col <= 0 || col > mat->col) {
        col = mat->col;
    }

    if (mat->imag == NULL) {
        /* memory allocate */
        dmialloc(mat);
    }

    if (mat->imag == NULL || row <= 0 || col <= 0) {
        return;
    }

    if (value == 0.0) {
        for (k = 0; k < row; k++) {
            memset(mat->imag[k], 0, (size_t)col * sizeof(double));
        }
    } else {
        /* initialize data */
        for (k = 0; k < row; k++) {
            for (l = 0; l < col; l++) {
                mat->imag[k][l] = value;
            }
        }
    }

    return;
}

spSMatrix xsmrinums(long row, long col, short value)
{
    spSMatrix mat;

    mat = xsmnums(row, col, value);
    smialloc(mat);
    sminums(mat, row, col, value);

    return mat;
}

spLMatrix xlmrinums(long row, long col, long value)
{
    spLMatrix mat;

    mat = xlmnums(row, col, value);
    lmialloc(mat);
    lminums(mat, row, col, value);

    return mat;
}

spFMatrix xfmrinums(long row, long col, float value)
{
    spFMatrix mat;

    mat = xfmnums(row, col, value);
    fmialloc(mat);
    fminums(mat, row, col, value);

    return mat;
}

spDMatrix xdmrinums(long row, long col, double value)
{
    spDMatrix mat;

    mat = xdmnums(row, col, value);
    dmialloc(mat);
    dminums(mat, row, col, value);

    return mat;
}

/*
 *	Turn a short matrix into an identity pattern in place: ones where
 *	the row and column indices match, zeros elsewhere.  An existing
 *	imaginary part is cleared.
 */
void smeye(spSMatrix mat)
{
    long r, c;

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = (r == c) ? 1 : 0;
            if (mat->imag != NULL) {
                mat->imag[r][c] = 0;
            }
        }
    }
}

/*
 *	Turn a long matrix into an identity pattern in place: ones where
 *	the row and column indices match, zeros elsewhere.  An existing
 *	imaginary part is cleared.
 */
void lmeye(spLMatrix mat)
{
    long r, c;

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = (r == c) ? 1 : 0;
            if (mat->imag != NULL) {
                mat->imag[r][c] = 0;
            }
        }
    }
}

/*
 *	Turn a float matrix into an identity pattern in place: ones where
 *	the row and column indices match, zeros elsewhere.  An existing
 *	imaginary part is cleared.
 */
void fmeye(spFMatrix mat)
{
    long r, c;

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = (r == c) ? 1.0f : 0.0f;
            if (mat->imag != NULL) {
                mat->imag[r][c] = 0.0f;
            }
        }
    }
}

/*
 *	Turn a double matrix into an identity pattern in place: ones where
 *	the row and column indices match, zeros elsewhere.  An existing
 *	imaginary part is cleared.
 */
void dmeye(spDMatrix mat)
{
    long r, c;

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = (r == c) ? 1.0 : 0.0;
            if (mat->imag != NULL) {
                mat->imag[r][c] = 0.0;
            }
        }
    }
}

/*
 *	Allocate a row x col short matrix filled with the identity pattern.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spSMatrix xsmeye(long row, long col)
{
    spSMatrix ident;

    if (row > 0 && col > 0) {
        ident = xsmalloc(row, col);
        smeye(ident);
        return ident;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Allocate a row x col long matrix filled with the identity pattern.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spLMatrix xlmeye(long row, long col)
{
    spLMatrix ident;

    if (row > 0 && col > 0) {
        ident = xlmalloc(row, col);
        lmeye(ident);
        return ident;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Allocate a row x col float matrix filled with the identity pattern.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spFMatrix xfmeye(long row, long col)
{
    spFMatrix ident;

    if (row > 0 && col > 0) {
        ident = xfmalloc(row, col);
        fmeye(ident);
        return ident;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Allocate a row x col double matrix filled with the identity pattern.
 *	Non-positive dimensions are reported on stderr and yield NODATA.
 */
spDMatrix xdmeye(long row, long col)
{
    spDMatrix ident;

    if (row > 0 && col > 0) {
        ident = xdmalloc(row, col);
        dmeye(ident);
        return ident;
    }

    fprintf(stderr, "wrong value\n");
    return NODATA;
}

/*
 *	Build the n x n symmetric Pascal matrix in single precision:
 *	A[i][j] = C(i + j, j).
 *
 *	Each row starts at its diagonal element and extends to the right
 *	with C(i+j, j) = C(i+j-1, j-1) * (i+j) / j; the mirrored element is
 *	filled at the same time.  The diagonal uses
 *	C(2i, i) = 2 * C(2i-1, i) = 2 * A[i-1][i], which was stored while
 *	processing the previous row.  This avoids forming (2i)!/i!, whose
 *	float value overflows from i = 25 although C(2i, i) itself is still
 *	representable.
 */
spFMatrix xfmpascal(long n)
{
    long i, j;
    spFMatrix A;

    A = xfmalloc(n, n);
    for (i = 0; i < n; i++) {
        if (i == 0) {
            A->data[0][0] = 1.0f;
        } else {
            A->data[i][i] = 2.0f * A->data[i - 1][i];
        }
        for (j = i + 1; j < n; j++) {
            A->data[i][j] = A->data[i][j - 1] * (float)(i + j) / (float)j;
            A->data[j][i] = A->data[i][j];
        }
    }

    return A;
}

/*
 *	Build the n x n symmetric Pascal matrix in double precision:
 *	A[i][j] = C(i + j, j).
 *
 *	Each row starts at its diagonal element and extends to the right
 *	with C(i+j, j) = C(i+j-1, j-1) * (i+j) / j; the mirrored element is
 *	filled at the same time.  The diagonal uses
 *	C(2i, i) = 2 * C(2i-1, i) = 2 * A[i-1][i], which was stored while
 *	processing the previous row.  This avoids forming (2i)!/i!, which
 *	overflows for large i long before C(2i, i) itself does.
 */
spDMatrix xdmpascal(long n)
{
    long i, j;
    spDMatrix A;

    A = xdmalloc(n, n);
    for (i = 0; i < n; i++) {
        if (i == 0) {
            A->data[0][0] = 1.0;
        } else {
            A->data[i][i] = 2.0 * A->data[i - 1][i];
        }
        for (j = i + 1; j < n; j++) {
            A->data[i][j] = A->data[i][j - 1] * (double)(i + j) / (double)j;
            A->data[j][i] = A->data[i][j];
        }
    }

    return A;
}

/*
 *	initialize each row
 */
/*
 *	Allocate a short matrix with nrow rows in which every row holds the
 *	sequence j, j + incr, j + 2*incr, ...  The column count comes from
 *	lvgetinitcount(nrow, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spSMatrix xsminitrow(long nrow, long j, long incr, long n)
{
    long r, c;
    long count;
    short *line;
    spSMatrix mat;

    count = lvgetinitcount(nrow, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xsmalloc(nrow, count);

    for (r = 0; r < mat->row; r++) {
        line = mat->data[r];
        for (c = 0; c < mat->col; c++) {
            line[c] = (short)(j + (c * incr));
        }
    }

    return mat;
}

/*
 *	Allocate a long matrix with nrow rows in which every row holds the
 *	sequence j, j + incr, j + 2*incr, ...  The column count comes from
 *	lvgetinitcount(nrow, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spLMatrix xlminitrow(long nrow, long j, long incr, long n)
{
    long r, c;
    long count;
    long *line;
    spLMatrix mat;

    count = lvgetinitcount(nrow, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xlmalloc(nrow, count);

    for (r = 0; r < mat->row; r++) {
        line = mat->data[r];
        for (c = 0; c < mat->col; c++) {
            line[c] = j + (c * incr);
        }
    }

    return mat;
}

/*
 *	Allocate a float matrix with nrow rows in which every row holds the
 *	sequence j, j + incr, j + 2*incr, ...  The column count comes from
 *	fvgetinitcount(nrow, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spFMatrix xfminitrow(long nrow, float j, float incr, float n)
{
    long r, c;
    long count;
    float *line;
    spFMatrix mat;

    count = fvgetinitcount(nrow, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xfmalloc(nrow, count);

    for (r = 0; r < mat->row; r++) {
        line = mat->data[r];
        for (c = 0; c < mat->col; c++) {
            line[c] = j + (c * incr);
        }
    }

    return mat;
}

/*
 *	Allocate a double matrix with nrow rows in which every row holds the
 *	sequence j, j + incr, j + 2*incr, ...  The column count comes from
 *	dvgetinitcount(nrow, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spDMatrix xdminitrow(long nrow, double j, double incr, double n)
{
    long r, c;
    long count;
    double *line;
    spDMatrix mat;

    count = dvgetinitcount(nrow, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xdmalloc(nrow, count);

    for (r = 0; r < mat->row; r++) {
        line = mat->data[r];
        for (c = 0; c < mat->col; c++) {
            line[c] = j + (c * incr);
        }
    }

    return mat;
}

/*
 *	initialize each column
 */
/*
 *	Allocate a short matrix with ncol columns in which every column
 *	holds the sequence j, j + incr, j + 2*incr, ...  The row count comes
 *	from lvgetinitcount(ncol, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spSMatrix xsminitcol(long ncol, long j, long incr, long n)
{
    long r, c;
    long count;
    spSMatrix mat;

    count = lvgetinitcount(ncol, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xsmalloc(count, ncol);

    /* walk column by column; the value depends on the row index only */
    for (c = 0; c < mat->col; c++) {
        r = 0;
        while (r < mat->row) {
            mat->data[r][c] = (short)(j + (r * incr));
            r++;
        }
    }

    return mat;
}

/*
 *	Allocate a long matrix with ncol columns in which every column
 *	holds the sequence j, j + incr, j + 2*incr, ...  The row count comes
 *	from lvgetinitcount(ncol, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spLMatrix xlminitcol(long ncol, long j, long incr, long n)
{
    long r, c;
    long count;
    spLMatrix mat;

    count = lvgetinitcount(ncol, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xlmalloc(count, ncol);

    /* walk column by column; the value depends on the row index only */
    for (c = 0; c < mat->col; c++) {
        r = 0;
        while (r < mat->row) {
            mat->data[r][c] = j + (r * incr);
            r++;
        }
    }

    return mat;
}

/*
 *	Allocate a float matrix with ncol columns in which every column
 *	holds the sequence j, j + incr, j + 2*incr, ...  The row count comes
 *	from fvgetinitcount(ncol, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spFMatrix xfminitcol(long ncol, float j, float incr, float n)
{
    long r, c;
    long count;
    spFMatrix mat;

    count = fvgetinitcount(ncol, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xfmalloc(count, ncol);

    /* walk column by column; the value depends on the row index only */
    for (c = 0; c < mat->col; c++) {
        r = 0;
        while (r < mat->row) {
            mat->data[r][c] = j + (r * incr);
            r++;
        }
    }

    return mat;
}

/*
 *	Allocate a double matrix with ncol columns in which every column
 *	holds the sequence j, j + incr, j + 2*incr, ...  The row count comes
 *	from dvgetinitcount(ncol, j, incr, n) (defined outside this file); a
 *	negative count yields NODATA.
 */
spDMatrix xdminitcol(long ncol, double j, double incr, double n)
{
    long r, c;
    long count;
    spDMatrix mat;

    count = dvgetinitcount(ncol, j, incr, n);
    if (count < 0) {
        return NODATA;
    }

    mat = xdmalloc(count, ncol);

    /* walk column by column; the value depends on the row index only */
    for (c = 0; c < mat->col; c++) {
        r = 0;
        while (r < mat->row) {
            mat->data[r][c] = j + (r * incr);
            r++;
        }
    }

    return mat;
}

/*
 *	Zero part of one row of the real part of a short matrix.
 *
 *	row:    row index; out-of-range rows are ignored
 *	offset: first column of the selected span (may be negative)
 *	length: span length; <= 0 means "up to the last column"
 *	inv:    false -> clear the span itself,
 *	        true  -> clear everything outside the span
 *
 *	All writes are clamped to [0, mat->col); the original inverse branch
 *	could write outside the row when offset exceeded the column count or
 *	when offset + length was negative.
 */
void smeraserow(spSMatrix mat, long row, long offset, long length, spBool inv)
{
    long k;
    long head, tail;
    short *line;

    if (mat == NODATA || mat->data == NULL) return;
    if (row < 0 || row >= mat->row) return;

    if (length <= 0) {
        length = mat->col - offset;
    } else {
        length = MIN(length, mat->col - offset);
    }

    line = mat->data[row];
    tail = offset + length;	/* one past the span; never above mat->col */

    if (inv) {
        head = MIN(offset, mat->col);
        for (k = 0; k < head; k++) {
            line[k] = 0;
        }
        for (k = MAX(tail, 0); k < mat->col; k++) {
            line[k] = 0;
        }
    } else {
        for (k = MAX(offset, 0); k < tail; k++) {
            line[k] = 0;
        }
    }

    return;
}

/*
 *	Zero part of one row of the real part of a long matrix.
 *
 *	row:    row index; out-of-range rows are ignored
 *	offset: first column of the selected span (may be negative)
 *	length: span length; <= 0 means "up to the last column"
 *	inv:    false -> clear the span itself,
 *	        true  -> clear everything outside the span
 *
 *	All writes are clamped to [0, mat->col); the original inverse branch
 *	could write outside the row when offset exceeded the column count or
 *	when offset + length was negative.
 */
void lmeraserow(spLMatrix mat, long row, long offset, long length, spBool inv)
{
    long k;
    long head, tail;
    long *line;

    if (mat == NODATA || mat->data == NULL) return;
    if (row < 0 || row >= mat->row) return;

    if (length <= 0) {
        length = mat->col - offset;
    } else {
        length = MIN(length, mat->col - offset);
    }

    line = mat->data[row];
    tail = offset + length;	/* one past the span; never above mat->col */

    if (inv) {
        head = MIN(offset, mat->col);
        for (k = 0; k < head; k++) {
            line[k] = 0;
        }
        for (k = MAX(tail, 0); k < mat->col; k++) {
            line[k] = 0;
        }
    } else {
        for (k = MAX(offset, 0); k < tail; k++) {
            line[k] = 0;
        }
    }

    return;
}

/*
 *	Zero part of one row of the real part of a float matrix.
 *
 *	row:    row index; out-of-range rows are ignored
 *	offset: first column of the selected span (may be negative)
 *	length: span length; <= 0 means "up to the last column"
 *	inv:    false -> clear the span itself,
 *	        true  -> clear everything outside the span
 *
 *	All writes are clamped to [0, mat->col); the original inverse branch
 *	could write outside the row when offset exceeded the column count or
 *	when offset + length was negative.
 */
void fmeraserow(spFMatrix mat, long row, long offset, long length, spBool inv)
{
    long k;
    long head, tail;
    float *line;

    if (mat == NODATA || mat->data == NULL) return;
    if (row < 0 || row >= mat->row) return;

    if (length <= 0) {
        length = mat->col - offset;
    } else {
        length = MIN(length, mat->col - offset);
    }

    line = mat->data[row];
    tail = offset + length;	/* one past the span; never above mat->col */

    if (inv) {
        head = MIN(offset, mat->col);
        for (k = 0; k < head; k++) {
            line[k] = 0.0f;
        }
        for (k = MAX(tail, 0); k < mat->col; k++) {
            line[k] = 0.0f;
        }
    } else {
        for (k = MAX(offset, 0); k < tail; k++) {
            line[k] = 0.0f;
        }
    }

    return;
}

/*
 *	Zero part of one row of the real part of a double matrix.
 *
 *	row:    row index; out-of-range rows are ignored
 *	offset: first column of the selected span (may be negative)
 *	length: span length; <= 0 means "up to the last column"
 *	inv:    false -> clear the span itself,
 *	        true  -> clear everything outside the span
 *
 *	All writes are clamped to [0, mat->col); the original inverse branch
 *	could write outside the row when offset exceeded the column count or
 *	when offset + length was negative.
 */
void dmeraserow(spDMatrix mat, long row, long offset, long length, spBool inv)
{
    long k;
    long head, tail;
    double *line;

    if (mat == NODATA || mat->data == NULL) return;
    if (row < 0 || row >= mat->row) return;

    if (length <= 0) {
        length = mat->col - offset;
    } else {
        length = MIN(length, mat->col - offset);
    }

    line = mat->data[row];
    tail = offset + length;	/* one past the span; never above mat->col */

    if (inv) {
        head = MIN(offset, mat->col);
        for (k = 0; k < head; k++) {
            line[k] = 0.0;
        }
        for (k = MAX(tail, 0); k < mat->col; k++) {
            line[k] = 0.0;
        }
    } else {
        for (k = MAX(offset, 0); k < tail; k++) {
            line[k] = 0.0;
        }
    }

    return;
}

/*
 *	Return a new short matrix: a clone of mat with part of one row
 *	zeroed by smeraserow().  Returns NODATA when the clone is NODATA
 *	(the original passed it on to smeraserow()).
 */
spSMatrix xsmeraserow(spSMatrix mat, long row, long offset, long length, spBool inv)
{
    spSMatrix omat;

    omat = xsmclone(mat);
    if (omat == NODATA) {
        return NODATA;
    }
    smeraserow(omat, row, offset, length, inv);

    return omat;
}

/*
 *	Return a new long matrix: a clone of mat with part of one row
 *	zeroed by lmeraserow().  Returns NODATA when the clone is NODATA
 *	(the original passed it on to lmeraserow()).
 */
spLMatrix xlmeraserow(spLMatrix mat, long row, long offset, long length, spBool inv)
{
    spLMatrix omat;

    omat = xlmclone(mat);
    if (omat == NODATA) {
        return NODATA;
    }
    lmeraserow(omat, row, offset, length, inv);

    return omat;
}

/*
 *	Return a new float matrix: a clone of mat with part of one row
 *	zeroed by fmeraserow().  Returns NODATA when the clone is NODATA
 *	(the original passed it on to fmeraserow()).
 */
spFMatrix xfmeraserow(spFMatrix mat, long row, long offset, long length, spBool inv)
{
    spFMatrix omat;

    omat = xfmclone(mat);
    if (omat == NODATA) {
        return NODATA;
    }
    fmeraserow(omat, row, offset, length, inv);

    return omat;
}

/*
 *	Return a new double matrix: a clone of mat with part of one row
 *	zeroed by dmeraserow().  Returns NODATA when the clone is NODATA
 *	(the original passed it on to dmeraserow()).
 */
spDMatrix xdmeraserow(spDMatrix mat, long row, long offset, long length, spBool inv)
{
    spDMatrix omat;

    omat = xdmclone(mat);
    if (omat == NODATA) {
        return NODATA;
    }
    dmeraserow(omat, row, offset, length, inv);

    return omat;
}

/*
 * Zero part of one column of an spSMatrix in place.
 * Only mat->data is written; mat->imag is not touched by this function.
 *
 *   mat    - matrix to modify
 *   col    - column index; nothing is done when it is outside [0, mat->col)
 *   offset - first row of the selected span (may lie outside the column)
 *   length - span length; <= 0 selects everything from offset to the last
 *            row, larger values are clipped to the end of the column
 *   inv    - false: zero the selected span
 *            true:  keep the selected span and zero the rest of the column
 *
 * The span is always clipped to the column, so no element outside the
 * matrix is written.
 */
void smerasecol(spSMatrix mat, long col, long offset, long length, spBool inv)
{
    long k;
    long head_end;      /* one past the last row of the leading region */
    long span_end;      /* one past the last row of the selected span */

    if (col < 0 || col >= mat->col) {
        return;
    }

    if (length <= 0) {
        length = mat->row - offset;
    } else {
        length = MIN(length, mat->row - offset);
    }
    span_end = offset + length;     /* never larger than mat->row */

    if (inv) {
        /* offset may exceed the column height: stop the leading region there */
        head_end = MIN(offset, mat->row);
        for (k = 0; k < head_end; k++) {
            mat->data[k][col] = 0;
        }
        /* the span may end above row 0: start the trailing region at 0 */
        for (k = MAX(span_end, 0); k < mat->row; k++) {
            mat->data[k][col] = 0;
        }
    } else {
        for (k = MAX(offset, 0); k < span_end; k++) {
            mat->data[k][col] = 0;
        }
    }
}

/*
 * Zero part of one column of an spLMatrix in place.
 * Only mat->data is written; mat->imag is not touched by this function.
 *
 *   mat    - matrix to modify
 *   col    - column index; nothing is done when it is outside [0, mat->col)
 *   offset - first row of the selected span (may lie outside the column)
 *   length - span length; <= 0 selects everything from offset to the last
 *            row, larger values are clipped to the end of the column
 *   inv    - false: zero the selected span
 *            true:  keep the selected span and zero the rest of the column
 *
 * The span is always clipped to the column, so no element outside the
 * matrix is written.
 */
void lmerasecol(spLMatrix mat, long col, long offset, long length, spBool inv)
{
    long k;
    long head_end;      /* one past the last row of the leading region */
    long span_end;      /* one past the last row of the selected span */

    if (col < 0 || col >= mat->col) {
        return;
    }

    if (length <= 0) {
        length = mat->row - offset;
    } else {
        length = MIN(length, mat->row - offset);
    }
    span_end = offset + length;     /* never larger than mat->row */

    if (inv) {
        /* offset may exceed the column height: stop the leading region there */
        head_end = MIN(offset, mat->row);
        for (k = 0; k < head_end; k++) {
            mat->data[k][col] = 0;
        }
        /* the span may end above row 0: start the trailing region at 0 */
        for (k = MAX(span_end, 0); k < mat->row; k++) {
            mat->data[k][col] = 0;
        }
    } else {
        for (k = MAX(offset, 0); k < span_end; k++) {
            mat->data[k][col] = 0;
        }
    }
}

/*
 * Zero part of one column of an spFMatrix in place.
 * Only mat->data is written; mat->imag is not touched by this function.
 *
 *   mat    - matrix to modify
 *   col    - column index; nothing is done when it is outside [0, mat->col)
 *   offset - first row of the selected span (may lie outside the column)
 *   length - span length; <= 0 selects everything from offset to the last
 *            row, larger values are clipped to the end of the column
 *   inv    - false: zero the selected span
 *            true:  keep the selected span and zero the rest of the column
 *
 * The span is always clipped to the column, so no element outside the
 * matrix is written.
 */
void fmerasecol(spFMatrix mat, long col, long offset, long length, spBool inv)
{
    long k;
    long head_end;      /* one past the last row of the leading region */
    long span_end;      /* one past the last row of the selected span */

    if (col < 0 || col >= mat->col) {
        return;
    }

    if (length <= 0) {
        length = mat->row - offset;
    } else {
        length = MIN(length, mat->row - offset);
    }
    span_end = offset + length;     /* never larger than mat->row */

    if (inv) {
        /* offset may exceed the column height: stop the leading region there */
        head_end = MIN(offset, mat->row);
        for (k = 0; k < head_end; k++) {
            mat->data[k][col] = 0.0f;
        }
        /* the span may end above row 0: start the trailing region at 0 */
        for (k = MAX(span_end, 0); k < mat->row; k++) {
            mat->data[k][col] = 0.0f;
        }
    } else {
        for (k = MAX(offset, 0); k < span_end; k++) {
            mat->data[k][col] = 0.0f;
        }
    }
}

/*
 * Zero part of one column of an spDMatrix in place.
 * Only mat->data is written; mat->imag is not touched by this function.
 *
 *   mat    - matrix to modify
 *   col    - column index; nothing is done when it is outside [0, mat->col)
 *   offset - first row of the selected span (may lie outside the column)
 *   length - span length; <= 0 selects everything from offset to the last
 *            row, larger values are clipped to the end of the column
 *   inv    - false: zero the selected span
 *            true:  keep the selected span and zero the rest of the column
 *
 * The span is always clipped to the column, so no element outside the
 * matrix is written.
 */
void dmerasecol(spDMatrix mat, long col, long offset, long length, spBool inv)
{
    long k;
    long head_end;      /* one past the last row of the leading region */
    long span_end;      /* one past the last row of the selected span */

    if (col < 0 || col >= mat->col) {
        return;
    }

    if (length <= 0) {
        length = mat->row - offset;
    } else {
        length = MIN(length, mat->row - offset);
    }
    span_end = offset + length;     /* never larger than mat->row */

    if (inv) {
        /* offset may exceed the column height: stop the leading region there */
        head_end = MIN(offset, mat->row);
        for (k = 0; k < head_end; k++) {
            mat->data[k][col] = 0.0;
        }
        /* the span may end above row 0: start the trailing region at 0 */
        for (k = MAX(span_end, 0); k < mat->row; k++) {
            mat->data[k][col] = 0.0;
        }
    } else {
        for (k = MAX(offset, 0); k < span_end; k++) {
            mat->data[k][col] = 0.0;
        }
    }
}

/*
 * Copying variant of smerasecol(): duplicates `mat` with xsmclone(),
 * applies smerasecol() with the given arguments to the duplicate and
 * returns the duplicate.  `mat` itself is not passed to smerasecol().
 */
spSMatrix xsmerasecol(spSMatrix mat, long col, long offset, long length, spBool inv)
{
    spSMatrix erased = xsmclone(mat);

    smerasecol(erased, col, offset, length, inv);
    return erased;
}

/*
 * Copying variant of lmerasecol(): duplicates `mat` with xlmclone(),
 * applies lmerasecol() with the given arguments to the duplicate and
 * returns the duplicate.  `mat` itself is not passed to lmerasecol().
 */
spLMatrix xlmerasecol(spLMatrix mat, long col, long offset, long length, spBool inv)
{
    spLMatrix erased = xlmclone(mat);

    lmerasecol(erased, col, offset, length, inv);
    return erased;
}

/*
 * Copying variant of fmerasecol(): duplicates `mat` with xfmclone(),
 * applies fmerasecol() with the given arguments to the duplicate and
 * returns the duplicate.  `mat` itself is not passed to fmerasecol().
 */
spFMatrix xfmerasecol(spFMatrix mat, long col, long offset, long length, spBool inv)
{
    spFMatrix erased = xfmclone(mat);

    fmerasecol(erased, col, offset, length, inv);
    return erased;
}

/*
 * Copying variant of dmerasecol(): duplicates `mat` with xdmclone(),
 * applies dmerasecol() with the given arguments to the duplicate and
 * returns the duplicate.  `mat` itself is not passed to dmerasecol().
 */
spDMatrix xdmerasecol(spDMatrix mat, long col, long offset, long length, spBool inv)
{
    spDMatrix erased = xdmclone(mat);

    dmerasecol(erased, col, offset, length, inv);
    return erased;
}

/*
 *	cut (crop) one row of matrix
 */
/*
 * Copy (or accumulate) a span of row `row` of `mat` into `vec`.
 *
 * Span element k is read from column (offset + k) of the row and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source column is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source column is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `row` is outside [0, mat->row).
 */
long smcutrowto(spSMatrix mat, long row, long offset, long length,
                spSVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* column index inside the matrix row */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject rows outside the matrix, like smpasterowfrom() does */
    if (row < 0 || row >= mat->row) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->col) {
            if (overlap) {
                vec->data[dst] += mat->data[row][src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[row][src];
                }
            } else {
                vec->data[dst] = mat->data[row][src];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[row][src] : 0;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source column lies outside the matrix: pad with zero */
            vec->data[dst] = 0;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of row `row` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to smcutrowto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long smcutrow(spSMatrix mat, long row, long offset, long length, spSVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return smcutrowto(mat, row, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from row `row` of
 * `mat`, starting at column `offset`, via smcutrow().
 *
 * The vector is allocated with xsvrialloc() when the matrix has an
 * imaginary part and with xsvalloc() otherwise.
 *
 * Returns NODATA when `row` is outside [0, mat->row) or `length` <= 0.
 */
spSVector xsmcutrow(spSMatrix mat, long row, long offset, long length)
{
    spSVector cut;

    if (!(row >= 0 && row < mat->row && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xsvrialloc(length) : xsvalloc(length);
    smcutrow(mat, row, offset, length, cut);

    return cut;
}

/*
 * Copy (or accumulate) a span of row `row` of `mat` into `vec`.
 *
 * Span element k is read from column (offset + k) of the row and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source column is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source column is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `row` is outside [0, mat->row).
 */
long lmcutrowto(spLMatrix mat, long row, long offset, long length,
                spLVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* column index inside the matrix row */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject rows outside the matrix, like lmpasterowfrom() does */
    if (row < 0 || row >= mat->row) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->col) {
            if (overlap) {
                vec->data[dst] += mat->data[row][src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[row][src];
                }
            } else {
                vec->data[dst] = mat->data[row][src];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[row][src] : 0;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source column lies outside the matrix: pad with zero */
            vec->data[dst] = 0;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of row `row` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to lmcutrowto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long lmcutrow(spLMatrix mat, long row, long offset, long length, spLVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return lmcutrowto(mat, row, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from row `row` of
 * `mat`, starting at column `offset`, via lmcutrow().
 *
 * The vector is allocated with xlvrialloc() when the matrix has an
 * imaginary part and with xlvalloc() otherwise.
 *
 * Returns NODATA when `row` is outside [0, mat->row) or `length` <= 0.
 */
spLVector xlmcutrow(spLMatrix mat, long row, long offset, long length)
{
    spLVector cut;

    if (!(row >= 0 && row < mat->row && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xlvrialloc(length) : xlvalloc(length);
    lmcutrow(mat, row, offset, length, cut);

    return cut;
}

/*
 * Copy (or accumulate) a span of row `row` of `mat` into `vec`.
 *
 * Span element k is read from column (offset + k) of the row and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source column is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source column is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `row` is outside [0, mat->row).
 */
long fmcutrowto(spFMatrix mat, long row, long offset, long length,
                spFVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* column index inside the matrix row */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject rows outside the matrix, like fmpasterowfrom() does */
    if (row < 0 || row >= mat->row) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->col) {
            if (overlap) {
                vec->data[dst] += mat->data[row][src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[row][src];
                }
            } else {
                vec->data[dst] = mat->data[row][src];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[row][src] : 0.0f;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source column lies outside the matrix: pad with zero */
            vec->data[dst] = 0.0f;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0.0f;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of row `row` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to fmcutrowto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long fmcutrow(spFMatrix mat, long row, long offset, long length, spFVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return fmcutrowto(mat, row, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from row `row` of
 * `mat`, starting at column `offset`, via fmcutrow().
 *
 * The vector is allocated with xfvrialloc() when the matrix has an
 * imaginary part and with xfvalloc() otherwise.
 *
 * Returns NODATA when `row` is outside [0, mat->row) or `length` <= 0.
 */
spFVector xfmcutrow(spFMatrix mat, long row, long offset, long length)
{
    spFVector cut;

    if (!(row >= 0 && row < mat->row && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xfvrialloc(length) : xfvalloc(length);
    fmcutrow(mat, row, offset, length, cut);

    return cut;
}

/*
 * Copy (or accumulate) a span of row `row` of `mat` into `vec`.
 *
 * Span element k is read from column (offset + k) of the row and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source column is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source column is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `row` is outside [0, mat->row).
 */
long dmcutrowto(spDMatrix mat, long row, long offset, long length,
                spDVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* column index inside the matrix row */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject rows outside the matrix, like dmpasterowfrom() does */
    if (row < 0 || row >= mat->row) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->col) {
            if (overlap) {
                vec->data[dst] += mat->data[row][src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[row][src];
                }
            } else {
                vec->data[dst] = mat->data[row][src];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[row][src] : 0.0;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source column lies outside the matrix: pad with zero */
            vec->data[dst] = 0.0;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0.0;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of row `row` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to dmcutrowto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long dmcutrow(spDMatrix mat, long row, long offset, long length, spDVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return dmcutrowto(mat, row, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from row `row` of
 * `mat`, starting at column `offset`, via dmcutrow().
 *
 * The vector is allocated with xdvrialloc() when the matrix has an
 * imaginary part and with xdvalloc() otherwise.
 *
 * Returns NODATA when `row` is outside [0, mat->row) or `length` <= 0.
 */
spDVector xdmcutrow(spDMatrix mat, long row, long offset, long length)
{
    spDVector cut;

    if (!(row >= 0 && row < mat->row && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xdvrialloc(length) : xdvalloc(length);
    dmcutrow(mat, row, offset, length, cut);

    return cut;
}

/*
 *	cut (crop) one column of matrix
 */
/*
 * Copy (or accumulate) a span of column `col` of `mat` into `vec`.
 *
 * Span element k is read from row (offset + k) of the column and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source row is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source row is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `col` is outside [0, mat->col).
 */
long smcutcolto(spSMatrix mat, long col, long offset, long length,
                spSVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* row index inside the matrix column */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject columns outside the matrix, like smpastecolfrom() does */
    if (col < 0 || col >= mat->col) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->row) {
            if (overlap) {
                vec->data[dst] += mat->data[src][col];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[src][col];
                }
            } else {
                vec->data[dst] = mat->data[src][col];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[src][col] : 0;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source row lies outside the matrix: pad with zero */
            vec->data[dst] = 0;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of column `col` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to smcutcolto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long smcutcol(spSMatrix mat, long col, long offset, long length, spSVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return smcutcolto(mat, col, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from column `col`
 * of `mat`, starting at row `offset`, via smcutcol().
 *
 * The vector is allocated with xsvrialloc() when the matrix has an
 * imaginary part and with xsvalloc() otherwise.
 *
 * Returns NODATA when `col` is outside [0, mat->col) or `length` <= 0.
 */
spSVector xsmcutcol(spSMatrix mat, long col, long offset, long length)
{
    spSVector cut;

    if (!(col >= 0 && col < mat->col && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xsvrialloc(length) : xsvalloc(length);
    smcutcol(mat, col, offset, length, cut);

    return cut;
}

/*
 * Copy (or accumulate) a span of column `col` of `mat` into `vec`.
 *
 * Span element k is read from row (offset + k) of the column and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source row is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source row is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `col` is outside [0, mat->col).
 */
long lmcutcolto(spLMatrix mat, long col, long offset, long length,
                spLVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* row index inside the matrix column */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject columns outside the matrix, like lmpastecolfrom() does */
    if (col < 0 || col >= mat->col) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->row) {
            if (overlap) {
                vec->data[dst] += mat->data[src][col];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[src][col];
                }
            } else {
                vec->data[dst] = mat->data[src][col];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[src][col] : 0;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source row lies outside the matrix: pad with zero */
            vec->data[dst] = 0;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of column `col` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to lmcutcolto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long lmcutcol(spLMatrix mat, long col, long offset, long length, spLVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return lmcutcolto(mat, col, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from column `col`
 * of `mat`, starting at row `offset`, via lmcutcol().
 *
 * The vector is allocated with xlvrialloc() when the matrix has an
 * imaginary part and with xlvalloc() otherwise.
 *
 * Returns NODATA when `col` is outside [0, mat->col) or `length` <= 0.
 */
spLVector xlmcutcol(spLMatrix mat, long col, long offset, long length)
{
    spLVector cut;

    if (!(col >= 0 && col < mat->col && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xlvrialloc(length) : xlvalloc(length);
    lmcutcol(mat, col, offset, length, cut);

    return cut;
}

/*
 * Copy (or accumulate) a span of column `col` of `mat` into `vec`.
 *
 * Span element k is read from row (offset + k) of the column and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source row is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source row is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `col` is outside [0, mat->col).
 */
long fmcutcolto(spFMatrix mat, long col, long offset, long length,
                spFVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* row index inside the matrix column */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject columns outside the matrix, like fmpastecolfrom() does */
    if (col < 0 || col >= mat->col) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->row) {
            if (overlap) {
                vec->data[dst] += mat->data[src][col];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[src][col];
                }
            } else {
                vec->data[dst] = mat->data[src][col];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[src][col] : 0.0f;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source row lies outside the matrix: pad with zero */
            vec->data[dst] = 0.0f;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0.0f;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of column `col` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to fmcutcolto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long fmcutcol(spFMatrix mat, long col, long offset, long length, spFVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return fmcutcolto(mat, col, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from column `col`
 * of `mat`, starting at row `offset`, via fmcutcol().
 *
 * The vector is allocated with xfvrialloc() when the matrix has an
 * imaginary part and with xfvalloc() otherwise.
 *
 * Returns NODATA when `col` is outside [0, mat->col) or `length` <= 0.
 */
spFVector xfmcutcol(spFMatrix mat, long col, long offset, long length)
{
    spFVector cut;

    if (!(col >= 0 && col < mat->col && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xfvrialloc(length) : xfvalloc(length);
    fmcutcol(mat, col, offset, length, cut);

    return cut;
}

/*
 * Copy (or accumulate) a span of column `col` of `mat` into `vec`.
 *
 * Span element k is read from row (offset + k) of the column and written
 * to vec element (vec_offset + k); vec indices below 0 are skipped.
 *
 *   length  - number of span elements; <= 0 means "up to the end of vec",
 *             larger values are clipped to vec->length - vec_offset
 *   overlap - non-zero: add the matrix values to vec and leave vec
 *             elements whose source row is outside the matrix untouched
 *             zero: overwrite vec, storing 0 where the source row is
 *             outside the matrix
 *
 * Imaginary parts are transferred only when vec->imag is non-NULL; in
 * overwrite mode a matrix without imaginary part yields 0 there.
 *
 * Returns the number of vec elements written, or 0 without touching vec
 * when `col` is outside [0, mat->col).
 */
long dmcutcolto(spDMatrix mat, long col, long offset, long length,
                spDVector vec, long vec_offset, int overlap)
{
    long k;
    long src;           /* row index inside the matrix column */
    long dst;           /* element index inside vec */
    long count = 0;

    /* reject columns outside the matrix, like dmpastecolfrom() does */
    if (col < 0 || col >= mat->col) {
        return 0;
    }

    if (length <= 0) {
        length = vec->length - vec_offset;
    } else {
        length = MIN(length, vec->length - vec_offset);
    }

    /* start at the first k whose destination index is not negative */
    for (k = MAX(-vec_offset, 0); k < length; k++) {
        src = k + offset;
        dst = k + vec_offset;

        if (src >= 0 && src < mat->row) {
            if (overlap) {
                vec->data[dst] += mat->data[src][col];
                if (vec->imag != NULL && mat->imag != NULL) {
                    vec->imag[dst] += mat->imag[src][col];
                }
            } else {
                vec->data[dst] = mat->data[src][col];
                if (vec->imag != NULL) {
                    vec->imag[dst] = (mat->imag != NULL) ? mat->imag[src][col] : 0.0;
                }
            }
            ++count;
        } else if (!overlap) {
            /* source row lies outside the matrix: pad with zero */
            vec->data[dst] = 0.0;
            if (vec->imag != NULL) {
                vec->imag[dst] = 0.0;
            }
            ++count;
        }
    }

    return count;
}

/*
 * Cut a span of column `col` of `mat` into the head of `vec`, overwriting
 * its contents: forwards to dmcutcolto() with vec_offset 0 and overlap 0
 * and returns its result.
 */
long dmcutcol(spDMatrix mat, long col, long offset, long length, spDVector vec)
{
    const long vec_start = 0;   /* write from the first element of vec */
    const int accumulate = 0;   /* overwrite instead of adding */

    return dmcutcolto(mat, col, offset, length, vec, vec_start, accumulate);
}

/*
 * Allocate a vector of `length` elements and fill it from column `col`
 * of `mat`, starting at row `offset`, via dmcutcol().
 *
 * The vector is allocated with xdvrialloc() when the matrix has an
 * imaginary part and with xdvalloc() otherwise.
 *
 * Returns NODATA when `col` is outside [0, mat->col) or `length` <= 0.
 */
spDVector xdmcutcol(spDMatrix mat, long col, long offset, long length)
{
    spDVector cut;

    if (!(col >= 0 && col < mat->col && length > 0)) {
        return NODATA;
    }

    cut = (mat->imag != NULL) ? xdvrialloc(length) : xdvalloc(length);
    dmcutcol(mat, col, offset, length, cut);

    return cut;
}

/*
 *	paste vector on the row of matrix 
 */
/*
 * Paste (or accumulate) elements of `vec` onto row `row` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * column (offset + i).  Columns below 0 are skipped and the loop stops
 * at the first column >= mat->col.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `row` is
 * outside [0, mat->row).
 */
long smpasterowfrom(spSMatrix mat, long row, long offset, long length,
                    spSVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* column index inside the matrix row */
    long src;           /* element index inside vec */
    long written = 0;

    if (row < 0 || row >= mat->row) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[row][dst] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] += vec->imag[src];
                }
            } else {
                mat->data[row][dst] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this column: store zero */
            mat->data[row][dst] = 0;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[row][dst] = 0;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto row `row` of `mat`:
 * forwards to smpasterowfrom() with vec_offset 0 and returns its result.
 */
long smpasterow(spSMatrix mat, long row, spSVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return smpasterowfrom(mat, row, offset, length, vec, vec_start, overlap);
}

/*
 * Paste (or accumulate) elements of `vec` onto row `row` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * column (offset + i).  Columns below 0 are skipped and the loop stops
 * at the first column >= mat->col.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `row` is
 * outside [0, mat->row).
 */
long lmpasterowfrom(spLMatrix mat, long row, long offset, long length,
                    spLVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* column index inside the matrix row */
    long src;           /* element index inside vec */
    long written = 0;

    if (row < 0 || row >= mat->row) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[row][dst] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] += vec->imag[src];
                }
            } else {
                mat->data[row][dst] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this column: store zero */
            mat->data[row][dst] = 0;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[row][dst] = 0;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto row `row` of `mat`:
 * forwards to lmpasterowfrom() with vec_offset 0 and returns its result.
 */
long lmpasterow(spLMatrix mat, long row, spLVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return lmpasterowfrom(mat, row, offset, length, vec, vec_start, overlap);
}

/*
 * Paste (or accumulate) elements of `vec` onto row `row` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * column (offset + i).  Columns below 0 are skipped and the loop stops
 * at the first column >= mat->col.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `row` is
 * outside [0, mat->row).
 */
long fmpasterowfrom(spFMatrix mat, long row, long offset, long length,
                    spFVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* column index inside the matrix row */
    long src;           /* element index inside vec */
    long written = 0;

    if (row < 0 || row >= mat->row) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[row][dst] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] += vec->imag[src];
                }
            } else {
                mat->data[row][dst] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this column: store zero */
            mat->data[row][dst] = 0.0f;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[row][dst] = 0.0f;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto row `row` of `mat`:
 * forwards to fmpasterowfrom() with vec_offset 0 and returns its result.
 */
long fmpasterow(spFMatrix mat, long row, spFVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return fmpasterowfrom(mat, row, offset, length, vec, vec_start, overlap);
}

/*
 * Paste (or accumulate) elements of `vec` onto row `row` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * column (offset + i).  Columns below 0 are skipped and the loop stops
 * at the first column >= mat->col.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `row` is
 * outside [0, mat->row).
 */
long dmpasterowfrom(spDMatrix mat, long row, long offset, long length,
                    spDVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* column index inside the matrix row */
    long src;           /* element index inside vec */
    long written = 0;

    if (row < 0 || row >= mat->row) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[row][dst] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] += vec->imag[src];
                }
            } else {
                mat->data[row][dst] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[row][dst] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this column: store zero */
            mat->data[row][dst] = 0.0;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[row][dst] = 0.0;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto row `row` of `mat`:
 * forwards to dmpasterowfrom() with vec_offset 0 and returns its result.
 */
long dmpasterow(spDMatrix mat, long row, spDVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return dmpasterowfrom(mat, row, offset, length, vec, vec_start, overlap);
}

/*
 *	paste vector on the column of matrix 
 */
/*
 * Paste (or accumulate) elements of `vec` onto column `col` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * row (offset + i).  Rows below 0 are skipped and the loop stops at the
 * first row >= mat->row.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `col` is
 * outside [0, mat->col).
 */
long smpastecolfrom(spSMatrix mat, long col, long offset, long length,
                    spSVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* row index inside the matrix column */
    long src;           /* element index inside vec */
    long written = 0;

    if (col < 0 || col >= mat->col) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[dst][col] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] += vec->imag[src];
                }
            } else {
                mat->data[dst][col] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this row: store zero */
            mat->data[dst][col] = 0;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[dst][col] = 0;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto column `col` of `mat`:
 * forwards to smpastecolfrom() with vec_offset 0 and returns its result.
 */
long smpastecol(spSMatrix mat, long col, spSVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return smpastecolfrom(mat, col, offset, length, vec, vec_start, overlap);
}

/*
 * Paste (or accumulate) elements of `vec` onto column `col` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * row (offset + i).  Rows below 0 are skipped and the loop stops at the
 * first row >= mat->row.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `col` is
 * outside [0, mat->col).
 */
long lmpastecolfrom(spLMatrix mat, long col, long offset, long length,
                    spLVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* row index inside the matrix column */
    long src;           /* element index inside vec */
    long written = 0;

    if (col < 0 || col >= mat->col) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[dst][col] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] += vec->imag[src];
                }
            } else {
                mat->data[dst][col] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this row: store zero */
            mat->data[dst][col] = 0;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[dst][col] = 0;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto column `col` of `mat`:
 * forwards to lmpastecolfrom() with vec_offset 0 and returns its result.
 */
long lmpastecol(spLMatrix mat, long col, spLVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return lmpastecolfrom(mat, col, offset, length, vec, vec_start, overlap);
}

/*
 * Paste (or accumulate) elements of `vec` onto column `col` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * row (offset + i).  Rows below 0 are skipped and the loop stops at the
 * first row >= mat->row.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `col` is
 * outside [0, mat->col).
 */
long fmpastecolfrom(spFMatrix mat, long col, long offset, long length,
                    spFVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* row index inside the matrix column */
    long src;           /* element index inside vec */
    long written = 0;

    if (col < 0 || col >= mat->col) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[dst][col] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] += vec->imag[src];
                }
            } else {
                mat->data[dst][col] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this row: store zero */
            mat->data[dst][col] = 0.0f;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[dst][col] = 0.0f;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste `vec`, read from its first element, onto column `col` of `mat`:
 * forwards to fmpastecolfrom() with vec_offset 0 and returns its result.
 */
long fmpastecol(spFMatrix mat, long col, spFVector vec, long offset, long length, int overlap)
{
    const long vec_start = 0;   /* read vec from its first element */

    return fmpastecolfrom(mat, col, offset, length, vec, vec_start, overlap);
}

/*
 * Paste (or accumulate) elements of `vec` onto column `col` of `mat`.
 *
 * Element i is taken from vec element (vec_offset + i) and stored at
 * row (offset + i).  Rows below 0 are skipped and the loop stops at the
 * first row >= mat->row.
 *
 *   length  - number of elements; <= 0 means vec->length - vec_offset,
 *             a positive value is used as given (not clipped to vec)
 *   overlap - non-zero: add to the matrix; positions whose source index
 *             is outside vec are left untouched and not counted
 *             zero: overwrite; positions whose source index is outside
 *             vec receive 0
 *
 * Imaginary parts are handled only when both vec->imag and mat->imag
 * are non-NULL; otherwise mat->imag is left as it is.
 *
 * Returns the number of matrix elements written, or 0 when `col` is
 * outside [0, mat->col).
 */
long dmpastecolfrom(spDMatrix mat, long col, long offset, long length,
                    spDVector vec, long vec_offset, int overlap)
{
    long i;
    long dst;           /* row index inside the matrix column */
    long src;           /* element index inside vec */
    long written = 0;

    if (col < 0 || col >= mat->col) {
        return 0;
    }
    if (length <= 0) {
        length = vec->length - vec_offset;
    }

    for (i = 0; i < length; i++) {
        dst = offset + i;
        if (dst >= mat->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = vec_offset + i;
        if (src >= 0 && src < vec->length) {
            if (overlap) {
                mat->data[dst][col] += vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] += vec->imag[src];
                }
            } else {
                mat->data[dst][col] = vec->data[src];
                if (vec->imag != NULL && mat->imag != NULL) {
                    mat->imag[dst][col] = vec->imag[src];
                }
            }
            ++written;
        } else if (!overlap) {
            /* no source element for this row: store zero */
            mat->data[dst][col] = 0.0;
            if (vec->imag != NULL && mat->imag != NULL) {
                mat->imag[dst][col] = 0.0;
            }
            ++written;
        }
    }

    return written;
}

/*
 * Paste vec, read from its first element, into column col of mat.
 * Forwards to dmpastecolfrom() with a source offset of 0; offset, length
 * and overlap are passed through unchanged.
 *
 * Returns whatever dmpastecolfrom() returns.
 */
long dmpastecol(spDMatrix mat, long col, spDVector vec, long offset, long length, int overlap)
{
    long npasted;

    npasted = dmpastecolfrom(mat, col, offset, length, vec, 0, overlap);

    return npasted;
}

/*
 * Paste a rowlength x collength region of matfrom, whose top-left corner
 * is (rowfrom, colfrom), into matto with its top-left corner at
 * (rowto, colto).
 *
 *   rowlength  rows to process; if <= 0 defaults to matfrom->row - rowfrom
 *   collength  columns to process; if <= 0 defaults to
 *              matfrom->col - colfrom
 *   overlap    nonzero: add source values to the destination;
 *              zero: overwrite the destination
 *
 * Destination positions outside matto are skipped.  When the source
 * position is outside matfrom, overwrite mode stores 0 (and counts the
 * element) while add mode leaves the destination untouched (and does not
 * count it).  Imaginary parts are handled only when both matrices have
 * them.  Elements are visited column by column.
 *
 * Returns the number of destination elements written.
 */
long smpastematfrom(spSMatrix matto, long rowto, long colto, long rowlength, long collength,
                    spSMatrix matfrom, long rowfrom, long colfrom, int overlap)
{
    long r, c;
    long dr, dc;
    long sr, sc;
    long npasted = 0;
    int has_imag;
    int src_col_ok;

    if (rowlength <= 0) {
        rowlength = matfrom->row - rowfrom;
    }
    if (collength <= 0) {
        collength = matfrom->col - colfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (c = 0; c < collength; c++) {
        dc = colto + c;
        if (dc < 0 || dc >= matto->col) {
            continue;
        }
        sc = colfrom + c;
        src_col_ok = (sc >= 0 && sc < matfrom->col);

        for (r = 0; r < rowlength; r++) {
            dr = rowto + r;
            if (dr < 0 || dr >= matto->row) {
                continue;
            }
            sr = rowfrom + r;

            if (src_col_ok && sr >= 0 && sr < matfrom->row) {
                if (overlap) {
                    matto->data[dr][dc] += matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] += matfrom->imag[sr][sc];
                    }
                } else {
                    matto->data[dr][dc] = matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] = matfrom->imag[sr][sc];
                    }
                }
                ++npasted;
            } else if (!overlap) {
                /* no source element here: overwrite mode clears the target */
                matto->data[dr][dc] = 0;
                if (has_imag) {
                    matto->imag[dr][dc] = 0;
                }
                ++npasted;
            }
        }
    }

    return npasted;
}

/*
 * Paste matfrom, read from its top-left element (0, 0), into matto at
 * (rowto, colto).  Forwards to smpastematfrom(); rowlength, collength and
 * overlap are passed through unchanged.
 *
 * Returns whatever smpastematfrom() returns.
 */
long smpastemat(spSMatrix matto, long rowto, long colto, spSMatrix matfrom,
                long rowlength, long collength, int overlap)
{
    long npasted;

    npasted = smpastematfrom(matto, rowto, colto, rowlength, collength,
                             matfrom, 0, 0, overlap);

    return npasted;
}

/*
 * Paste a rowlength x collength region of matfrom, whose top-left corner
 * is (rowfrom, colfrom), into matto with its top-left corner at
 * (rowto, colto).
 *
 *   rowlength  rows to process; if <= 0 defaults to matfrom->row - rowfrom
 *   collength  columns to process; if <= 0 defaults to
 *              matfrom->col - colfrom
 *   overlap    nonzero: add source values to the destination;
 *              zero: overwrite the destination
 *
 * Destination positions outside matto are skipped.  When the source
 * position is outside matfrom, overwrite mode stores 0 (and counts the
 * element) while add mode leaves the destination untouched (and does not
 * count it).  Imaginary parts are handled only when both matrices have
 * them.  Elements are visited column by column.
 *
 * Returns the number of destination elements written.
 */
long lmpastematfrom(spLMatrix matto, long rowto, long colto, long rowlength, long collength,
                    spLMatrix matfrom, long rowfrom, long colfrom, int overlap)
{
    long r, c;
    long dr, dc;
    long sr, sc;
    long npasted = 0;
    int has_imag;
    int src_col_ok;

    if (rowlength <= 0) {
        rowlength = matfrom->row - rowfrom;
    }
    if (collength <= 0) {
        collength = matfrom->col - colfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (c = 0; c < collength; c++) {
        dc = colto + c;
        if (dc < 0 || dc >= matto->col) {
            continue;
        }
        sc = colfrom + c;
        src_col_ok = (sc >= 0 && sc < matfrom->col);

        for (r = 0; r < rowlength; r++) {
            dr = rowto + r;
            if (dr < 0 || dr >= matto->row) {
                continue;
            }
            sr = rowfrom + r;

            if (src_col_ok && sr >= 0 && sr < matfrom->row) {
                if (overlap) {
                    matto->data[dr][dc] += matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] += matfrom->imag[sr][sc];
                    }
                } else {
                    matto->data[dr][dc] = matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] = matfrom->imag[sr][sc];
                    }
                }
                ++npasted;
            } else if (!overlap) {
                /* no source element here: overwrite mode clears the target */
                matto->data[dr][dc] = 0;
                if (has_imag) {
                    matto->imag[dr][dc] = 0;
                }
                ++npasted;
            }
        }
    }

    return npasted;
}

/*
 * Paste matfrom, read from its top-left element (0, 0), into matto at
 * (rowto, colto).  Forwards to lmpastematfrom(); rowlength, collength and
 * overlap are passed through unchanged.
 *
 * Returns whatever lmpastematfrom() returns.
 */
long lmpastemat(spLMatrix matto, long rowto, long colto, spLMatrix matfrom,
                long rowlength, long collength, int overlap)
{
    long npasted;

    npasted = lmpastematfrom(matto, rowto, colto, rowlength, collength,
                             matfrom, 0, 0, overlap);

    return npasted;
}

/*
 * Paste a rowlength x collength region of matfrom, whose top-left corner
 * is (rowfrom, colfrom), into matto with its top-left corner at
 * (rowto, colto).
 *
 *   rowlength  rows to process; if <= 0 defaults to matfrom->row - rowfrom
 *   collength  columns to process; if <= 0 defaults to
 *              matfrom->col - colfrom
 *   overlap    nonzero: add source values to the destination;
 *              zero: overwrite the destination
 *
 * Destination positions outside matto are skipped.  When the source
 * position is outside matfrom, overwrite mode stores 0 (and counts the
 * element) while add mode leaves the destination untouched (and does not
 * count it).  Imaginary parts are handled only when both matrices have
 * them.  Elements are visited column by column.
 *
 * Returns the number of destination elements written.
 */
long fmpastematfrom(spFMatrix matto, long rowto, long colto, long rowlength, long collength,
                    spFMatrix matfrom, long rowfrom, long colfrom, int overlap)
{
    long r, c;
    long dr, dc;
    long sr, sc;
    long npasted = 0;
    int has_imag;
    int src_col_ok;

    if (rowlength <= 0) {
        rowlength = matfrom->row - rowfrom;
    }
    if (collength <= 0) {
        collength = matfrom->col - colfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (c = 0; c < collength; c++) {
        dc = colto + c;
        if (dc < 0 || dc >= matto->col) {
            continue;
        }
        sc = colfrom + c;
        src_col_ok = (sc >= 0 && sc < matfrom->col);

        for (r = 0; r < rowlength; r++) {
            dr = rowto + r;
            if (dr < 0 || dr >= matto->row) {
                continue;
            }
            sr = rowfrom + r;

            if (src_col_ok && sr >= 0 && sr < matfrom->row) {
                if (overlap) {
                    matto->data[dr][dc] += matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] += matfrom->imag[sr][sc];
                    }
                } else {
                    matto->data[dr][dc] = matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] = matfrom->imag[sr][sc];
                    }
                }
                ++npasted;
            } else if (!overlap) {
                /* no source element here: overwrite mode clears the target */
                matto->data[dr][dc] = 0;
                if (has_imag) {
                    matto->imag[dr][dc] = 0;
                }
                ++npasted;
            }
        }
    }

    return npasted;
}

/*
 * Paste matfrom, read from its top-left element (0, 0), into matto at
 * (rowto, colto).  Forwards to fmpastematfrom(); rowlength, collength and
 * overlap are passed through unchanged.
 *
 * Returns whatever fmpastematfrom() returns.
 */
long fmpastemat(spFMatrix matto, long rowto, long colto, spFMatrix matfrom,
                long rowlength, long collength, int overlap)
{
    long npasted;

    npasted = fmpastematfrom(matto, rowto, colto, rowlength, collength,
                             matfrom, 0, 0, overlap);

    return npasted;
}

/*
 * Paste a rowlength x collength region of matfrom, whose top-left corner
 * is (rowfrom, colfrom), into matto with its top-left corner at
 * (rowto, colto).
 *
 *   rowlength  rows to process; if <= 0 defaults to matfrom->row - rowfrom
 *   collength  columns to process; if <= 0 defaults to
 *              matfrom->col - colfrom
 *   overlap    nonzero: add source values to the destination;
 *              zero: overwrite the destination
 *
 * Destination positions outside matto are skipped.  When the source
 * position is outside matfrom, overwrite mode stores 0 (and counts the
 * element) while add mode leaves the destination untouched (and does not
 * count it).  Imaginary parts are handled only when both matrices have
 * them.  Elements are visited column by column.
 *
 * Returns the number of destination elements written.
 */
long dmpastematfrom(spDMatrix matto, long rowto, long colto, long rowlength, long collength,
                    spDMatrix matfrom, long rowfrom, long colfrom, int overlap)
{
    long r, c;
    long dr, dc;
    long sr, sc;
    long npasted = 0;
    int has_imag;
    int src_col_ok;

    if (rowlength <= 0) {
        rowlength = matfrom->row - rowfrom;
    }
    if (collength <= 0) {
        collength = matfrom->col - colfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (c = 0; c < collength; c++) {
        dc = colto + c;
        if (dc < 0 || dc >= matto->col) {
            continue;
        }
        sc = colfrom + c;
        src_col_ok = (sc >= 0 && sc < matfrom->col);

        for (r = 0; r < rowlength; r++) {
            dr = rowto + r;
            if (dr < 0 || dr >= matto->row) {
                continue;
            }
            sr = rowfrom + r;

            if (src_col_ok && sr >= 0 && sr < matfrom->row) {
                if (overlap) {
                    matto->data[dr][dc] += matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] += matfrom->imag[sr][sc];
                    }
                } else {
                    matto->data[dr][dc] = matfrom->data[sr][sc];
                    if (has_imag) {
                        matto->imag[dr][dc] = matfrom->imag[sr][sc];
                    }
                }
                ++npasted;
            } else if (!overlap) {
                /* no source element here: overwrite mode clears the target */
                matto->data[dr][dc] = 0;
                if (has_imag) {
                    matto->imag[dr][dc] = 0;
                }
                ++npasted;
            }
        }
    }

    return npasted;
}

/*
 * Paste matfrom, read from its top-left element (0, 0), into matto at
 * (rowto, colto).  Forwards to dmpastematfrom(); rowlength, collength and
 * overlap are passed through unchanged.
 *
 * Returns whatever dmpastematfrom() returns.
 */
long dmpastemat(spDMatrix matto, long rowto, long colto, spDMatrix matfrom,
                long rowlength, long collength, int overlap)
{
    long npasted;

    npasted = dmpastematfrom(matto, rowto, colto, rowlength, collength,
                             matfrom, 0, 0, overlap);

    return npasted;
}

/*
 * Paste part of row rowfrom of matfrom, starting at column offsetfrom,
 * into row rowto of matto, starting at column offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to
 *            matfrom->col - offsetfrom
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination column is negative are skipped, and
 * processing stops at the first destination column >= matto->col.  When
 * the source column falls outside matfrom, overwrite mode stores 0 (and
 * counts the position) while add mode leaves the destination untouched
 * (and does not count it).  Imaginary parts are handled only when both
 * matrices have them.
 *
 * Returns the number of destination elements written, or 0 if rowto or
 * rowfrom is out of range.
 */
long smpastematrowfrom(spSMatrix matto, long rowto, long offset, long length,
                       spSMatrix matfrom, long rowfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (!(rowto >= 0 && rowto < matto->row)
        || !(rowfrom >= 0 && rowfrom < matfrom->row)) {
        return 0;
    }
    if (length <= 0) {
        length = matfrom->col - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->col) {
            if (overlap) {
                matto->data[rowto][dst] += matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] += matfrom->imag[rowfrom][src];
                }
            } else {
                matto->data[rowto][dst] = matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] = matfrom->imag[rowfrom][src];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[rowto][dst] = 0;
            if (has_imag) {
                matto->imag[rowto][dst] = 0;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste row rowfrom of matfrom, read from its first column, into row
 * rowto of matto.  Forwards to smpastematrowfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever smpastematrowfrom() returns.
 */
long smpastematrow(spSMatrix matto, long rowto, spSMatrix matfrom, long rowfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = smpastematrowfrom(matto, rowto, offset, length,
                                matfrom, rowfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of column colfrom of matfrom, starting at row offsetfrom,
 * into column colto of matto, starting at row offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to the
 *            rows remaining in the source from offsetfrom onward, i.e.
 *            matfrom->row - offsetfrom (the same rule smpastematrowfrom()
 *            applies to columns)
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination row is negative are skipped, and processing
 * stops at the first destination row >= matto->row.  When the source row
 * falls outside matfrom, overwrite mode stores 0 (and counts the position)
 * while add mode leaves the destination untouched (and does not count it).
 * Imaginary parts are handled only when both matrices have them.
 *
 * Returns the number of destination elements written, or 0 if colto or
 * colfrom is out of range.
 */
long smpastematcolfrom(spSMatrix matto, long colto, long offset, long length,
                       spSMatrix matfrom, long colfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (colto < 0 || colto >= matto->col
        || colfrom < 0 || colfrom >= matfrom->col) {
        return 0;
    }
    if (length <= 0) {
        /*
         * Account for offsetfrom: using matfrom->row alone would run past
         * the end of the source for a positive offsetfrom (zero-filling
         * extra destination rows in overwrite mode) and would cut the
         * source short for a negative one.
         */
        length = matfrom->row - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->row) {
            if (overlap) {
                matto->data[dst][colto] += matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] += matfrom->imag[src][colfrom];
                }
            } else {
                matto->data[dst][colto] = matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] = matfrom->imag[src][colfrom];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[dst][colto] = 0;
            if (has_imag) {
                matto->imag[dst][colto] = 0;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste column colfrom of matfrom, read from its first row, into column
 * colto of matto.  Forwards to smpastematcolfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever smpastematcolfrom() returns.
 */
long smpastematcol(spSMatrix matto, long colto, spSMatrix matfrom, long colfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = smpastematcolfrom(matto, colto, offset, length,
                                matfrom, colfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of row rowfrom of matfrom, starting at column offsetfrom,
 * into row rowto of matto, starting at column offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to
 *            matfrom->col - offsetfrom
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination column is negative are skipped, and
 * processing stops at the first destination column >= matto->col.  When
 * the source column falls outside matfrom, overwrite mode stores 0 (and
 * counts the position) while add mode leaves the destination untouched
 * (and does not count it).  Imaginary parts are handled only when both
 * matrices have them.
 *
 * Returns the number of destination elements written, or 0 if rowto or
 * rowfrom is out of range.
 */
long lmpastematrowfrom(spLMatrix matto, long rowto, long offset, long length,
                       spLMatrix matfrom, long rowfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (!(rowto >= 0 && rowto < matto->row)
        || !(rowfrom >= 0 && rowfrom < matfrom->row)) {
        return 0;
    }
    if (length <= 0) {
        length = matfrom->col - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->col) {
            if (overlap) {
                matto->data[rowto][dst] += matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] += matfrom->imag[rowfrom][src];
                }
            } else {
                matto->data[rowto][dst] = matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] = matfrom->imag[rowfrom][src];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[rowto][dst] = 0;
            if (has_imag) {
                matto->imag[rowto][dst] = 0;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste row rowfrom of matfrom, read from its first column, into row
 * rowto of matto.  Forwards to lmpastematrowfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever lmpastematrowfrom() returns.
 */
long lmpastematrow(spLMatrix matto, long rowto, spLMatrix matfrom, long rowfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = lmpastematrowfrom(matto, rowto, offset, length,
                                matfrom, rowfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of column colfrom of matfrom, starting at row offsetfrom,
 * into column colto of matto, starting at row offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to the
 *            rows remaining in the source from offsetfrom onward, i.e.
 *            matfrom->row - offsetfrom (the same rule lmpastematrowfrom()
 *            applies to columns)
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination row is negative are skipped, and processing
 * stops at the first destination row >= matto->row.  When the source row
 * falls outside matfrom, overwrite mode stores 0 (and counts the position)
 * while add mode leaves the destination untouched (and does not count it).
 * Imaginary parts are handled only when both matrices have them.
 *
 * Returns the number of destination elements written, or 0 if colto or
 * colfrom is out of range.
 */
long lmpastematcolfrom(spLMatrix matto, long colto, long offset, long length,
                       spLMatrix matfrom, long colfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (colto < 0 || colto >= matto->col
        || colfrom < 0 || colfrom >= matfrom->col) {
        return 0;
    }
    if (length <= 0) {
        /*
         * Account for offsetfrom: using matfrom->row alone would run past
         * the end of the source for a positive offsetfrom (zero-filling
         * extra destination rows in overwrite mode) and would cut the
         * source short for a negative one.
         */
        length = matfrom->row - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->row) {
            if (overlap) {
                matto->data[dst][colto] += matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] += matfrom->imag[src][colfrom];
                }
            } else {
                matto->data[dst][colto] = matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] = matfrom->imag[src][colfrom];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[dst][colto] = 0;
            if (has_imag) {
                matto->imag[dst][colto] = 0;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste column colfrom of matfrom, read from its first row, into column
 * colto of matto.  Forwards to lmpastematcolfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever lmpastematcolfrom() returns.
 */
long lmpastematcol(spLMatrix matto, long colto, spLMatrix matfrom, long colfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = lmpastematcolfrom(matto, colto, offset, length,
                                matfrom, colfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of row rowfrom of matfrom, starting at column offsetfrom,
 * into row rowto of matto, starting at column offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to
 *            matfrom->col - offsetfrom
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination column is negative are skipped, and
 * processing stops at the first destination column >= matto->col.  When
 * the source column falls outside matfrom, overwrite mode stores zero (and
 * counts the position) while add mode leaves the destination untouched
 * (and does not count it).  Imaginary parts are handled only when both
 * matrices have them.
 *
 * Returns the number of destination elements written, or 0 if rowto or
 * rowfrom is out of range.
 */
long fmpastematrowfrom(spFMatrix matto, long rowto, long offset, long length,
                       spFMatrix matfrom, long rowfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (!(rowto >= 0 && rowto < matto->row)
        || !(rowfrom >= 0 && rowfrom < matfrom->row)) {
        return 0;
    }
    if (length <= 0) {
        length = matfrom->col - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->col) {
            if (overlap) {
                matto->data[rowto][dst] += matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] += matfrom->imag[rowfrom][src];
                }
            } else {
                matto->data[rowto][dst] = matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] = matfrom->imag[rowfrom][src];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[rowto][dst] = 0.0f;
            if (has_imag) {
                matto->imag[rowto][dst] = 0.0f;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste row rowfrom of matfrom, read from its first column, into row
 * rowto of matto.  Forwards to fmpastematrowfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever fmpastematrowfrom() returns.
 */
long fmpastematrow(spFMatrix matto, long rowto, spFMatrix matfrom, long rowfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = fmpastematrowfrom(matto, rowto, offset, length,
                                matfrom, rowfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of column colfrom of matfrom, starting at row offsetfrom,
 * into column colto of matto, starting at row offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to the
 *            rows remaining in the source from offsetfrom onward, i.e.
 *            matfrom->row - offsetfrom (the same rule fmpastematrowfrom()
 *            applies to columns)
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination row is negative are skipped, and processing
 * stops at the first destination row >= matto->row.  When the source row
 * falls outside matfrom, overwrite mode stores zero (and counts the
 * position) while add mode leaves the destination untouched (and does not
 * count it).  Imaginary parts are handled only when both matrices have
 * them.
 *
 * Returns the number of destination elements written, or 0 if colto or
 * colfrom is out of range.
 */
long fmpastematcolfrom(spFMatrix matto, long colto, long offset, long length,
                       spFMatrix matfrom, long colfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (colto < 0 || colto >= matto->col
        || colfrom < 0 || colfrom >= matfrom->col) {
        return 0;
    }
    if (length <= 0) {
        /*
         * Account for offsetfrom: using matfrom->row alone would run past
         * the end of the source for a positive offsetfrom (zero-filling
         * extra destination rows in overwrite mode) and would cut the
         * source short for a negative one.
         */
        length = matfrom->row - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->row) {
            if (overlap) {
                matto->data[dst][colto] += matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] += matfrom->imag[src][colfrom];
                }
            } else {
                matto->data[dst][colto] = matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] = matfrom->imag[src][colfrom];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[dst][colto] = 0.0f;
            if (has_imag) {
                matto->imag[dst][colto] = 0.0f;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste column colfrom of matfrom, read from its first row, into column
 * colto of matto.  Forwards to fmpastematcolfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever fmpastematcolfrom() returns.
 */
long fmpastematcol(spFMatrix matto, long colto, spFMatrix matfrom, long colfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = fmpastematcolfrom(matto, colto, offset, length,
                                matfrom, colfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of row rowfrom of matfrom, starting at column offsetfrom,
 * into row rowto of matto, starting at column offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to
 *            matfrom->col - offsetfrom
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination column is negative are skipped, and
 * processing stops at the first destination column >= matto->col.  When
 * the source column falls outside matfrom, overwrite mode stores zero (and
 * counts the position) while add mode leaves the destination untouched
 * (and does not count it).  Imaginary parts are handled only when both
 * matrices have them.
 *
 * Returns the number of destination elements written, or 0 if rowto or
 * rowfrom is out of range.
 */
long dmpastematrowfrom(spDMatrix matto, long rowto, long offset, long length,
                       spDMatrix matfrom, long rowfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (!(rowto >= 0 && rowto < matto->row)
        || !(rowfrom >= 0 && rowfrom < matfrom->row)) {
        return 0;
    }
    if (length <= 0) {
        length = matfrom->col - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->col) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->col) {
            if (overlap) {
                matto->data[rowto][dst] += matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] += matfrom->imag[rowfrom][src];
                }
            } else {
                matto->data[rowto][dst] = matfrom->data[rowfrom][src];
                if (has_imag) {
                    matto->imag[rowto][dst] = matfrom->imag[rowfrom][src];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[rowto][dst] = 0.0;
            if (has_imag) {
                matto->imag[rowto][dst] = 0.0;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste row rowfrom of matfrom, read from its first column, into row
 * rowto of matto.  Forwards to dmpastematrowfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * Returns whatever dmpastematrowfrom() returns.
 */
long dmpastematrow(spDMatrix matto, long rowto, spDMatrix matfrom, long rowfrom,
                   long offset, long length, int overlap)
{
    long npasted;

    npasted = dmpastematrowfrom(matto, rowto, offset, length,
                                matfrom, rowfrom, 0, overlap);

    return npasted;
}

/*
 * Paste part of column colfrom of matfrom, starting at row offsetfrom,
 * into column colto of matto, starting at row offset.
 *
 *   length   number of positions to process; if <= 0 it defaults to the
 *            rows remaining in the source from offsetfrom onward, i.e.
 *            matfrom->row - offsetfrom (the same rule dmpastematrowfrom()
 *            applies to columns)
 *   overlap  nonzero: add the source value to the destination element;
 *            zero: overwrite the destination element
 *
 * Positions whose destination row is negative are skipped, and processing
 * stops at the first destination row >= matto->row.  When the source row
 * falls outside matfrom, overwrite mode stores zero (and counts the
 * position) while add mode leaves the destination untouched (and does not
 * count it).  Imaginary parts are handled only when both matrices have
 * them.
 *
 * Returns the number of destination elements written, or 0 if colto or
 * colfrom is out of range.
 */
long dmpastematcolfrom(spDMatrix matto, long colto, long offset, long length,
                       spDMatrix matfrom, long colfrom, long offsetfrom, int overlap)
{
    long n;
    long dst;
    long src;
    long npasted = 0;
    int has_imag;

    if (colto < 0 || colto >= matto->col
        || colfrom < 0 || colfrom >= matfrom->col) {
        return 0;
    }
    if (length <= 0) {
        /*
         * Account for offsetfrom: using matfrom->row alone would run past
         * the end of the source for a positive offsetfrom (zero-filling
         * extra destination rows in overwrite mode) and would cut the
         * source short for a negative one.
         */
        length = matfrom->row - offsetfrom;
    }
    has_imag = (matfrom->imag != NULL && matto->imag != NULL);

    for (n = 0; n < length; n++) {
        dst = n + offset;
        if (dst >= matto->row) {
            break;
        }
        if (dst < 0) {
            continue;
        }

        src = n + offsetfrom;
        if (src >= 0 && src < matfrom->row) {
            if (overlap) {
                matto->data[dst][colto] += matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] += matfrom->imag[src][colfrom];
                }
            } else {
                matto->data[dst][colto] = matfrom->data[src][colfrom];
                if (has_imag) {
                    matto->imag[dst][colto] = matfrom->imag[src][colfrom];
                }
            }
            ++npasted;
        } else if (!overlap) {
            /* no source element here: overwrite mode clears the target */
            matto->data[dst][colto] = 0.0;
            if (has_imag) {
                matto->imag[dst][colto] = 0.0;
            }
            ++npasted;
        }
    }

    return npasted;
}

/*
 * Paste column colfrom of matfrom, read from its first row, into column
 * colto of matto.  Forwards to dmpastematcolfrom() with a source offset
 * of 0; offset, length and overlap are passed through unchanged.
 *
 * NOTE(review): in this parameter list offset and length come BEFORE
 * colfrom, whereas smpastematcol(), lmpastematcol() and fmpastematcol()
 * take colfrom first.  All three parameters are long, so a call written in
 * the siblings' order compiles without a diagnostic but binds the values
 * to the wrong names -- confirm against the header prototype and callers.
 *
 * Returns whatever dmpastematcolfrom() returns.
 */
long dmpastematcol(spDMatrix matto, long colto, spDMatrix matfrom,
                   long offset, long length, long colfrom, int overlap)
{
    long npasted;

    npasted = dmpastematcolfrom(matto, colto, offset, length,
                                matfrom, colfrom, 0, overlap);

    return npasted;
}

/*
 * Build a vector with one element taken from each row of mat: element k
 * is mat[k][colidx->data[k]].
 *
 * The result is obtained from xsvalloc(colidx->length); an imaginary part
 * is added with svialloc() when mat has one.  Element k is 0 when k is not
 * a valid row of mat or colidx->data[k] is not a valid column.
 *
 * Returns the new vector.
 */
spSVector xsmrextract(spSMatrix mat, spLVector colidx)
{
    long row;
    long col;
    int in_range;
    spSVector picked;

    picked = xsvalloc(colidx->length);
    if (mat->imag != NULL) {
        svialloc(picked);
    }

    for (row = 0; row < picked->length; row++) {
        col = colidx->data[row];
        in_range = (row < mat->row && col >= 0 && col < mat->col);

        picked->data[row] = in_range ? mat->data[row][col] : 0;
        if (picked->imag != NULL) {
            picked->imag[row] = in_range ? mat->imag[row][col] : 0;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each row of mat: element k
 * is mat[k][colidx->data[k]].
 *
 * The result is obtained from xlvalloc(colidx->length); an imaginary part
 * is added with lvialloc() when mat has one.  Element k is 0 when k is not
 * a valid row of mat or colidx->data[k] is not a valid column.
 *
 * Returns the new vector.
 */
spLVector xlmrextract(spLMatrix mat, spLVector colidx)
{
    long row;
    long col;
    int in_range;
    spLVector picked;

    picked = xlvalloc(colidx->length);
    if (mat->imag != NULL) {
        lvialloc(picked);
    }

    for (row = 0; row < picked->length; row++) {
        col = colidx->data[row];
        in_range = (row < mat->row && col >= 0 && col < mat->col);

        picked->data[row] = in_range ? mat->data[row][col] : 0;
        if (picked->imag != NULL) {
            picked->imag[row] = in_range ? mat->imag[row][col] : 0;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each row of mat: element k
 * is mat[k][colidx->data[k]].
 *
 * The result is obtained from xfvalloc(colidx->length); an imaginary part
 * is added with fvialloc() when mat has one.  Element k is zero when k is
 * not a valid row of mat or colidx->data[k] is not a valid column.
 *
 * Returns the new vector.
 */
spFVector xfmrextract(spFMatrix mat, spLVector colidx)
{
    long row;
    long col;
    int in_range;
    spFVector picked;

    picked = xfvalloc(colidx->length);
    if (mat->imag != NULL) {
        fvialloc(picked);
    }

    for (row = 0; row < picked->length; row++) {
        col = colidx->data[row];
        in_range = (row < mat->row && col >= 0 && col < mat->col);

        picked->data[row] = in_range ? mat->data[row][col] : 0.0f;
        if (picked->imag != NULL) {
            picked->imag[row] = in_range ? mat->imag[row][col] : 0.0f;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each row of mat: element k
 * is mat[k][colidx->data[k]].
 *
 * The result is obtained from xdvalloc(colidx->length); an imaginary part
 * is added with dvialloc() when mat has one.  Element k is zero when k is
 * not a valid row of mat or colidx->data[k] is not a valid column.
 *
 * Returns the new vector.
 */
spDVector xdmrextract(spDMatrix mat, spLVector colidx)
{
    long row;
    long col;
    int in_range;
    spDVector picked;

    picked = xdvalloc(colidx->length);
    if (mat->imag != NULL) {
        dvialloc(picked);
    }

    for (row = 0; row < picked->length; row++) {
        col = colidx->data[row];
        in_range = (row < mat->row && col >= 0 && col < mat->col);

        picked->data[row] = in_range ? mat->data[row][col] : 0.0;
        if (picked->imag != NULL) {
            picked->imag[row] = in_range ? mat->imag[row][col] : 0.0;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each column of mat: element
 * k is mat[rowidx->data[k]][k].
 *
 * The result is obtained from xsvalloc(rowidx->length); an imaginary part
 * is added with svialloc() when mat has one.  Element k is 0 when k is not
 * a valid column of mat or rowidx->data[k] is not a valid row.
 *
 * Returns the new vector.
 */
spSVector xsmcextract(spSMatrix mat, spLVector rowidx)
{
    long col;
    long row;
    int in_range;
    spSVector picked;

    picked = xsvalloc(rowidx->length);
    if (mat->imag != NULL) {
        svialloc(picked);
    }

    for (col = 0; col < picked->length; col++) {
        row = rowidx->data[col];
        in_range = (col < mat->col && row >= 0 && row < mat->row);

        picked->data[col] = in_range ? mat->data[row][col] : 0;
        if (picked->imag != NULL) {
            picked->imag[col] = in_range ? mat->imag[row][col] : 0;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each column of mat: element
 * k is mat[rowidx->data[k]][k].
 *
 * The result is obtained from xlvalloc(rowidx->length); an imaginary part
 * is added with lvialloc() when mat has one.  Element k is 0 when k is not
 * a valid column of mat or rowidx->data[k] is not a valid row.
 *
 * Returns the new vector.
 */
spLVector xlmcextract(spLMatrix mat, spLVector rowidx)
{
    long col;
    long row;
    int in_range;
    spLVector picked;

    picked = xlvalloc(rowidx->length);
    if (mat->imag != NULL) {
        lvialloc(picked);
    }

    for (col = 0; col < picked->length; col++) {
        row = rowidx->data[col];
        in_range = (col < mat->col && row >= 0 && row < mat->row);

        picked->data[col] = in_range ? mat->data[row][col] : 0;
        if (picked->imag != NULL) {
            picked->imag[col] = in_range ? mat->imag[row][col] : 0;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each column of mat: element
 * k is mat[rowidx->data[k]][k].
 *
 * The result is obtained from xfvalloc(rowidx->length); an imaginary part
 * is added with fvialloc() when mat has one.  Element k is zero when k is
 * not a valid column of mat or rowidx->data[k] is not a valid row.
 *
 * Returns the new vector.
 */
spFVector xfmcextract(spFMatrix mat, spLVector rowidx)
{
    long col;
    long row;
    int in_range;
    spFVector picked;

    picked = xfvalloc(rowidx->length);
    if (mat->imag != NULL) {
        fvialloc(picked);
    }

    for (col = 0; col < picked->length; col++) {
        row = rowidx->data[col];
        in_range = (col < mat->col && row >= 0 && row < mat->row);

        picked->data[col] = in_range ? mat->data[row][col] : 0.0f;
        if (picked->imag != NULL) {
            picked->imag[col] = in_range ? mat->imag[row][col] : 0.0f;
        }
    }

    return picked;
}

/*
 * Build a vector with one element taken from each column of mat: element
 * k is mat[rowidx->data[k]][k].
 *
 * The result is obtained from xdvalloc(rowidx->length); an imaginary part
 * is added with dvialloc() when mat has one.  Element k is zero when k is
 * not a valid column of mat or rowidx->data[k] is not a valid row.
 *
 * Returns the new vector.
 */
spDVector xdmcextract(spDMatrix mat, spLVector rowidx)
{
    long col;
    long row;
    int in_range;
    spDVector picked;

    picked = xdvalloc(rowidx->length);
    if (mat->imag != NULL) {
        dvialloc(picked);
    }

    for (col = 0; col < picked->length; col++) {
        row = rowidx->data[col];
        in_range = (col < mat->col && row >= 0 && row < mat->row);

        picked->data[col] = in_range ? mat->data[row][col] : 0.0;
        if (picked->imag != NULL) {
            picked->imag[col] = in_range ? mat->imag[row][col] : 0.0;
        }
    }

    return picked;
}

/*
 * Build a matrix whose row k is a copy of row rowidx->data[k] of mat.
 *
 * The result is obtained from xsmalloc(rowidx->length, mat->col); an
 * imaginary part is added with smialloc() when mat has one.  A row whose
 * index is outside mat is filled with 0.
 *
 * Returns the new matrix.
 */
spSMatrix xsmextractrows(spSMatrix mat, spLVector rowidx)
{
    long dst;
    long col;
    long src;
    size_t row_bytes;
    spSMatrix picked;

    picked = xsmalloc(rowidx->length, mat->col);
    if (mat->imag != NULL) {
        smialloc(picked);
    }
    row_bytes = picked->col * sizeof(short);

    for (dst = 0; dst < picked->row; dst++) {
        src = rowidx->data[dst];

        if (src < 0 || src >= mat->row) {
            /* no such source row: clear the whole destination row */
            for (col = 0; col < picked->col; col++) {
                picked->data[dst][col] = 0;
                if (picked->imag != NULL) {
                    picked->imag[dst][col] = 0;
                }
            }
            continue;
        }

        memcpy(picked->data[dst], mat->data[src], row_bytes);
        if (picked->imag != NULL) {
            memcpy(picked->imag[dst], mat->imag[src], row_bytes);
        }
    }

    return picked;
}

/*
 * Build a matrix whose row k is a copy of row rowidx->data[k] of mat.
 *
 * The result is obtained from xlmalloc(rowidx->length, mat->col); an
 * imaginary part is added with lmialloc() when mat has one.  A row whose
 * index is outside mat is filled with 0.
 *
 * Returns the new matrix.
 */
spLMatrix xlmextractrows(spLMatrix mat, spLVector rowidx)
{
    long dst;
    long col;
    long src;
    size_t row_bytes;
    spLMatrix picked;

    picked = xlmalloc(rowidx->length, mat->col);
    if (mat->imag != NULL) {
        lmialloc(picked);
    }
    row_bytes = picked->col * sizeof(long);

    for (dst = 0; dst < picked->row; dst++) {
        src = rowidx->data[dst];

        if (src < 0 || src >= mat->row) {
            /* no such source row: clear the whole destination row */
            for (col = 0; col < picked->col; col++) {
                picked->data[dst][col] = 0;
                if (picked->imag != NULL) {
                    picked->imag[dst][col] = 0;
                }
            }
            continue;
        }

        memcpy(picked->data[dst], mat->data[src], row_bytes);
        if (picked->imag != NULL) {
            memcpy(picked->imag[dst], mat->imag[src], row_bytes);
        }
    }

    return picked;
}

/*
 * Build a matrix whose row k is a copy of row rowidx->data[k] of mat.
 *
 * The result is obtained from xfmalloc(rowidx->length, mat->col); an
 * imaginary part is added with fmialloc() when mat has one.  A row whose
 * index is outside mat is filled with zeros.
 *
 * Returns the new matrix.
 */
spFMatrix xfmextractrows(spFMatrix mat, spLVector rowidx)
{
    long dst;
    long col;
    long src;
    size_t row_bytes;
    spFMatrix picked;

    picked = xfmalloc(rowidx->length, mat->col);
    if (mat->imag != NULL) {
        fmialloc(picked);
    }
    row_bytes = picked->col * sizeof(float);

    for (dst = 0; dst < picked->row; dst++) {
        src = rowidx->data[dst];

        if (src < 0 || src >= mat->row) {
            /* no such source row: clear the whole destination row */
            for (col = 0; col < picked->col; col++) {
                picked->data[dst][col] = 0.0f;
                if (picked->imag != NULL) {
                    picked->imag[dst][col] = 0.0f;
                }
            }
            continue;
        }

        memcpy(picked->data[dst], mat->data[src], row_bytes);
        if (picked->imag != NULL) {
            memcpy(picked->imag[dst], mat->imag[src], row_bytes);
        }
    }

    return picked;
}

/*
 * Build a matrix whose row k is a copy of row rowidx->data[k] of mat.
 *
 * The result is obtained from xdmalloc(rowidx->length, mat->col); an
 * imaginary part is added with dmialloc() when mat has one.  A row whose
 * index is outside mat is filled with zeros.
 *
 * Returns the new matrix.
 */
spDMatrix xdmextractrows(spDMatrix mat, spLVector rowidx)
{
    long dst;
    long col;
    long src;
    size_t row_bytes;
    spDMatrix picked;

    picked = xdmalloc(rowidx->length, mat->col);
    if (mat->imag != NULL) {
        dmialloc(picked);
    }
    row_bytes = picked->col * sizeof(double);

    for (dst = 0; dst < picked->row; dst++) {
        src = rowidx->data[dst];

        if (src < 0 || src >= mat->row) {
            /* no such source row: clear the whole destination row */
            for (col = 0; col < picked->col; col++) {
                picked->data[dst][col] = 0.0;
                if (picked->imag != NULL) {
                    picked->imag[dst][col] = 0.0;
                }
            }
            continue;
        }

        memcpy(picked->data[dst], mat->data[src], row_bytes);
        if (picked->imag != NULL) {
            memcpy(picked->imag[dst], mat->imag[src], row_bytes);
        }
    }

    return picked;
}

/*
 * xsmextractcols: gather columns of `mat` into a newly allocated short
 * matrix.
 *
 * The result has mat->row rows and colidx->length columns.  Output column
 * j is a copy of mat column colidx->data[j]; an index outside
 * [0, mat->col) produces a column of zeros.  An imaginary part is
 * requested for the result (smialloc) only when mat->imag is set.
 */
spSMatrix xsmextractcols(spSMatrix mat, spLVector colidx)
{
    long dst, r, src;
    int in_range;
    spSMatrix out;

    out = xsmalloc(mat->row, colidx->length);
    if (mat->imag != NULL) {
        smialloc(out);
    }

    for (dst = 0; dst < out->col; dst++) {
        src = colidx->data[dst];
        in_range = (src >= 0 && src < mat->col);

        for (r = 0; r < out->row; r++) {
            /* the source element is only touched when the index is valid */
            out->data[r][dst] = in_range ? mat->data[r][src] : 0;
            if (out->imag != NULL) {
                out->imag[r][dst] = in_range ? mat->imag[r][src] : 0;
            }
        }
    }

    return out;
}

/*
 * xlmextractcols: gather columns of `mat` into a newly allocated long
 * matrix.
 *
 * The result has mat->row rows and colidx->length columns.  Output column
 * j is a copy of mat column colidx->data[j]; an index outside
 * [0, mat->col) produces a column of zeros.  An imaginary part is
 * requested for the result (lmialloc) only when mat->imag is set.
 */
spLMatrix xlmextractcols(spLMatrix mat, spLVector colidx)
{
    long dst, r, src;
    int in_range;
    spLMatrix out;

    out = xlmalloc(mat->row, colidx->length);
    if (mat->imag != NULL) {
        lmialloc(out);
    }

    for (dst = 0; dst < out->col; dst++) {
        src = colidx->data[dst];
        in_range = (src >= 0 && src < mat->col);

        for (r = 0; r < out->row; r++) {
            /* the source element is only touched when the index is valid */
            out->data[r][dst] = in_range ? mat->data[r][src] : 0;
            if (out->imag != NULL) {
                out->imag[r][dst] = in_range ? mat->imag[r][src] : 0;
            }
        }
    }

    return out;
}

/*
 * xfmextractcols: gather columns of `mat` into a newly allocated float
 * matrix.
 *
 * The result has mat->row rows and colidx->length columns.  Output column
 * j is a copy of mat column colidx->data[j]; an index outside
 * [0, mat->col) produces a column of zeros.  An imaginary part is
 * requested for the result (fmialloc) only when mat->imag is set.
 */
spFMatrix xfmextractcols(spFMatrix mat, spLVector colidx)
{
    long dst, r, src;
    int in_range;
    spFMatrix out;

    out = xfmalloc(mat->row, colidx->length);
    if (mat->imag != NULL) {
        fmialloc(out);
    }

    for (dst = 0; dst < out->col; dst++) {
        src = colidx->data[dst];
        in_range = (src >= 0 && src < mat->col);

        for (r = 0; r < out->row; r++) {
            /* the source element is only touched when the index is valid */
            out->data[r][dst] = in_range ? mat->data[r][src] : 0.0f;
            if (out->imag != NULL) {
                out->imag[r][dst] = in_range ? mat->imag[r][src] : 0.0f;
            }
        }
    }

    return out;
}

/*
 * xdmextractcols: gather columns of `mat` into a newly allocated double
 * matrix.
 *
 * The result has mat->row rows and colidx->length columns.  Output column
 * j is a copy of mat column colidx->data[j]; an index outside
 * [0, mat->col) produces a column of zeros.  An imaginary part is
 * requested for the result (dmialloc) only when mat->imag is set.
 */
spDMatrix xdmextractcols(spDMatrix mat, spLVector colidx)
{
    long dst, r, src;
    int in_range;
    spDMatrix out;

    out = xdmalloc(mat->row, colidx->length);
    if (mat->imag != NULL) {
        dmialloc(out);
    }

    for (dst = 0; dst < out->col; dst++) {
        src = colidx->data[dst];
        in_range = (src >= 0 && src < mat->col);

        for (r = 0; r < out->row; r++) {
            /* the source element is only touched when the index is valid */
            out->data[r][dst] = in_range ? mat->data[r][src] : 0.0;
            if (out->imag != NULL) {
                out->imag[r][dst] = in_range ? mat->imag[r][src] : 0.0;
            }
        }
    }

    return out;
}

/*
 * xsmextractrowscols: build a new short matrix from the rows and columns
 * of `mat` selected by rowidx and colidx.
 *
 * If both index vectors are NODATA the function returns NODATA.  If only
 * one is NODATA the call is forwarded to xsmextractrows / xsmextractcols
 * with the other vector.  Otherwise element (i, j) of the result is
 * mat[rowidx->data[i]][colidx->data[j]], or zero when either index is out
 * of range.  The imaginary part is carried along when mat has one.
 */
spSMatrix xsmextractrowscols(spSMatrix mat, spLVector rowidx, spLVector colidx)
{
    long i, j, sr, sc;
    int row_ok;
    spSMatrix out;

    if (rowidx == NODATA) {
        if (colidx == NODATA) {
            return NODATA;
        }
        return xsmextractcols(mat, colidx);
    }
    if (colidx == NODATA) {
        return xsmextractrows(mat, rowidx);
    }

    out = xsmalloc(rowidx->length, colidx->length);
    if (mat->imag != NULL) {
        smialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = rowidx->data[i];
        row_ok = (sr >= 0 && sr < mat->row);

        for (j = 0; j < out->col; j++) {
            if (row_ok) {
                /* column index is only consulted for valid rows */
                sc = colidx->data[j];
                if (sc >= 0 && sc < mat->col) {
                    out->data[i][j] = mat->data[sr][sc];
                    if (out->imag != NULL) {
                        out->imag[i][j] = mat->imag[sr][sc];
                    }
                    continue;
                }
            }

            out->data[i][j] = 0;
            if (out->imag != NULL) {
                out->imag[i][j] = 0;
            }
        }
    }

    return out;
}

/*
 * xlmextractrowscols: build a new long matrix from the rows and columns
 * of `mat` selected by rowidx and colidx.
 *
 * If both index vectors are NODATA the function returns NODATA.  If only
 * one is NODATA the call is forwarded to xlmextractrows / xlmextractcols
 * with the other vector.  Otherwise element (i, j) of the result is
 * mat[rowidx->data[i]][colidx->data[j]], or zero when either index is out
 * of range.  The imaginary part is carried along when mat has one.
 */
spLMatrix xlmextractrowscols(spLMatrix mat, spLVector rowidx, spLVector colidx)
{
    long i, j, sr, sc;
    int row_ok;
    spLMatrix out;

    if (rowidx == NODATA) {
        if (colidx == NODATA) {
            return NODATA;
        }
        return xlmextractcols(mat, colidx);
    }
    if (colidx == NODATA) {
        return xlmextractrows(mat, rowidx);
    }

    out = xlmalloc(rowidx->length, colidx->length);
    if (mat->imag != NULL) {
        lmialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = rowidx->data[i];
        row_ok = (sr >= 0 && sr < mat->row);

        for (j = 0; j < out->col; j++) {
            if (row_ok) {
                /* column index is only consulted for valid rows */
                sc = colidx->data[j];
                if (sc >= 0 && sc < mat->col) {
                    out->data[i][j] = mat->data[sr][sc];
                    if (out->imag != NULL) {
                        out->imag[i][j] = mat->imag[sr][sc];
                    }
                    continue;
                }
            }

            out->data[i][j] = 0;
            if (out->imag != NULL) {
                out->imag[i][j] = 0;
            }
        }
    }

    return out;
}

/*
 * xfmextractrowscols: build a new float matrix from the rows and columns
 * of `mat` selected by rowidx and colidx.
 *
 * If both index vectors are NODATA the function returns NODATA.  If only
 * one is NODATA the call is forwarded to xfmextractrows / xfmextractcols
 * with the other vector.  Otherwise element (i, j) of the result is
 * mat[rowidx->data[i]][colidx->data[j]], or zero when either index is out
 * of range.  The imaginary part is carried along when mat has one.
 */
spFMatrix xfmextractrowscols(spFMatrix mat, spLVector rowidx, spLVector colidx)
{
    long i, j, sr, sc;
    int row_ok;
    spFMatrix out;

    if (rowidx == NODATA) {
        if (colidx == NODATA) {
            return NODATA;
        }
        return xfmextractcols(mat, colidx);
    }
    if (colidx == NODATA) {
        return xfmextractrows(mat, rowidx);
    }

    out = xfmalloc(rowidx->length, colidx->length);
    if (mat->imag != NULL) {
        fmialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = rowidx->data[i];
        row_ok = (sr >= 0 && sr < mat->row);

        for (j = 0; j < out->col; j++) {
            if (row_ok) {
                /* column index is only consulted for valid rows */
                sc = colidx->data[j];
                if (sc >= 0 && sc < mat->col) {
                    out->data[i][j] = mat->data[sr][sc];
                    if (out->imag != NULL) {
                        out->imag[i][j] = mat->imag[sr][sc];
                    }
                    continue;
                }
            }

            out->data[i][j] = 0.0f;
            if (out->imag != NULL) {
                out->imag[i][j] = 0.0f;
            }
        }
    }

    return out;
}

/*
 * xdmextractrowscols: build a new double matrix from the rows and columns
 * of `mat` selected by rowidx and colidx.
 *
 * If both index vectors are NODATA the function returns NODATA.  If only
 * one is NODATA the call is forwarded to xdmextractrows / xdmextractcols
 * with the other vector.  Otherwise element (i, j) of the result is
 * mat[rowidx->data[i]][colidx->data[j]], or zero when either index is out
 * of range.  The imaginary part is carried along when mat has one.
 */
spDMatrix xdmextractrowscols(spDMatrix mat, spLVector rowidx, spLVector colidx)
{
    long i, j, sr, sc;
    int row_ok;
    spDMatrix out;

    if (rowidx == NODATA) {
        if (colidx == NODATA) {
            return NODATA;
        }
        return xdmextractcols(mat, colidx);
    }
    if (colidx == NODATA) {
        return xdmextractrows(mat, rowidx);
    }

    out = xdmalloc(rowidx->length, colidx->length);
    if (mat->imag != NULL) {
        dmialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = rowidx->data[i];
        row_ok = (sr >= 0 && sr < mat->row);

        for (j = 0; j < out->col; j++) {
            if (row_ok) {
                /* column index is only consulted for valid rows */
                sc = colidx->data[j];
                if (sc >= 0 && sc < mat->col) {
                    out->data[i][j] = mat->data[sr][sc];
                    if (out->imag != NULL) {
                        out->imag[i][j] = mat->imag[sr][sc];
                    }
                    continue;
                }
            }

            out->data[i][j] = 0.0;
            if (out->imag != NULL) {
                out->imag[i][j] = 0.0;
            }
        }
    }

    return out;
}

/*
 * smabs: replace every element of a short matrix by its absolute value,
 * in place.
 *
 * Without an imaginary part each element becomes ABS(element).  With an
 * imaginary part each real element becomes CABS(real, imag) cast to short
 * (presumably the complex magnitude -- see the CABS macro), after which
 * smifree(mat) is called on the matrix.
 */
void smabs(spSMatrix mat)
{
    long r, c;

    if (mat->imag != NULL) {
        for (r = 0; r < mat->row; r++) {
            for (c = 0; c < mat->col; c++) {
                mat->data[r][c] = (short)CABS(mat->data[r][c], mat->imag[r][c]);
            }
        }
        smifree(mat);
        return;
    }

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = ABS(mat->data[r][c]);
        }
    }
}

/*
 * lmabs: replace every element of a long matrix by its absolute value,
 * in place.
 *
 * Without an imaginary part each element becomes ABS(element).  With an
 * imaginary part each real element becomes CABS(real, imag) cast to long
 * (presumably the complex magnitude -- see the CABS macro), after which
 * lmifree(mat) is called on the matrix.
 */
void lmabs(spLMatrix mat)
{
    long r, c;

    if (mat->imag != NULL) {
        for (r = 0; r < mat->row; r++) {
            for (c = 0; c < mat->col; c++) {
                mat->data[r][c] = (long)CABS(mat->data[r][c], mat->imag[r][c]);
            }
        }
        lmifree(mat);
        return;
    }

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = ABS(mat->data[r][c]);
        }
    }
}

/*
 * fmabs: replace every element of a float matrix by its absolute value,
 * in place.
 *
 * Without an imaginary part each element becomes ABS(element).  With an
 * imaginary part each real element becomes CABS(real, imag) cast to float
 * (presumably the complex magnitude -- see the CABS macro), after which
 * fmifree(mat) is called on the matrix.
 */
void fmabs(spFMatrix mat)
{
    long r, c;

    if (mat->imag != NULL) {
        for (r = 0; r < mat->row; r++) {
            for (c = 0; c < mat->col; c++) {
                mat->data[r][c] = (float)CABS(mat->data[r][c], mat->imag[r][c]);
            }
        }
        fmifree(mat);
        return;
    }

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = ABS(mat->data[r][c]);
        }
    }
}

/*
 * dmabs: replace every element of a double matrix by its absolute value,
 * in place.
 *
 * Without an imaginary part each element becomes ABS(element).  With an
 * imaginary part each real element becomes CABS(real, imag) (presumably
 * the complex magnitude -- see the CABS macro), after which dmifree(mat)
 * is called on the matrix.
 */
void dmabs(spDMatrix mat)
{
    long r, c;

    if (mat->imag != NULL) {
        for (r = 0; r < mat->row; r++) {
            for (c = 0; c < mat->col; c++) {
                mat->data[r][c] = CABS(mat->data[r][c], mat->imag[r][c]);
            }
        }
        dmifree(mat);
        return;
    }

    for (r = 0; r < mat->row; r++) {
        for (c = 0; c < mat->col; c++) {
            mat->data[r][c] = ABS(mat->data[r][c]);
        }
    }
}

/*
 * xsmabs: non-destructive counterpart of smabs.  Obtains a copy of `mat`
 * from xsmclone, applies smabs to that copy and returns it; `mat` itself
 * is not passed to smabs.
 */
spSMatrix xsmabs(spSMatrix mat)
{
    spSMatrix result = xsmclone(mat);

    smabs(result);
    return result;
}

/*
 * xlmabs: non-destructive counterpart of lmabs.  Obtains a copy of `mat`
 * from xlmclone, applies lmabs to that copy and returns it; `mat` itself
 * is not passed to lmabs.
 */
spLMatrix xlmabs(spLMatrix mat)
{
    spLMatrix result = xlmclone(mat);

    lmabs(result);
    return result;
}

/*
 * xfmabs: non-destructive counterpart of fmabs.  Obtains a copy of `mat`
 * from xfmclone, applies fmabs to that copy and returns it; `mat` itself
 * is not passed to fmabs.
 */
spFMatrix xfmabs(spFMatrix mat)
{
    spFMatrix result = xfmclone(mat);

    fmabs(result);
    return result;
}

/*
 * xdmabs: non-destructive counterpart of dmabs.  Obtains a copy of `mat`
 * from xdmclone, applies dmabs to that copy and returns it; `mat` itself
 * is not passed to dmabs.
 */
spDMatrix xdmabs(spDMatrix mat)
{
    spDMatrix result = xdmclone(mat);

    dmabs(result);
    return result;
}

/*
 * xsmrmax: for each row of a short matrix, find the column holding the
 * largest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xsmrmax(spSMatrix mat)
{
    long r, c;
    long best_col;
    short best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xlmrmax: for each row of a long matrix, find the column holding the
 * largest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xlmrmax(spLMatrix mat)
{
    long r, c;
    long best_col;
    long best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xfmrmax: for each row of a float matrix, find the column holding the
 * largest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xfmrmax(spFMatrix mat)
{
    long r, c;
    long best_col;
    float best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xdmrmax: for each row of a double matrix, find the column holding the
 * largest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xdmrmax(spDMatrix mat)
{
    long r, c;
    long best_col;
    double best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xsmrmin: for each row of a short matrix, find the column holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xsmrmin(spSMatrix mat)
{
    long r, c;
    long best_col;
    short best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xlmrmin: for each row of a long matrix, find the column holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xlmrmin(spLMatrix mat)
{
    long r, c;
    long best_col;
    long best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xfmrmin: for each row of a float matrix, find the column holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xfmrmin(spFMatrix mat)
{
    long r, c;
    long best_col;
    float best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xdmrmin: for each row of a double matrix, find the column holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->row (from xlvalloc) whose r-th
 * entry is that column index; on ties the lowest column index is kept.
 * Element [r][0] of every row is read unconditionally, so each row must
 * have at least one column.  The imaginary part is not consulted.
 */
spLVector xdmrmin(spDMatrix mat)
{
    long r, c;
    long best_col;
    double best;
    spLVector out;

    out = xlvalloc(mat->row);

    for (r = 0; r < mat->row; r++) {
        best_col = 0;
        best = mat->data[r][0];

        c = 1;
        while (c < mat->col) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_col = c;
            }
            c++;
        }

        out->data[r] = best_col;
    }

    return out;
}

/*
 * xsmcmax: for each column of a short matrix, find the row holding the
 * largest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xsmcmax(spSMatrix mat)
{
    long c, r;
    long best_row;
    short best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xlmcmax: for each column of a long matrix, find the row holding the
 * largest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xlmcmax(spLMatrix mat)
{
    long c, r;
    long best_row;
    long best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xfmcmax: for each column of a float matrix, find the row holding the
 * largest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xfmcmax(spFMatrix mat)
{
    long c, r;
    long best_row;
    float best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xdmcmax: for each column of a double matrix, find the row holding the
 * largest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xdmcmax(spDMatrix mat)
{
    long c, r;
    long best_row;
    double best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] > best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xsmcmin: for each column of a short matrix, find the row holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xsmcmin(spSMatrix mat)
{
    long c, r;
    long best_row;
    short best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xlmcmin: for each column of a long matrix, find the row holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xlmcmin(spLMatrix mat)
{
    long c, r;
    long best_row;
    long best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xfmcmin: for each column of a float matrix, find the row holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xfmcmin(spFMatrix mat)
{
    long c, r;
    long best_row;
    float best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * xdmcmin: for each column of a double matrix, find the row holding the
 * smallest real value.
 *
 * Returns a long vector of length mat->col (from xlvalloc) whose c-th
 * entry is that row index; on ties the lowest row index is kept.  Row 0
 * is read unconditionally for every column, so the matrix must have at
 * least one row when it has columns.  The imaginary part is not consulted.
 */
spLVector xdmcmin(spDMatrix mat)
{
    long c, r;
    long best_row;
    double best;
    spLVector out;

    out = xlvalloc(mat->col);

    for (c = 0; c < mat->col; c++) {
        best_row = 0;
        best = mat->data[0][c];

        r = 1;
        while (r < mat->row) {
            if (mat->data[r][c] < best) {
                best = mat->data[r][c];
                best_row = r;
            }
            r++;
        }

        out->data[c] = best_row;
    }

    return out;
}

/*
 * smmin: return the smallest real element of a short matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * smaller value replaces the current candidate, so among equal minima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
short smmin(spSMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    short lowest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] < lowest) {
                lowest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return lowest;
}

/*
 * lmmin: return the smallest real element of a long matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * smaller value replaces the current candidate, so among equal minima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
long lmmin(spLMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    long lowest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] < lowest) {
                lowest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return lowest;
}

/*
 * fmmin: return the smallest real element of a float matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * smaller value replaces the current candidate, so among equal minima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
float fmmin(spFMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    float lowest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] < lowest) {
                lowest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return lowest;
}

/*
 * dmmin: return the smallest real element of a double matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * smaller value replaces the current candidate, so among equal minima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
double dmmin(spDMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    double lowest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] < lowest) {
                lowest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return lowest;
}

/*
 * smmax: return the largest real element of a short matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * larger value replaces the current candidate, so among equal maxima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
short smmax(spSMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    short highest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] > highest) {
                highest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return highest;
}

/*
 * lmmax: return the largest real element of a long matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * larger value replaces the current candidate, so among equal maxima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
long lmmax(spLMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    long highest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] > highest) {
                highest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return highest;
}

/*
 * fmmax: return the largest real element of a float matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * larger value replaces the current candidate, so among equal maxima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
float fmmax(spFMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    float highest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] > highest) {
                highest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return highest;
}

/*
 * dmmax: return the largest real element of a double matrix and,
 * optionally, where it is.
 *
 * row / col: when non-NULL, receive the row / column index of the
 *            returned element.
 *
 * The scan runs column by column (rows innermost) and only a strictly
 * larger value replaces the current candidate, so among equal maxima the
 * first one met in that order is reported.  Element [0][0] is read
 * unconditionally; the matrix must not be empty.
 */
double dmmax(spDMatrix mat, long *row, long *col)
{
    long c, r;
    long at_row = 0;
    long at_col = 0;
    double highest = mat->data[0][0];

    for (c = 0; c < mat->col; c++) {
        for (r = 0; r < mat->row; r++) {
            if (mat->data[r][c] > highest) {
                highest = mat->data[r][c];
                at_row = r;
                at_col = c;
            }
        }
    }

    if (row != NULL) {
        *row = at_row;
    }
    if (col != NULL) {
        *col = at_col;
    }

    return highest;
}

/*
 * xsmcrop: copy a rectangular window of `mat` into a newly allocated
 * short matrix of row_length x col_length elements.
 *
 * Element (i, j) of the result is mat[row_offset + i][col_offset + j].
 * Positions of the window that fall outside `mat` (offsets may be
 * negative, or the window may extend past the edges) are filled with
 * zero.  The imaginary part is carried along when mat has one.
 */
spSMatrix xsmcrop(spSMatrix mat, long row_offset, long row_length, long col_offset, long col_length)
{
    long i, j, sr, sc;
    spSMatrix out;

    out = xsmalloc(row_length, col_length);
    if (mat->imag != NULL) {
        smialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = i + row_offset;

        for (j = 0; j < out->col; j++) {
            sc = j + col_offset;

            if (sr < 0 || sr >= mat->row || sc < 0 || sc >= mat->col) {
                /* window position lies outside the source: pad */
                out->data[i][j] = 0;
                if (out->imag != NULL) {
                    out->imag[i][j] = 0;
                }
                continue;
            }

            out->data[i][j] = mat->data[sr][sc];
            if (out->imag != NULL) {
                out->imag[i][j] = mat->imag[sr][sc];
            }
        }
    }

    return out;
}

/*
 * xlmcrop: copy a rectangular window of `mat` into a newly allocated
 * long matrix of row_length x col_length elements.
 *
 * Element (i, j) of the result is mat[row_offset + i][col_offset + j].
 * Positions of the window that fall outside `mat` (offsets may be
 * negative, or the window may extend past the edges) are filled with
 * zero.  The imaginary part is carried along when mat has one.
 */
spLMatrix xlmcrop(spLMatrix mat, long row_offset, long row_length, long col_offset, long col_length)
{
    long i, j, sr, sc;
    spLMatrix out;

    out = xlmalloc(row_length, col_length);
    if (mat->imag != NULL) {
        lmialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = i + row_offset;

        for (j = 0; j < out->col; j++) {
            sc = j + col_offset;

            if (sr < 0 || sr >= mat->row || sc < 0 || sc >= mat->col) {
                /* window position lies outside the source: pad */
                out->data[i][j] = 0;
                if (out->imag != NULL) {
                    out->imag[i][j] = 0;
                }
                continue;
            }

            out->data[i][j] = mat->data[sr][sc];
            if (out->imag != NULL) {
                out->imag[i][j] = mat->imag[sr][sc];
            }
        }
    }

    return out;
}

/*
 * xfmcrop: copy a rectangular window of `mat` into a newly allocated
 * float matrix of row_length x col_length elements.
 *
 * Element (i, j) of the result is mat[row_offset + i][col_offset + j].
 * Positions of the window that fall outside `mat` (offsets may be
 * negative, or the window may extend past the edges) are filled with
 * zero.  The imaginary part is carried along when mat has one.
 */
spFMatrix xfmcrop(spFMatrix mat, long row_offset, long row_length, long col_offset, long col_length)
{
    long i, j, sr, sc;
    spFMatrix out;

    out = xfmalloc(row_length, col_length);
    if (mat->imag != NULL) {
        fmialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = i + row_offset;

        for (j = 0; j < out->col; j++) {
            sc = j + col_offset;

            if (sr < 0 || sr >= mat->row || sc < 0 || sc >= mat->col) {
                /* window position lies outside the source: pad */
                out->data[i][j] = 0.0f;
                if (out->imag != NULL) {
                    out->imag[i][j] = 0.0f;
                }
                continue;
            }

            out->data[i][j] = mat->data[sr][sc];
            if (out->imag != NULL) {
                out->imag[i][j] = mat->imag[sr][sc];
            }
        }
    }

    return out;
}

/*
 * xdmcrop: copy a rectangular window of `mat` into a newly allocated
 * double matrix of row_length x col_length elements.
 *
 * Element (i, j) of the result is mat[row_offset + i][col_offset + j].
 * Positions of the window that fall outside `mat` (offsets may be
 * negative, or the window may extend past the edges) are filled with
 * zero.  The imaginary part is carried along when mat has one.
 */
spDMatrix xdmcrop(spDMatrix mat, long row_offset, long row_length, long col_offset, long col_length)
{
    long i, j, sr, sc;
    spDMatrix out;

    out = xdmalloc(row_length, col_length);
    if (mat->imag != NULL) {
        dmialloc(out);
    }

    for (i = 0; i < out->row; i++) {
        sr = i + row_offset;

        for (j = 0; j < out->col; j++) {
            sc = j + col_offset;

            if (sr < 0 || sr >= mat->row || sc < 0 || sc >= mat->col) {
                /* window position lies outside the source: pad */
                out->data[i][j] = 0.0;
                if (out->imag != NULL) {
                    out->imag[i][j] = 0.0;
                }
                continue;
            }

            out->data[i][j] = mat->data[sr][sc];
            if (out->imag != NULL) {
                out->imag[i][j] = mat->imag[sr][sc];
            }
        }
    }

    return out;
}

/*
 * xsmhcat: horizontal concatenation [a b] of two short matrices.
 *
 * Returns NODATA when the row counts differ.  Otherwise returns a new
 * a->row x (a->col + b->col) matrix whose rows are the rows of a followed
 * by the matching rows of b.  Only the real parts (data) are copied; the
 * imaginary parts of a and b are not looked at.
 */
spSMatrix xsmhcat(spSMatrix a, spSMatrix b)
{
    spSMatrix out;
    long r;

    if (a->row != b->row) {
        return NODATA;
    }

    out = xsmalloc(a->row, a->col + b->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(short));
        /* b's row lands immediately after a's a->col elements */
        memcpy(out->data[r] + a->col, b->data[r], b->col * sizeof(short));
        r++;
    }

    return out;
}

/*
 * xsmvcat: vertical concatenation [a; b] of two short matrices.
 *
 * Returns NODATA when the column counts differ.  Otherwise returns a new
 * (a->row + b->row) x a->col matrix holding the rows of a followed by the
 * rows of b.  Only the real parts (data) are copied; the imaginary parts
 * of a and b are not looked at.
 */
spSMatrix xsmvcat(spSMatrix a, spSMatrix b)
{
    spSMatrix out;
    long r;

    if (a->col != b->col) {
        return NODATA;
    }

    out = xsmalloc(a->row + b->row, a->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(short));
        r++;
    }

    /* rows of b are appended below the a->row rows already written */
    r = 0;
    while (r < b->row) {
        memcpy(out->data[a->row + r], b->data[r], b->col * sizeof(short));
        r++;
    }

    return out;
}

/*
 * xlmhcat: horizontal concatenation [a b] of two long matrices.
 *
 * Returns NODATA when the row counts differ.  Otherwise returns a new
 * a->row x (a->col + b->col) matrix whose rows are the rows of a followed
 * by the matching rows of b.  Only the real parts (data) are copied; the
 * imaginary parts of a and b are not looked at.
 */
spLMatrix xlmhcat(spLMatrix a, spLMatrix b)
{
    spLMatrix out;
    long r;

    if (a->row != b->row) {
        return NODATA;
    }

    out = xlmalloc(a->row, a->col + b->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(long));
        /* b's row lands immediately after a's a->col elements */
        memcpy(out->data[r] + a->col, b->data[r], b->col * sizeof(long));
        r++;
    }

    return out;
}

/*
 * xlmvcat: vertical concatenation [a; b] of two long matrices.
 *
 * Returns NODATA when the column counts differ.  Otherwise returns a new
 * (a->row + b->row) x a->col matrix holding the rows of a followed by the
 * rows of b.  Only the real parts (data) are copied; the imaginary parts
 * of a and b are not looked at.
 */
spLMatrix xlmvcat(spLMatrix a, spLMatrix b)
{
    spLMatrix out;
    long r;

    if (a->col != b->col) {
        return NODATA;
    }

    out = xlmalloc(a->row + b->row, a->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(long));
        r++;
    }

    /* rows of b are appended below the a->row rows already written */
    r = 0;
    while (r < b->row) {
        memcpy(out->data[a->row + r], b->data[r], b->col * sizeof(long));
        r++;
    }

    return out;
}

/*
 * xfmhcat: horizontal concatenation [a b] of two float matrices.
 *
 * Returns NODATA when the row counts differ.  Otherwise returns a new
 * a->row x (a->col + b->col) matrix whose rows are the rows of a followed
 * by the matching rows of b.  Only the real parts (data) are copied; the
 * imaginary parts of a and b are not looked at.
 */
spFMatrix xfmhcat(spFMatrix a, spFMatrix b)
{
    spFMatrix out;
    long r;

    if (a->row != b->row) {
        return NODATA;
    }

    out = xfmalloc(a->row, a->col + b->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(float));
        /* b's row lands immediately after a's a->col elements */
        memcpy(out->data[r] + a->col, b->data[r], b->col * sizeof(float));
        r++;
    }

    return out;
}

/*
 * xfmvcat: vertical concatenation [a; b] of two float matrices.
 *
 * Returns NODATA when the column counts differ.  Otherwise returns a new
 * (a->row + b->row) x a->col matrix holding the rows of a followed by the
 * rows of b.  Only the real parts (data) are copied; the imaginary parts
 * of a and b are not looked at.
 */
spFMatrix xfmvcat(spFMatrix a, spFMatrix b)
{
    spFMatrix out;
    long r;

    if (a->col != b->col) {
        return NODATA;
    }

    out = xfmalloc(a->row + b->row, a->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(float));
        r++;
    }

    /* rows of b are appended below the a->row rows already written */
    r = 0;
    while (r < b->row) {
        memcpy(out->data[a->row + r], b->data[r], b->col * sizeof(float));
        r++;
    }

    return out;
}

/*
 * xdmhcat: horizontal concatenation [a b] of two double matrices.
 *
 * Returns NODATA when the row counts differ.  Otherwise returns a new
 * a->row x (a->col + b->col) matrix whose rows are the rows of a followed
 * by the matching rows of b.  Only the real parts (data) are copied; the
 * imaginary parts of a and b are not looked at.
 */
spDMatrix xdmhcat(spDMatrix a, spDMatrix b)
{
    spDMatrix out;
    long r;

    if (a->row != b->row) {
        return NODATA;
    }

    out = xdmalloc(a->row, a->col + b->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(double));
        /* b's row lands immediately after a's a->col elements */
        memcpy(out->data[r] + a->col, b->data[r], b->col * sizeof(double));
        r++;
    }

    return out;
}

/*
 * xdmvcat: vertical concatenation [a; b] of two double matrices.
 *
 * Returns NODATA when the column counts differ.  Otherwise returns a new
 * (a->row + b->row) x a->col matrix holding the rows of a followed by the
 * rows of b.  Only the real parts (data) are copied; the imaginary parts
 * of a and b are not looked at.
 */
spDMatrix xdmvcat(spDMatrix a, spDMatrix b)
{
    spDMatrix out;
    long r;

    if (a->col != b->col) {
        return NODATA;
    }

    out = xdmalloc(a->row + b->row, a->col);

    r = 0;
    while (r < a->row) {
        memcpy(out->data[r], a->data[r], a->col * sizeof(double));
        r++;
    }

    /* rows of b are appended below the a->row rows already written */
    r = 0;
    while (r < b->row) {
        memcpy(out->data[a->row + r], b->data[r], b->col * sizeof(double));
        r++;
    }

    return out;
}

/*
 * smshiftrow: shift the rows of a short matrix in place.
 *
 * shift > 0 moves row k - shift into row k (content moves toward higher
 * row indices); shift < 0 moves content toward lower row indices.  Rows
 * that have no source row inside the matrix are set to zero.  The
 * imaginary part, when present, is shifted the same way after the real
 * part.  A shift of zero leaves x untouched.
 */
void smshiftrow(spSMatrix x, long shift)
{
    long dst, c;
    long keep;

    if (shift == 0) {
        return;
    }

    if (shift > 0) {
        /* descending order: a source row is read before it is overwritten */
        for (dst = x->row - 1; dst >= 0; dst--) {
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (dst >= shift) ? x->data[dst - shift][c] : 0;
            }
        }
        if (x->imag != NULL) {
            for (dst = x->row - 1; dst >= 0; dst--) {
                for (c = 0; c < x->col; c++) {
                    x->imag[dst][c] = (dst >= shift) ? x->imag[dst - shift][c] : 0;
                }
            }
        }
    } else {
        /* rows [0, keep) still have a source row at dst - shift */
        keep = x->row + shift;
        for (dst = 0; dst < x->row; dst++) {
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (dst < keep) ? x->data[dst - shift][c] : 0;
            }
        }
        if (x->imag != NULL) {
            keep = x->row + shift;
            for (dst = 0; dst < x->row; dst++) {
                for (c = 0; c < x->col; c++) {
                    x->imag[dst][c] = (dst < keep) ? x->imag[dst - shift][c] : 0;
                }
            }
        }
    }
}

/*
 * smshiftcol: shift the columns of a short matrix in place.
 *
 * shift > 0 moves column k - shift into column k (content moves toward
 * higher column indices); shift < 0 moves content toward lower column
 * indices.  Columns that have no source column inside the matrix are set
 * to zero.  The imaginary part, when present, is shifted identically.
 * A shift of zero leaves x untouched.
 *
 * Fix: the negative-shift path previously derived its limit from x->row
 * for the real part and iterated the imaginary part over x->row columns,
 * which is wrong (and can index out of bounds) for non-square matrices.
 * Both now use x->col.
 */
void smshiftcol(spSMatrix x, long shift)
{
    long k, l;
    long len;

    if (shift == 0) return;

    if (shift > 0) {
        /* right-to-left so each source column is read before overwritten */
        for (k = x->col - 1; k >= 0; k--) {
            for (l = 0; l < x->row; l++) {
                if (k >= shift) {
                    x->data[l][k] = x->data[l][k - shift];
                    if (x->imag != NULL) {
                        x->imag[l][k] = x->imag[l][k - shift];
                    }
                } else {
                    x->data[l][k] = 0;
                    if (x->imag != NULL) {
                        x->imag[l][k] = 0;
                    }
                }
            }
        }
    } else {
        /* columns [0, len) still have a source column at k - shift */
        len = x->col + shift;
        for (k = 0; k < x->col; k++) {
            for (l = 0; l < x->row; l++) {
                if (k < len) {
                    x->data[l][k] = x->data[l][k - shift];
                    if (x->imag != NULL) {
                        x->imag[l][k] = x->imag[l][k - shift];
                    }
                } else {
                    x->data[l][k] = 0;
                    if (x->imag != NULL) {
                        x->imag[l][k] = 0;
                    }
                }
            }
        }
    }

    return;
}

/*
 * lmshiftrow: shift the rows of a long matrix in place.
 *
 * shift > 0 moves row k - shift into row k (content moves toward higher
 * row indices); shift < 0 moves content toward lower row indices.  Rows
 * that have no source row inside the matrix are set to zero.  The
 * imaginary part, when present, is shifted the same way after the real
 * part.  A shift of zero leaves x untouched.
 */
void lmshiftrow(spLMatrix x, long shift)
{
    long dst, c;
    long keep;

    if (shift == 0) {
        return;
    }

    if (shift > 0) {
        /* descending order: a source row is read before it is overwritten */
        for (dst = x->row - 1; dst >= 0; dst--) {
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (dst >= shift) ? x->data[dst - shift][c] : 0;
            }
        }
        if (x->imag != NULL) {
            for (dst = x->row - 1; dst >= 0; dst--) {
                for (c = 0; c < x->col; c++) {
                    x->imag[dst][c] = (dst >= shift) ? x->imag[dst - shift][c] : 0;
                }
            }
        }
    } else {
        /* rows [0, keep) still have a source row at dst - shift */
        keep = x->row + shift;
        for (dst = 0; dst < x->row; dst++) {
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (dst < keep) ? x->data[dst - shift][c] : 0;
            }
        }
        if (x->imag != NULL) {
            keep = x->row + shift;
            for (dst = 0; dst < x->row; dst++) {
                for (c = 0; c < x->col; c++) {
                    x->imag[dst][c] = (dst < keep) ? x->imag[dst - shift][c] : 0;
                }
            }
        }
    }
}

/*
 * lmshiftcol: shift the columns of a long matrix in place.
 *
 * shift > 0 moves column k - shift into column k (content moves toward
 * higher column indices); shift < 0 moves content toward lower column
 * indices.  Columns that have no source column inside the matrix are set
 * to zero.  The imaginary part, when present, is shifted identically.
 * A shift of zero leaves x untouched.
 *
 * Fix: the negative-shift path previously derived its limit from x->row
 * for the real part and iterated the imaginary part over x->row columns,
 * which is wrong (and can index out of bounds) for non-square matrices.
 * Both now use x->col.
 */
void lmshiftcol(spLMatrix x, long shift)
{
    long k, l;
    long len;

    if (shift == 0) return;

    if (shift > 0) {
        /* right-to-left so each source column is read before overwritten */
        for (k = x->col - 1; k >= 0; k--) {
            for (l = 0; l < x->row; l++) {
                if (k >= shift) {
                    x->data[l][k] = x->data[l][k - shift];
                    if (x->imag != NULL) {
                        x->imag[l][k] = x->imag[l][k - shift];
                    }
                } else {
                    x->data[l][k] = 0;
                    if (x->imag != NULL) {
                        x->imag[l][k] = 0;
                    }
                }
            }
        }
    } else {
        /* columns [0, len) still have a source column at k - shift */
        len = x->col + shift;
        for (k = 0; k < x->col; k++) {
            for (l = 0; l < x->row; l++) {
                if (k < len) {
                    x->data[l][k] = x->data[l][k - shift];
                    if (x->imag != NULL) {
                        x->imag[l][k] = x->imag[l][k - shift];
                    }
                } else {
                    x->data[l][k] = 0;
                    if (x->imag != NULL) {
                        x->imag[l][k] = 0;
                    }
                }
            }
        }
    }

    return;
}

/*
 * fmshiftrow: shift the rows of a float matrix in place.
 *
 * shift > 0 moves row k - shift into row k (content moves toward higher
 * row indices); shift < 0 moves content toward lower row indices.  Rows
 * that have no source row inside the matrix are set to zero.  The
 * imaginary part, when present, is shifted the same way after the real
 * part.  A shift of zero leaves x untouched.
 */
void fmshiftrow(spFMatrix x, long shift)
{
    long dst, c;
    long keep;

    if (shift == 0) {
        return;
    }

    if (shift > 0) {
        /* descending order: a source row is read before it is overwritten */
        for (dst = x->row - 1; dst >= 0; dst--) {
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (dst >= shift) ? x->data[dst - shift][c] : 0.0f;
            }
        }
        if (x->imag != NULL) {
            for (dst = x->row - 1; dst >= 0; dst--) {
                for (c = 0; c < x->col; c++) {
                    x->imag[dst][c] = (dst >= shift) ? x->imag[dst - shift][c] : 0.0f;
                }
            }
        }
    } else {
        /* rows [0, keep) still have a source row at dst - shift */
        keep = x->row + shift;
        for (dst = 0; dst < x->row; dst++) {
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (dst < keep) ? x->data[dst - shift][c] : 0.0f;
            }
        }
        if (x->imag != NULL) {
            keep = x->row + shift;
            for (dst = 0; dst < x->row; dst++) {
                for (c = 0; c < x->col; c++) {
                    x->imag[dst][c] = (dst < keep) ? x->imag[dst - shift][c] : 0.0f;
                }
            }
        }
    }
}

/*
 * fmshiftcol: shift the columns of a float matrix in place.
 *
 * shift > 0 moves column k - shift into column k (content moves toward
 * higher column indices); shift < 0 moves content toward lower column
 * indices.  Columns that have no source column inside the matrix are set
 * to zero.  The imaginary part, when present, is shifted identically.
 * A shift of zero leaves x untouched.
 *
 * Fix: the negative-shift path previously derived its limit from x->row
 * for the real part and iterated the imaginary part over x->row columns,
 * which is wrong (and can index out of bounds) for non-square matrices.
 * Both now use x->col.
 */
void fmshiftcol(spFMatrix x, long shift)
{
    long k, l;
    long len;

    if (shift == 0) return;

    if (shift > 0) {
        /* right-to-left so each source column is read before overwritten */
        for (k = x->col - 1; k >= 0; k--) {
            for (l = 0; l < x->row; l++) {
                if (k >= shift) {
                    x->data[l][k] = x->data[l][k - shift];
                    if (x->imag != NULL) {
                        x->imag[l][k] = x->imag[l][k - shift];
                    }
                } else {
                    x->data[l][k] = 0.0f;
                    if (x->imag != NULL) {
                        x->imag[l][k] = 0.0f;
                    }
                }
            }
        }
    } else {
        /* columns [0, len) still have a source column at k - shift */
        len = x->col + shift;
        for (k = 0; k < x->col; k++) {
            for (l = 0; l < x->row; l++) {
                if (k < len) {
                    x->data[l][k] = x->data[l][k - shift];
                    if (x->imag != NULL) {
                        x->imag[l][k] = x->imag[l][k - shift];
                    }
                } else {
                    x->data[l][k] = 0.0f;
                    if (x->imag != NULL) {
                        x->imag[l][k] = 0.0f;
                    }
                }
            }
        }
    }

    return;
}

/*
 * dmshiftrow: shift the rows of x in place by `shift` positions.
 *
 * shift > 0 moves row k to row k + shift; shift < 0 moves row k to row
 * k - |shift|.  Destination rows without a source row inside the matrix
 * are set to 0.0.  The imaginary part, when present, is processed in a
 * second pass with the same rule.  shift == 0 returns immediately.
 */
void dmshiftrow(spDMatrix x, long shift)
{
    long dst, src, c;

    if (shift == 0) {
        return;
    }

    /* real part */
    if (shift > 0) {
        /* bottom-up so a source row is still intact when it is read */
        for (dst = x->row - 1; dst >= 0; dst--) {
            src = dst - shift;
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (src >= 0) ? x->data[src][c] : 0.0;
            }
        }
    } else {
        /* top-down: the source row is below the destination row */
        for (dst = 0; dst < x->row; dst++) {
            src = dst - shift;
            for (c = 0; c < x->col; c++) {
                x->data[dst][c] = (src < x->row) ? x->data[src][c] : 0.0;
            }
        }
    }

    if (x->imag == NULL) {
        return;
    }

    /* imaginary part */
    if (shift > 0) {
        for (dst = x->row - 1; dst >= 0; dst--) {
            src = dst - shift;
            for (c = 0; c < x->col; c++) {
                x->imag[dst][c] = (src >= 0) ? x->imag[src][c] : 0.0;
            }
        }
    } else {
        for (dst = 0; dst < x->row; dst++) {
            src = dst - shift;
            for (c = 0; c < x->col; c++) {
                x->imag[dst][c] = (src < x->row) ? x->imag[src][c] : 0.0;
            }
        }
    }
}

/*
 * dmshiftcol: shift the columns of x in place by `shift` positions.
 *
 * shift > 0 moves column k to column k + shift; shift < 0 moves column k
 * to column k - |shift|.  Destination columns whose source column lies
 * outside the matrix are set to 0.  When x->imag is not NULL the imaginary
 * part is shifted the same way.  Nothing happens for shift == 0 or
 * x == NODATA.
 *
 * All column bounds are taken from x->col.  The earlier code used x->row
 * for the negative-shift limit of the real part and for the loop bound of
 * the imaginary part, which is only correct for square matrices.
 */
void dmshiftcol(spDMatrix x, long shift)
{
    long k, l;
    long src;

    if (x == NODATA || shift == 0) return;

    if (shift > 0) {
        /* right to left: a source column is read before it is overwritten */
        for (k = x->col - 1; k >= 0; k--) {
            src = k - shift;
            for (l = 0; l < x->row; l++) {
                x->data[l][k] = (src >= 0) ? x->data[l][src] : 0.0;
                if (x->imag != NULL) {
                    x->imag[l][k] = (src >= 0) ? x->imag[l][src] : 0.0;
                }
            }
        }
    } else {
        /* left to right: the source column is to the right of k */
        for (k = 0; k < x->col; k++) {
            src = k - shift;
            for (l = 0; l < x->row; l++) {
                x->data[l][k] = (src < x->col) ? x->data[l][src] : 0.0;
                if (x->imag != NULL) {
                    x->imag[l][k] = (src < x->col) ? x->imag[l][src] : 0.0;
                }
            }
        }
    }

    return;
}

/*
 * xsvdiag: return a new vector holding the main diagonal of A.
 *
 * The vector is requested from xsvalloc() with MIN(A->row, A->col)
 * elements; an imaginary part is attached and filled when A has one.
 * Returns NODATA when A is NODATA.
 */
spSVector xsvdiag(spSMatrix A)
{
    long k;
    spSVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    diag = xsvalloc(MIN(A->row, A->col));
    if (A->imag != NULL) {
        svialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k][k];
    }
    if (A->imag != NULL) {
        for (k = 0; k < diag->length; k++) {
            diag->imag[k] = A->imag[k][k];
        }
    }

    return diag;
}

/*
 * xlvdiag: return a new vector holding the main diagonal of A.
 *
 * The vector is requested from xlvalloc() with MIN(A->row, A->col)
 * elements; an imaginary part is attached and filled when A has one.
 * Returns NODATA when A is NODATA.
 */
spLVector xlvdiag(spLMatrix A)
{
    long k;
    spLVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    diag = xlvalloc(MIN(A->row, A->col));
    if (A->imag != NULL) {
        lvialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k][k];
    }
    if (A->imag != NULL) {
        for (k = 0; k < diag->length; k++) {
            diag->imag[k] = A->imag[k][k];
        }
    }

    return diag;
}

/*
 * xfvdiag: return a new vector holding the main diagonal of A.
 *
 * The vector is requested from xfvalloc() with MIN(A->row, A->col)
 * elements; an imaginary part is attached and filled when A has one.
 * Returns NODATA when A is NODATA.
 */
spFVector xfvdiag(spFMatrix A)
{
    long k;
    spFVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    diag = xfvalloc(MIN(A->row, A->col));
    if (A->imag != NULL) {
        fvialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k][k];
    }
    if (A->imag != NULL) {
        for (k = 0; k < diag->length; k++) {
            diag->imag[k] = A->imag[k][k];
        }
    }

    return diag;
}

/*
 * xdvdiag: return a new vector holding the main diagonal of A.
 *
 * The vector is requested from xdvalloc() with MIN(A->row, A->col)
 * elements; an imaginary part is attached and filled when A has one.
 * Returns NODATA when A is NODATA.
 */
spDVector xdvdiag(spDMatrix A)
{
    long k;
    spDVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    diag = xdvalloc(MIN(A->row, A->col));
    if (A->imag != NULL) {
        dvialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k][k];
    }
    if (A->imag != NULL) {
        for (k = 0; k < diag->length; k++) {
            diag->imag[k] = A->imag[k][k];
        }
    }

    return diag;
}

/*
 * xsvsupdiag: return a new vector holding the diagonal of A that is offset
 * by `shift` from the main diagonal.
 *
 * shift >= 0 selects A[k][k + shift] (above the main diagonal) and requests
 * MIN(A->row, A->col - shift) elements; shift < 0 selects A[k - shift][k]
 * (below it) and requests MIN(A->row + shift, A->col) elements.  The
 * imaginary part is copied too when A has one.  Returns NODATA when A is
 * NODATA.
 */
spSVector xsvsupdiag(spSMatrix A, long shift)
{
    long k;
    long row0, col0;    /* coordinates of the first element of the diagonal */
    spSVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    if (shift < 0) {
        row0 = -shift;
        col0 = 0;
        diag = xsvalloc(MIN(A->row + shift, A->col));
    } else {
        row0 = 0;
        col0 = shift;
        diag = xsvalloc(MIN(A->row, A->col - shift));
    }
    if (A->imag != NULL) {
        svialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k + row0][k + col0];
        if (A->imag != NULL) {
            diag->imag[k] = A->imag[k + row0][k + col0];
        }
    }

    return diag;
}

/*
 * xlvsupdiag: return a new vector holding the diagonal of A that is offset
 * by `shift` from the main diagonal.
 *
 * shift >= 0 selects A[k][k + shift] (above the main diagonal) and requests
 * MIN(A->row, A->col - shift) elements; shift < 0 selects A[k - shift][k]
 * (below it) and requests MIN(A->row + shift, A->col) elements.  The
 * imaginary part is copied too when A has one.  Returns NODATA when A is
 * NODATA.
 */
spLVector xlvsupdiag(spLMatrix A, long shift)
{
    long k;
    long row0, col0;    /* coordinates of the first element of the diagonal */
    spLVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    if (shift < 0) {
        row0 = -shift;
        col0 = 0;
        diag = xlvalloc(MIN(A->row + shift, A->col));
    } else {
        row0 = 0;
        col0 = shift;
        diag = xlvalloc(MIN(A->row, A->col - shift));
    }
    if (A->imag != NULL) {
        lvialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k + row0][k + col0];
        if (A->imag != NULL) {
            diag->imag[k] = A->imag[k + row0][k + col0];
        }
    }

    return diag;
}

/*
 * xfvsupdiag: return a new vector holding the diagonal of A that is offset
 * by `shift` from the main diagonal.
 *
 * shift >= 0 selects A[k][k + shift] (above the main diagonal) and requests
 * MIN(A->row, A->col - shift) elements; shift < 0 selects A[k - shift][k]
 * (below it) and requests MIN(A->row + shift, A->col) elements.  The
 * imaginary part is copied too when A has one.  Returns NODATA when A is
 * NODATA.
 */
spFVector xfvsupdiag(spFMatrix A, long shift)
{
    long k;
    long row0, col0;    /* coordinates of the first element of the diagonal */
    spFVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    if (shift < 0) {
        row0 = -shift;
        col0 = 0;
        diag = xfvalloc(MIN(A->row + shift, A->col));
    } else {
        row0 = 0;
        col0 = shift;
        diag = xfvalloc(MIN(A->row, A->col - shift));
    }
    if (A->imag != NULL) {
        fvialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k + row0][k + col0];
        if (A->imag != NULL) {
            diag->imag[k] = A->imag[k + row0][k + col0];
        }
    }

    return diag;
}

/*
 * xdvsupdiag: return a new vector holding the diagonal of A that is offset
 * by `shift` from the main diagonal.
 *
 * shift >= 0 selects A[k][k + shift] (above the main diagonal) and requests
 * MIN(A->row, A->col - shift) elements; shift < 0 selects A[k - shift][k]
 * (below it) and requests MIN(A->row + shift, A->col) elements.  The
 * imaginary part is copied too when A has one.  Returns NODATA when A is
 * NODATA.
 */
spDVector xdvsupdiag(spDMatrix A, long shift)
{
    long k;
    long row0, col0;    /* coordinates of the first element of the diagonal */
    spDVector diag;

    if (A == NODATA) {
        return NODATA;
    }

    if (shift < 0) {
        row0 = -shift;
        col0 = 0;
        diag = xdvalloc(MIN(A->row + shift, A->col));
    } else {
        row0 = 0;
        col0 = shift;
        diag = xdvalloc(MIN(A->row, A->col - shift));
    }
    if (A->imag != NULL) {
        dvialloc(diag);
    }

    for (k = 0; k < diag->length; k++) {
        diag->data[k] = A->data[k + row0][k + col0];
        if (A->imag != NULL) {
            diag->imag[k] = A->imag[k + row0][k + col0];
        }
    }

    return diag;
}

/* smdiag: in-place main-diagonal filter; forwards to smsupdiag() with a shift of 0. */
void smdiag(spSMatrix A)
{
    const long main_diagonal = 0;

    smsupdiag(A, main_diagonal);
}

/* lmdiag: in-place main-diagonal filter; forwards to lmsupdiag() with a shift of 0. */
void lmdiag(spLMatrix A)
{
    const long main_diagonal = 0;

    lmsupdiag(A, main_diagonal);
}

/* fmdiag: in-place main-diagonal filter; forwards to fmsupdiag() with a shift of 0. */
void fmdiag(spFMatrix A)
{
    const long main_diagonal = 0;

    fmsupdiag(A, main_diagonal);
}

/* dmdiag: in-place main-diagonal filter; forwards to dmsupdiag() with a shift of 0. */
void dmdiag(spDMatrix A)
{
    const long main_diagonal = 0;

    dmsupdiag(A, main_diagonal);
}

/*
 * smsupdiag: in place, zero every element of A that is not on the diagonal
 * offset by `shift` (elements with column == row + shift are kept).
 * The imaginary part, when present, is zeroed at the same positions.
 * A == NODATA is a no-op.
 */
void smsupdiag(spSMatrix A, long shift)
{
    long r, c;
    long keep;      /* column to preserve in the current row */

    if (A == NODATA) {
        return;
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c == keep) {
                continue;
            }
            A->data[r][c] = 0;
            if (A->imag != NULL) {
                A->imag[r][c] = 0;
            }
        }
    }
}

/*
 * lmsupdiag: in place, zero every element of A that is not on the diagonal
 * offset by `shift` (elements with column == row + shift are kept).
 * The imaginary part, when present, is zeroed at the same positions.
 * A == NODATA is a no-op.
 */
void lmsupdiag(spLMatrix A, long shift)
{
    long r, c;
    long keep;      /* column to preserve in the current row */

    if (A == NODATA) {
        return;
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c == keep) {
                continue;
            }
            A->data[r][c] = 0;
            if (A->imag != NULL) {
                A->imag[r][c] = 0;
            }
        }
    }
}

/*
 * fmsupdiag: in place, zero every element of A that is not on the diagonal
 * offset by `shift` (elements with column == row + shift are kept).
 * The imaginary part, when present, is zeroed at the same positions.
 * A == NODATA is a no-op.
 */
void fmsupdiag(spFMatrix A, long shift)
{
    long r, c;
    long keep;      /* column to preserve in the current row */

    if (A == NODATA) {
        return;
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c == keep) {
                continue;
            }
            A->data[r][c] = 0.0f;
            if (A->imag != NULL) {
                A->imag[r][c] = 0.0f;
            }
        }
    }
}

/*
 * dmsupdiag: in place, zero every element of A that is not on the diagonal
 * offset by `shift` (elements with column == row + shift are kept).
 * The imaginary part, when present, is zeroed at the same positions.
 * A == NODATA is a no-op.
 */
void dmsupdiag(spDMatrix A, long shift)
{
    long r, c;
    long keep;      /* column to preserve in the current row */

    if (A == NODATA) {
        return;
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c == keep) {
                continue;
            }
            A->data[r][c] = 0.0;
            if (A->imag != NULL) {
                A->imag[r][c] = 0.0;
            }
        }
    }
}

/*
 * xsmdiag: return a new matrix of the same size as A that keeps A's main
 * diagonal and is 0 everywhere else.  The result has an imaginary part
 * exactly when A has one.  Returns NODATA when A is NODATA.
 */
spSMatrix xsmdiag(spSMatrix A)
{
    long r, c;
    int has_imag;
    spSMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xsmrialloc(A->row, A->col);
    } else {
        D = xsmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        for (c = 0; c < A->col; c++) {
            if (r != c) {
                D->data[r][c] = 0;
                if (has_imag) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xlmdiag: return a new matrix of the same size as A that keeps A's main
 * diagonal and is 0 everywhere else.  The result has an imaginary part
 * exactly when A has one.  Returns NODATA when A is NODATA.
 */
spLMatrix xlmdiag(spLMatrix A)
{
    long r, c;
    int has_imag;
    spLMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xlmrialloc(A->row, A->col);
    } else {
        D = xlmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        for (c = 0; c < A->col; c++) {
            if (r != c) {
                D->data[r][c] = 0;
                if (has_imag) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xfmdiag: return a new matrix of the same size as A that keeps A's main
 * diagonal and is 0 everywhere else.  The result has an imaginary part
 * exactly when A has one.  Returns NODATA when A is NODATA.
 */
spFMatrix xfmdiag(spFMatrix A)
{
    long r, c;
    int has_imag;
    spFMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xfmrialloc(A->row, A->col);
    } else {
        D = xfmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        for (c = 0; c < A->col; c++) {
            if (r != c) {
                D->data[r][c] = 0.0f;
                if (has_imag) {
                    D->imag[r][c] = 0.0f;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xdmdiag: return a new matrix of the same size as A that keeps A's main
 * diagonal and is 0 everywhere else.  The result has an imaginary part
 * exactly when A has one.  Returns NODATA when A is NODATA.
 */
spDMatrix xdmdiag(spDMatrix A)
{
    long r, c;
    int has_imag;
    spDMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xdmrialloc(A->row, A->col);
    } else {
        D = xdmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        for (c = 0; c < A->col; c++) {
            if (r != c) {
                D->data[r][c] = 0.0;
                if (has_imag) {
                    D->imag[r][c] = 0.0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xsmsupdiag: return a new matrix of the same size as A that keeps only the
 * elements with column == row + shift and is 0 everywhere else.  The result
 * has an imaginary part exactly when A has one.  Returns NODATA when A is
 * NODATA.
 */
spSMatrix xsmsupdiag(spSMatrix A, long shift)
{
    long r, c;
    long keep;      /* column copied from A in the current row */
    int has_imag;
    spSMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xsmrialloc(A->row, A->col);
    } else {
        D = xsmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c != keep) {
                D->data[r][c] = 0;
                if (has_imag) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xlmsupdiag: return a new matrix of the same size as A that keeps only the
 * elements with column == row + shift and is 0 everywhere else.  The result
 * has an imaginary part exactly when A has one.  Returns NODATA when A is
 * NODATA.
 */
spLMatrix xlmsupdiag(spLMatrix A, long shift)
{
    long r, c;
    long keep;      /* column copied from A in the current row */
    int has_imag;
    spLMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xlmrialloc(A->row, A->col);
    } else {
        D = xlmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c != keep) {
                D->data[r][c] = 0;
                if (has_imag) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xfmsupdiag: return a new matrix of the same size as A that keeps only the
 * elements with column == row + shift and is 0 everywhere else.  The result
 * has an imaginary part exactly when A has one.  Returns NODATA when A is
 * NODATA.
 */
spFMatrix xfmsupdiag(spFMatrix A, long shift)
{
    long r, c;
    long keep;      /* column copied from A in the current row */
    int has_imag;
    spFMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xfmrialloc(A->row, A->col);
    } else {
        D = xfmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c != keep) {
                D->data[r][c] = 0;
                if (has_imag) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xdmsupdiag: return a new matrix of the same size as A that keeps only the
 * elements with column == row + shift and is 0 everywhere else.  The result
 * has an imaginary part exactly when A has one.  Returns NODATA when A is
 * NODATA.
 */
spDMatrix xdmsupdiag(spDMatrix A, long shift)
{
    long r, c;
    long keep;      /* column copied from A in the current row */
    int has_imag;
    spDMatrix D;

    if (A == NODATA) {
        return NODATA;
    }

    has_imag = (A->imag != NULL);
    if (has_imag) {
        D = xdmrialloc(A->row, A->col);
    } else {
        D = xdmalloc(A->row, A->col);
    }

    for (r = 0; r < A->row; r++) {
        keep = r + shift;
        for (c = 0; c < A->col; c++) {
            if (c != keep) {
                D->data[r][c] = 0;
                if (has_imag) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][c] = A->data[r][c];
            if (has_imag) {
                D->imag[r][c] = A->imag[r][c];
            }
        }
    }

    return D;
}

/*
 * xsmmakediag: build a new square matrix with the elements of d on its main
 * diagonal and 0 elsewhere.  The matrix is d->length by d->length and gets
 * an imaginary part when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spSMatrix xsmmakediag(spSVector d)
{
    long r, c;
    spSMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    D = xsmalloc(d->length, d->length);
    if (d->imag != NULL) {
        smialloc(D);
    }

    for (r = 0; r < d->length; r++) {
        for (c = 0; c < d->length; c++) {
            if (c != r) {
                /* off-diagonal element */
                D->data[r][c] = 0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][r] = d->data[r];
            if (d->imag != NULL) {
                D->imag[r][r] = d->imag[r];
            }
        }
    }

    return D;
}

/*
 * xlmmakediag: build a new square matrix with the elements of d on its main
 * diagonal and 0 elsewhere.  The matrix is d->length by d->length and gets
 * an imaginary part when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spLMatrix xlmmakediag(spLVector d)
{
    long r, c;
    spLMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    D = xlmalloc(d->length, d->length);
    if (d->imag != NULL) {
        lmialloc(D);
    }

    for (r = 0; r < d->length; r++) {
        for (c = 0; c < d->length; c++) {
            if (c != r) {
                /* off-diagonal element */
                D->data[r][c] = 0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            D->data[r][r] = d->data[r];
            if (d->imag != NULL) {
                D->imag[r][r] = d->imag[r];
            }
        }
    }

    return D;
}

/*
 * xfmmakediag: build a new square matrix with the elements of d on its main
 * diagonal and 0 elsewhere.  The matrix is d->length by d->length and gets
 * an imaginary part when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spFMatrix xfmmakediag(spFVector d)
{
    long r, c;
    spFMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    D = xfmalloc(d->length, d->length);
    if (d->imag != NULL) {
        fmialloc(D);
    }

    for (r = 0; r < d->length; r++) {
        for (c = 0; c < d->length; c++) {
            if (c != r) {
                /* off-diagonal element */
                D->data[r][c] = 0.0f;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0.0f;
                }
                continue;
            }
            D->data[r][r] = d->data[r];
            if (d->imag != NULL) {
                D->imag[r][r] = d->imag[r];
            }
        }
    }

    return D;
}

/*
 * xdmmakediag: build a new square matrix with the elements of d on its main
 * diagonal and 0 elsewhere.  The matrix is d->length by d->length and gets
 * an imaginary part when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spDMatrix xdmmakediag(spDVector d)
{
    long r, c;
    spDMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    D = xdmalloc(d->length, d->length);
    if (d->imag != NULL) {
        dmialloc(D);
    }

    for (r = 0; r < d->length; r++) {
        for (c = 0; c < d->length; c++) {
            if (c != r) {
                /* off-diagonal element */
                D->data[r][c] = 0.0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0.0;
                }
                continue;
            }
            D->data[r][r] = d->data[r];
            if (d->imag != NULL) {
                D->imag[r][r] = d->imag[r];
            }
        }
    }

    return D;
}

/*
 * xsmmakesupdiag: build a new square matrix whose diagonal offset by
 * `shift` (column == row + shift) holds the elements of d; everything else
 * is 0.  The matrix side is d->length + |shift|.  For shift >= 0 element k
 * of d lands in row k; for shift < 0 it lands in column k.  An imaginary
 * part is attached when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spSMatrix xsmmakesupdiag(spSVector d, long shift)
{
    long r, c;
    long src;       /* index into d for the current diagonal element */
    long side;
    spSMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    side = (shift >= 0) ? d->length + shift : d->length - shift;
    D = xsmalloc(side, side);
    if (d->imag != NULL) {
        smialloc(D);
    }

    for (r = 0; r < D->row; r++) {
        for (c = 0; c < D->col; c++) {
            if (c != r + shift) {
                D->data[r][c] = 0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            src = (shift >= 0) ? r : c;
            D->data[r][c] = d->data[src];
            if (d->imag != NULL) {
                D->imag[r][c] = d->imag[src];
            }
        }
    }

    return D;
}

/*
 * xlmmakesupdiag: build a new square matrix whose diagonal offset by
 * `shift` (column == row + shift) holds the elements of d; everything else
 * is 0.  The matrix side is d->length + |shift|.  For shift >= 0 element k
 * of d lands in row k; for shift < 0 it lands in column k.  An imaginary
 * part is attached when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spLMatrix xlmmakesupdiag(spLVector d, long shift)
{
    long r, c;
    long src;       /* index into d for the current diagonal element */
    long side;
    spLMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    side = (shift >= 0) ? d->length + shift : d->length - shift;
    D = xlmalloc(side, side);
    if (d->imag != NULL) {
        lmialloc(D);
    }

    for (r = 0; r < D->row; r++) {
        for (c = 0; c < D->col; c++) {
            if (c != r + shift) {
                D->data[r][c] = 0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            src = (shift >= 0) ? r : c;
            D->data[r][c] = d->data[src];
            if (d->imag != NULL) {
                D->imag[r][c] = d->imag[src];
            }
        }
    }

    return D;
}

/*
 * xfmmakesupdiag: build a new square matrix whose diagonal offset by
 * `shift` (column == row + shift) holds the elements of d; everything else
 * is 0.  The matrix side is d->length + |shift|.  For shift >= 0 element k
 * of d lands in row k; for shift < 0 it lands in column k.  An imaginary
 * part is attached when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spFMatrix xfmmakesupdiag(spFVector d, long shift)
{
    long r, c;
    long src;       /* index into d for the current diagonal element */
    long side;
    spFMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    side = (shift >= 0) ? d->length + shift : d->length - shift;
    D = xfmalloc(side, side);
    if (d->imag != NULL) {
        fmialloc(D);
    }

    for (r = 0; r < D->row; r++) {
        for (c = 0; c < D->col; c++) {
            if (c != r + shift) {
                D->data[r][c] = 0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            src = (shift >= 0) ? r : c;
            D->data[r][c] = d->data[src];
            if (d->imag != NULL) {
                D->imag[r][c] = d->imag[src];
            }
        }
    }

    return D;
}

/*
 * xdmmakesupdiag: build a new square matrix whose diagonal offset by
 * `shift` (column == row + shift) holds the elements of d; everything else
 * is 0.  The matrix side is d->length + |shift|.  For shift >= 0 element k
 * of d lands in row k; for shift < 0 it lands in column k.  An imaginary
 * part is attached when d has one.  Returns NODATA when d is NODATA or has
 * a non-positive length.
 */
spDMatrix xdmmakesupdiag(spDVector d, long shift)
{
    long r, c;
    long src;       /* index into d for the current diagonal element */
    long side;
    spDMatrix D;

    if (d == NODATA || d->length <= 0) {
        return NODATA;
    }

    side = (shift >= 0) ? d->length + shift : d->length - shift;
    D = xdmalloc(side, side);
    if (d->imag != NULL) {
        dmialloc(D);
    }

    for (r = 0; r < D->row; r++) {
        for (c = 0; c < D->col; c++) {
            if (c != r + shift) {
                D->data[r][c] = 0;
                if (D->imag != NULL) {
                    D->imag[r][c] = 0;
                }
                continue;
            }
            src = (shift >= 0) ? r : c;
            D->data[r][c] = d->data[src];
            if (d->imag != NULL) {
                D->imag[r][c] = d->imag[src];
            }
        }
    }

    return D;
}

/*
 * xsmmakeperm: build a new square permutation matrix from an index vector.
 *
 * The result P is index->length by index->length with
 * P[i][j] = 1 when j == index->data[i] and 0 otherwise, i.e. row i has its
 * single 1 in column index->data[i].  Returns NODATA when index is NODATA.
 *
 * Every element of P is written.  The earlier inner loop tested and
 * incremented the row counter instead of the column counter, so only the
 * first row and column were assigned and the rest of P was never set; it
 * also mirrored each value across the diagonal, which is not valid for a
 * permutation that is not its own inverse.
 */
spSMatrix xsmmakeperm(spLVector index)
{
    long i, j;
    spSMatrix P;

    if (index == NODATA) return NODATA;

    P = xsmalloc(index->length, index->length);

    for (i = 0; i < P->row; i++) {
        for (j = 0; j < P->col; j++) {
            P->data[i][j] = (j == index->data[i]) ? 1 : 0;
        }
    }

    return P;
}

/*
 * xlmmakeperm: build a new square permutation matrix from an index vector.
 *
 * The result P is index->length by index->length with
 * P[i][j] = 1 when j == index->data[i] and 0 otherwise, i.e. row i has its
 * single 1 in column index->data[i].  Returns NODATA when index is NODATA.
 *
 * Every element of P is written.  The earlier inner loop tested and
 * incremented the row counter instead of the column counter, so only the
 * first row and column were assigned and the rest of P was never set; it
 * also mirrored each value across the diagonal, which is not valid for a
 * permutation that is not its own inverse.
 */
spLMatrix xlmmakeperm(spLVector index)
{
    long i, j;
    spLMatrix P;

    if (index == NODATA) return NODATA;

    P = xlmalloc(index->length, index->length);

    for (i = 0; i < P->row; i++) {
        for (j = 0; j < P->col; j++) {
            P->data[i][j] = (j == index->data[i]) ? 1 : 0;
        }
    }

    return P;
}

/*
 * xfmmakeperm: build a new square permutation matrix from an index vector.
 *
 * The result P is index->length by index->length with
 * P[i][j] = 1 when j == index->data[i] and 0 otherwise, i.e. row i has its
 * single 1 in column index->data[i].  Returns NODATA when index is NODATA.
 *
 * Every element of P is written.  The earlier inner loop tested and
 * incremented the row counter instead of the column counter, so only the
 * first row and column were assigned and the rest of P was never set; it
 * also mirrored each value across the diagonal, which is not valid for a
 * permutation that is not its own inverse.
 */
spFMatrix xfmmakeperm(spLVector index)
{
    long i, j;
    spFMatrix P;

    if (index == NODATA) return NODATA;

    P = xfmalloc(index->length, index->length);

    for (i = 0; i < P->row; i++) {
        for (j = 0; j < P->col; j++) {
            P->data[i][j] = (j == index->data[i]) ? 1.0f : 0.0f;
        }
    }

    return P;
}

/*
 * xdmmakeperm: build a new square permutation matrix from an index vector.
 *
 * The result P is index->length by index->length with
 * P[i][j] = 1 when j == index->data[i] and 0 otherwise, i.e. row i has its
 * single 1 in column index->data[i].  Returns NODATA when index is NODATA.
 *
 * Every element of P is written.  The earlier inner loop tested and
 * incremented the row counter instead of the column counter, so only the
 * first row and column were assigned and the rest of P was never set; it
 * also mirrored each value across the diagonal, which is not valid for a
 * permutation that is not its own inverse.
 */
spDMatrix xdmmakeperm(spLVector index)
{
    long i, j;
    spDMatrix P;

    if (index == NODATA) return NODATA;

    P = xdmalloc(index->length, index->length);

    for (i = 0; i < P->row; i++) {
        for (j = 0; j < P->col; j++) {
            P->data[i][j] = (j == index->data[i]) ? 1.0 : 0.0;
        }
    }

    return P;
}

/*
 * smtranspose: transpose a square matrix in place.
 *
 * Swaps A[r][c] with A[c][r] for every pair above the main diagonal, in the
 * real part and, when present, the imaginary part.  Returns SP_FALSE
 * without touching anything when A is NODATA or not square, SP_TRUE
 * otherwise.
 */
spBool smtranspose(spSMatrix A)
{
    long r, c;
    short held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    for (r = 0; r < A->row; r++) {
        for (c = r + 1; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = A->imag[c][r];
                A->imag[c][r] = held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xsmtranspose: return a new matrix that is the transpose of A.
 *
 * The result is A->col by A->row with AT[i][j] = A[j][i]; an imaginary
 * part is attached and filled the same way when A has one.  Returns NODATA
 * when A is NODATA (previously a NODATA argument was dereferenced).
 */
spSMatrix xsmtranspose(spSMatrix A)
{
    long i, j;
    spSMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xsmalloc(A->col, A->row);
    if (A->imag != NULL) {
        smialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * smconjtranspose: conjugate-transpose a square matrix in place.
 *
 * The real part is transposed; the imaginary part, when present, is
 * transposed and negated, including the diagonal elements.  Returns
 * SP_FALSE without touching anything when A is NODATA or not square,
 * SP_TRUE otherwise.
 */
spBool smconjtranspose(spSMatrix A)
{
    long r, c;
    short held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    /* real part: swapping a diagonal element with itself changes nothing */
    for (r = 0; r < A->row; r++) {
        for (c = r; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            /* the diagonal only changes sign */
            A->imag[r][r] = -A->imag[r][r];
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = -A->imag[c][r];
                A->imag[c][r] = -held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xsmconjtranspose: return a new matrix that is the conjugate transpose
 * of A.
 *
 * The result is A->col by A->row with real part AT[i][j] = A[j][i]; when A
 * has an imaginary part the result gets one holding -A->imag[j][i].
 * Returns NODATA when A is NODATA (previously a NODATA argument was
 * dereferenced).
 */
spSMatrix xsmconjtranspose(spSMatrix A)
{
    long i, j;
    spSMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xsmalloc(A->col, A->row);
    if (A->imag != NULL) {
        smialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = -A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * lmtranspose: transpose a square matrix in place.
 *
 * Swaps A[r][c] with A[c][r] for every pair above the main diagonal, in the
 * real part and, when present, the imaginary part.  Returns SP_FALSE
 * without touching anything when A is NODATA or not square, SP_TRUE
 * otherwise.
 */
spBool lmtranspose(spLMatrix A)
{
    long r, c;
    long held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    for (r = 0; r < A->row; r++) {
        for (c = r + 1; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = A->imag[c][r];
                A->imag[c][r] = held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xlmtranspose: return a new matrix that is the transpose of A.
 *
 * The result is A->col by A->row with AT[i][j] = A[j][i]; an imaginary
 * part is attached and filled the same way when A has one.  Returns NODATA
 * when A is NODATA (previously a NODATA argument was dereferenced).
 */
spLMatrix xlmtranspose(spLMatrix A)
{
    long i, j;
    spLMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xlmalloc(A->col, A->row);
    if (A->imag != NULL) {
        lmialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * lmconjtranspose: conjugate-transpose a square matrix in place.
 *
 * The real part is transposed; the imaginary part, when present, is
 * transposed and negated, including the diagonal elements.  Returns
 * SP_FALSE without touching anything when A is NODATA or not square,
 * SP_TRUE otherwise.
 */
spBool lmconjtranspose(spLMatrix A)
{
    long r, c;
    long held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    /* real part: swapping a diagonal element with itself changes nothing */
    for (r = 0; r < A->row; r++) {
        for (c = r; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            /* the diagonal only changes sign */
            A->imag[r][r] = -A->imag[r][r];
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = -A->imag[c][r];
                A->imag[c][r] = -held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xlmconjtranspose: return a new matrix that is the conjugate transpose
 * of A.
 *
 * The result is A->col by A->row with real part AT[i][j] = A[j][i]; when A
 * has an imaginary part the result gets one holding -A->imag[j][i].
 * Returns NODATA when A is NODATA (previously a NODATA argument was
 * dereferenced).
 */
spLMatrix xlmconjtranspose(spLMatrix A)
{
    long i, j;
    spLMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xlmalloc(A->col, A->row);
    if (A->imag != NULL) {
        lmialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = -A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * fmtranspose: transpose a square matrix in place.
 *
 * Swaps A[r][c] with A[c][r] for every pair above the main diagonal, in the
 * real part and, when present, the imaginary part.  Returns SP_FALSE
 * without touching anything when A is NODATA or not square, SP_TRUE
 * otherwise.
 */
spBool fmtranspose(spFMatrix A)
{
    long r, c;
    float held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    for (r = 0; r < A->row; r++) {
        for (c = r + 1; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = A->imag[c][r];
                A->imag[c][r] = held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xfmtranspose: return a new matrix that is the transpose of A.
 *
 * The result is A->col by A->row with AT[i][j] = A[j][i]; an imaginary
 * part is attached and filled the same way when A has one.  Returns NODATA
 * when A is NODATA (previously a NODATA argument was dereferenced).
 */
spFMatrix xfmtranspose(spFMatrix A)
{
    long i, j;
    spFMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xfmalloc(A->col, A->row);
    if (A->imag != NULL) {
        fmialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * fmconjtranspose: conjugate-transpose a square matrix in place.
 *
 * The real part is transposed; the imaginary part, when present, is
 * transposed and negated, including the diagonal elements.  Returns
 * SP_FALSE without touching anything when A is NODATA or not square,
 * SP_TRUE otherwise.
 */
spBool fmconjtranspose(spFMatrix A)
{
    long r, c;
    float held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    /* real part: swapping a diagonal element with itself changes nothing */
    for (r = 0; r < A->row; r++) {
        for (c = r; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            /* the diagonal only changes sign */
            A->imag[r][r] = -A->imag[r][r];
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = -A->imag[c][r];
                A->imag[c][r] = -held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xfmconjtranspose: return a new matrix that is the conjugate transpose
 * of A.
 *
 * The result is A->col by A->row with real part AT[i][j] = A[j][i]; when A
 * has an imaginary part the result gets one holding -A->imag[j][i].
 * Returns NODATA when A is NODATA (previously a NODATA argument was
 * dereferenced).
 */
spFMatrix xfmconjtranspose(spFMatrix A)
{
    long i, j;
    spFMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xfmalloc(A->col, A->row);
    if (A->imag != NULL) {
        fmialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = -A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * dmtranspose: transpose a square matrix in place.
 *
 * Swaps A[r][c] with A[c][r] for every pair above the main diagonal, in the
 * real part and, when present, the imaginary part.  Returns SP_FALSE
 * without touching anything when A is NODATA or not square, SP_TRUE
 * otherwise.
 */
spBool dmtranspose(spDMatrix A)
{
    long r, c;
    double held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    for (r = 0; r < A->row; r++) {
        for (c = r + 1; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = A->imag[c][r];
                A->imag[c][r] = held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xdmtranspose: return a new matrix that is the transpose of A.
 *
 * The result is A->col by A->row with AT[i][j] = A[j][i]; an imaginary
 * part is attached and filled the same way when A has one.  Returns NODATA
 * when A is NODATA (previously a NODATA argument was dereferenced).
 */
spDMatrix xdmtranspose(spDMatrix A)
{
    long i, j;
    spDMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xdmalloc(A->col, A->row);
    if (A->imag != NULL) {
        dmialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = A->imag[j][i];
            }
        }
    }

    return AT;
}

/*
 * dmconjtranspose: conjugate-transpose a square matrix in place.
 *
 * The real part is transposed; the imaginary part, when present, is
 * transposed and negated, including the diagonal elements.  Returns
 * SP_FALSE without touching anything when A is NODATA or not square,
 * SP_TRUE otherwise.
 */
spBool dmconjtranspose(spDMatrix A)
{
    long r, c;
    double held;

    if (A == NODATA || A->row != A->col) {
        return SP_FALSE;
    }

    /* real part: swapping a diagonal element with itself changes nothing */
    for (r = 0; r < A->row; r++) {
        for (c = r; c < A->col; c++) {
            held = A->data[r][c];
            A->data[r][c] = A->data[c][r];
            A->data[c][r] = held;
        }
    }

    if (A->imag != NULL) {
        for (r = 0; r < A->row; r++) {
            /* the diagonal only changes sign */
            A->imag[r][r] = -A->imag[r][r];
            for (c = r + 1; c < A->col; c++) {
                held = A->imag[r][c];
                A->imag[r][c] = -A->imag[c][r];
                A->imag[c][r] = -held;
            }
        }
    }

    return SP_TRUE;
}
    
/*
 * xdmconjtranspose: return a new matrix that is the conjugate transpose
 * of A.
 *
 * The result is A->col by A->row with real part AT[i][j] = A[j][i]; when A
 * has an imaginary part the result gets one holding -A->imag[j][i].
 * Returns NODATA when A is NODATA (previously a NODATA argument was
 * dereferenced).
 */
spDMatrix xdmconjtranspose(spDMatrix A)
{
    long i, j;
    spDMatrix AT;

    if (A == NODATA) return NODATA;

    AT = xdmalloc(A->col, A->row);
    if (A->imag != NULL) {
        dmialloc(AT);
    }

    for (i = 0; i < AT->row; i++) {
        for (j = 0; j < AT->col; j++) {
            AT->data[i][j] = A->data[j][i];
            if (A->imag != NULL) {
                AT->imag[i][j] = -A->imag[j][i];
            }
        }
    }

    return AT;
}

#if 0
#if 1
#define SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK
#else
#undef SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK
#endif

/*
 * dmlockcore: invoke the matrix plugin's lock callbacks for x.
 *
 * NOTE: this function sits inside the `#if 0` region opened above, so it is
 * currently compiled out.
 *
 * Calls the plugin's rlock callback (if set) for the real part and, when
 * x->imag is not NULL, the ilock callback (if set) for the imaginary part,
 * forwarding lock_switch unchanged.  When
 * SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK is defined, ralloc/ialloc is called
 * first if getrptr/getiptr returns NULL.  Always returns SP_TRUE.
 * x is dereferenced without a NODATA check.
 */
spBool dmlockcore(spDMatrix x, int lock_switch)
{
#if defined(SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK)
    /* plugin-side real buffer not obtained yet: ask the plugin to allocate it */
    if (SpMatrixPluginRec(x)->getrptr(x->instance) == NULL) {
	SpMatrixPluginRec(x)->ralloc(x->instance, x->data, x->length);
    }
#endif
    if (SpMatrixPluginRec(x)->rlock != NULL) {
        SpMatrixPluginRec(x)->rlock(x->instance, x->data, x->length, lock_switch);
    }
    
    if (x->imag != NULL) {
#if defined(SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK)
	/* same lazy allocation for the imaginary buffer */
	if (SpMatrixPluginRec(x)->getiptr(x->instance) == NULL) {
	    SpMatrixPluginRec(x)->ialloc(x->instance, x->imag, x->length);
	}
#endif
        if (SpMatrixPluginRec(x)->ilock != NULL) {
            SpMatrixPluginRec(x)->ilock(x->instance, x->imag, x->length, lock_switch);
        }
    }

    return SP_TRUE;
}

/*
 * dmunlockcore: invoke the matrix plugin's unlock callbacks for x.
 *
 * NOTE: this function sits inside the `#if 0` region opened above, so it is
 * currently compiled out.
 *
 * Calls the plugin's runlock callback (if set) for the real part and, when
 * x->imag is not NULL, the iunlock callback (if set) for the imaginary
 * part, forwarding lock_switch unchanged.  When
 * SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK is defined, ralloc/ialloc is called
 * first if getrptr/getiptr returns NULL.  Always returns SP_TRUE.
 * x is dereferenced without a NODATA check.
 */
spBool dmunlockcore(spDMatrix x, int lock_switch)
{
    spDebug(100, "dmunlockcore", "in\n");
#if defined(SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK)
    /* plugin-side real buffer not obtained yet: ask the plugin to allocate it */
    if (SpMatrixPluginRec(x)->getrptr(x->instance) == NULL) {
	spDebug(100, "dmunlockcore", "call first ralloc\n");
	SpMatrixPluginRec(x)->ralloc(x->instance, x->data, x->length);
    }
#endif
    if (SpMatrixPluginRec(x)->runlock != NULL) {
        SpMatrixPluginRec(x)->runlock(x->instance, x->data, x->length, lock_switch);
    }
    
    if (x->imag != NULL) {
#if defined(SP_MATRIX_ENGINE_ALLOC_IN_UNLOCK)
	/* same lazy allocation for the imaginary buffer */
	if (SpMatrixPluginRec(x)->getiptr(x->instance) == NULL) {
	    spDebug(100, "dmunlockcore", "call first ialloc\n");
	    SpMatrixPluginRec(x)->ialloc(x->instance, x->imag, x->length);
	}
#endif
        if (SpMatrixPluginRec(x)->iunlock != NULL) {
            SpMatrixPluginRec(x)->iunlock(x->instance, x->imag, x->length, lock_switch);
        }
    }

    return SP_TRUE;
}

/*
 * dmisplugincomputable: report whether x can be handed to the plugin.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * With SP_USE_MATRIX_ENGINE defined, returns SP_TRUE when x is not NODATA,
 * has a plugin instance, and is either not locked or the plugin provides
 * no rlock callback; otherwise SP_FALSE.  Without SP_USE_MATRIX_ENGINE it
 * always returns SP_FALSE.
 */
spBool dmisplugincomputable(spDMatrix x)
{
#ifdef SP_USE_MATRIX_ENGINE
    if (x != NODATA && x->instance != NULL
        && (x->locked == SP_FALSE || SpMatrixPluginRec(x)->rlock == NULL)) {
	return SP_TRUE;
    } else {
	return SP_FALSE;
    }
#else
    return SP_FALSE;
#endif
}

/*
 * dmislocked: report the lock flag of x.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * With SP_USE_MATRIX_ENGINE defined, returns SP_TRUE when x is not NODATA
 * and x->locked is set, SP_FALSE otherwise.  Without SP_USE_MATRIX_ENGINE
 * it always returns SP_TRUE, even for NODATA.
 */
spBool dmislocked(spDMatrix x)
{
#ifdef SP_USE_MATRIX_ENGINE
    if (x != NODATA && x->locked) {
	return SP_TRUE;
    } else {
	return SP_FALSE;
    }
#else
    return SP_TRUE;
#endif
}

/*
 * dmlock: mark x as locked, running the plugin lock callbacks.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * With SP_USE_MATRIX_ENGINE defined: returns SP_FALSE when x is NODATA or
 * already locked.  Otherwise, if x has a plugin instance, calls
 * dmlockcore() with SP_MATRIX_LOCK_SWITCH_NORMAL and sets x->locked; then
 * returns SP_TRUE (also when there is no instance, in which case the flag
 * is left unchanged).  Without SP_USE_MATRIX_ENGINE it returns SP_FALSE.
 */
spBool dmlock(spDMatrix x)
{
#ifdef SP_USE_MATRIX_ENGINE
    if (x == NODATA || x->locked == SP_TRUE) return SP_FALSE;
    
    if (x->instance != NULL) {
	dmlockcore(x, SP_MATRIX_LOCK_SWITCH_NORMAL);
	x->locked = SP_TRUE;
    }
    
    return SP_TRUE;
#else
    return SP_FALSE;
#endif
}

/*
 * dmunlock: clear the lock flag of x, running the plugin unlock callbacks.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * With SP_USE_MATRIX_ENGINE defined: returns SP_FALSE when x is NODATA or
 * not locked.  Otherwise, if x has a plugin instance, calls dmunlockcore()
 * with SP_MATRIX_LOCK_SWITCH_NORMAL and clears x->locked; then returns
 * SP_TRUE (also when there is no instance, in which case the flag is left
 * unchanged).  Without SP_USE_MATRIX_ENGINE it returns SP_FALSE.
 */
spBool dmunlock(spDMatrix x)
{
#ifdef SP_USE_MATRIX_ENGINE
    if (x == NODATA || x->locked == SP_FALSE) return SP_FALSE;
    
    if (x->instance != NULL) {
	dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_NORMAL);
	x->locked = SP_FALSE;
    }
    
    return SP_TRUE;
#else
    return SP_FALSE;
#endif
}

/*
 * dmsync: run the plugin lock/unlock callbacks in "sync only" mode without
 * changing x->locked.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * With SP_USE_MATRIX_ENGINE defined: returns SP_FALSE when x is NODATA or
 * has no plugin instance.  A locked x goes through dmunlockcore(), an
 * unlocked x through dmlockcore(), both with
 * SP_MATRIX_LOCK_SWITCH_SYNC_ONLY; the core function's result is returned.
 * Presumably this copies data between the host arrays and the plugin side
 * in the direction matching the current lock state -- TODO confirm against
 * the plugin implementation.  Without SP_USE_MATRIX_ENGINE it returns
 * SP_FALSE.
 */
spBool dmsync(spDMatrix x)
{
#ifdef SP_USE_MATRIX_ENGINE
    if (x == NODATA || x->instance == NULL) return SP_FALSE;
    
    if (x->locked == SP_TRUE) {
	return dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    } else {
	return dmlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    }
#else
    return SP_FALSE;
#endif
}

/*
 * dmgetrptr: return the pointer to the real-part storage of x.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * Returns NULL for NODATA.  With SP_USE_MATRIX_ENGINE defined and x having
 * a plugin instance while not locked, returns whatever the plugin's
 * getrptr callback reports; in every other case returns x->data.
 */
void *dmgetrptr(spDMatrix x)
{
    if (x == NODATA) return NULL;
    
#ifdef SP_USE_MATRIX_ENGINE
    spDebug(100, "dmgetrptr", "locked = %d\n", x->locked);
    
    if (x->instance != NULL && x->locked == SP_FALSE) {
	void *ptr;
	ptr = SpMatrixPluginRec(x)->getrptr(x->instance);
	spDebug(100, "dmgetrptr", "ptr = %ld\n", (long)ptr);
	return ptr;
    }
#endif
    
    return x->data;
}

/*
 * dmgetiptr: return the pointer to the imaginary-part storage of x.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 *
 * Returns NULL for NODATA.  With SP_USE_MATRIX_ENGINE defined and x having
 * a plugin instance while not locked, returns whatever the plugin's
 * getiptr callback reports; in every other case returns x->imag (which may
 * itself be NULL).
 */
void *dmgetiptr(spDMatrix x)
{
    if (x == NODATA) return NULL;
    
#ifdef SP_USE_MATRIX_ENGINE
    if (x->instance != NULL && x->locked == SP_FALSE) {
	return SpMatrixPluginRec(x)->getiptr(x->instance);
    }
#endif
    
    return x->imag;
}

/*
 * dmplus: element-wise x += y over the first MIN(x->length, y->length)
 * elements; the result is stored in x.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 * The body indexes x->data[k] / x->length as flat arrays -- NOTE(review):
 * confirm that spDMatrix really exposes this layout before re-enabling.
 *
 * Neither x nor y is checked against NODATA.
 */
void dmplus(spDMatrix x, spDMatrix y)
{
    long k;
#ifdef SP_USE_MATRIX_ENGINE
    spMatrixPluginInternalFuncList *flist;
#endif
    
    /* y is complex but x is not: presumably gives x a zero-filled imaginary
       part (depends on dmizeros, not visible here) */
    if (y->imag != NULL && x->imag == NULL) {
	dmizeros(x, x->length);
    }

#ifdef SP_USE_MATRIX_ENGINE
    /* plugin path: used when x is plugin-computable, y has an instance and
       the plugin supplies a plus callback */
    if (dmisplugincomputable(x) == SP_TRUE && y->instance != NULL
        && (flist = SpGetDMatrixPluginInternalFuncList(x)) != NULL && flist->plus != NULL) {
	if (dmislocked(x) == SP_FALSE && dmislocked(y) == SP_TRUE) {
	    dmsync(y);
	}

	flist->plus(x->instance, y->instance, MIN(x->length, y->length));
	
	/* done when both operands are plugin-computable; otherwise fall
	   through to the host computation below as well */
	if (dmisplugincomputable(x) == SP_TRUE && dmisplugincomputable(y) == SP_TRUE) {
	    return;
	}
    }

    if (dmislocked(x) == SP_FALSE) {
        dmsync(x);
    }
    if (dmislocked(y) == SP_FALSE) {
        dmsync(y);
    }
#endif
    
#ifdef USE_SPL
    /* presumably nspdbAdd2(src, dst, n) adds src into dst -- TODO confirm */
    nspdbAdd2(y->data, x->data, MIN(x->length, y->length));
    if (x->imag != NULL && y->imag != NULL) {
	nspdbAdd2(y->imag, x->imag, MIN(x->length, y->length));
    }
#else
    /* elements of x beyond y->length are left unchanged */
    for (k = 0; k < x->length; k++) {
	if (k < y->length) {
	    x->data[k] = x->data[k] + y->data[k];
	    if (x->imag != NULL) {
		if (y->imag != NULL) {
		    x->imag[k] = x->imag[k] + y->imag[k];
		}
	    }
	}
    }
#endif

#ifdef SP_USE_MATRIX_ENGINE
    /* x is unlocked: run the unlock callbacks in sync-only mode after the
       host-side update */
    if (dmislocked(x) == SP_FALSE) {
        dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    }
#endif
    
    return;
}

/*
 * dmminus: element-wise subtraction stored in x.
 *
 * reverse == 0: x = x - y over the first MIN(x->length, y->length)
 *               elements; the rest of x is left unchanged.
 * reverse != 0: x = y - x over that range; elements of x beyond y->length
 *               are negated (y is treated as 0 there), and when y has no
 *               imaginary part the imaginary part of x is negated.
 *
 * NOTE: inside the `#if 0` region opened above; currently compiled out.
 * The body indexes x->data[k] / x->length as flat arrays -- NOTE(review):
 * confirm that spDMatrix really exposes this layout before re-enabling.
 *
 * Neither x nor y is checked against NODATA.
 */
void dmminus(spDMatrix x, spDMatrix y, spBool reverse)
{
    long k;
#ifdef SP_USE_MATRIX_ENGINE
    spMatrixPluginInternalFuncList *flist;
#endif
    
    /* y is complex but x is not: presumably gives x a zero-filled imaginary
       part (depends on dmizeros, not visible here) */
    if (y->imag != NULL && x->imag == NULL) {
	dmizeros(x, x->length);
    }

#ifdef SP_USE_MATRIX_ENGINE
    /* plugin path: used when x is plugin-computable, y has an instance and
       the plugin supplies a minus callback */
    if (dmisplugincomputable(x) == SP_TRUE && y->instance != NULL
        && (flist = SpGetDMatrixPluginInternalFuncList(x)) != NULL && flist->minus != NULL) {
	if (dmislocked(x) == SP_FALSE && dmislocked(y) == SP_TRUE) {
	    dmsync(y);
	}

	flist->minus(x->instance, y->instance, MIN(x->length, y->length), reverse);
	
	/* done when both operands are plugin-computable; otherwise fall
	   through to the host computation below as well */
	if (dmisplugincomputable(x) == SP_TRUE && dmisplugincomputable(y) == SP_TRUE) {
	    return;
	}
    }

    if (dmislocked(x) == SP_FALSE) {
        dmsync(x);
    }
    if (dmislocked(y) == SP_FALSE) {
        dmsync(y);
    }
#endif
    
    if (reverse) {
	/* x = y - x */
	for (k = 0; k < x->length; k++) {
	    if (k < y->length) {
		x->data[k] = y->data[k] - x->data[k];
		if (x->imag != NULL) {
		    if (y->imag != NULL) {
			x->imag[k] = y->imag[k] - x->imag[k];
		    } else {
			x->imag[k] = -x->imag[k];
		    }
		}
	    } else {
		/* past the end of y: 0 - x */
		x->data[k] = -x->data[k];
		if (x->imag != NULL) {
		    x->imag[k] = -x->imag[k];
		}
	    }
	}
    } else {
	/* x = x - y */
#ifdef USE_SPL
	/* presumably nspdbSub2(src, dst, n) subtracts src from dst -- TODO confirm */
	nspdbSub2(y->data, x->data, MIN(x->length, y->length));
	if (x->imag != NULL && y->imag != NULL) {
	    nspdbSub2(y->imag, x->imag, MIN(x->length, y->length));
	}
#else
	for (k = 0; k < x->length; k++) {
	    if (k < y->length) {
		x->data[k] = x->data[k] - y->data[k];
		if (x->imag != NULL) {
		    if (y->imag != NULL) {
			x->imag[k] = x->imag[k] - y->imag[k];
		    }
		}
	    }
	}
#endif
    }

#ifdef SP_USE_MATRIX_ENGINE
    /* x is unlocked: run the unlock callbacks in sync-only mode after the
       host-side update */
    if (dmislocked(x) == SP_FALSE) {
        dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    }
#endif
    
    return;
}

/*
 * In-place element-wise multiplication: x[k] = x[k] * y[k].
 *
 * Complex operands are multiplied as complex numbers; a real y scales both
 * parts of a complex x.  Elements of x that have no partner in y
 * (k >= y->length) are set to zero.  If y has an imaginary part and x does
 * not, x first receives a zero imaginary part.
 *
 * With SP_USE_MATRIX_ENGINE, the plugin's times() is tried first; the host
 * loop is skipped when both operands remain plugin computable afterwards.
 */
void dmtimes(spDMatrix x, spDMatrix y)
{
    long k;
    double re, im;
#ifdef SP_USE_MATRIX_ENGINE
    spMatrixPluginInternalFuncList *flist;
#endif

    if (x->imag == NULL && y->imag != NULL) {
	dmizeros(x, x->length);
    }

#ifdef SP_USE_MATRIX_ENGINE
    if (dmisplugincomputable(x) == SP_TRUE && y->instance != NULL) {
	flist = SpGetDMatrixPluginInternalFuncList(x);
	if (flist != NULL && flist->times != NULL) {
	    if (dmislocked(x) == SP_FALSE && dmislocked(y) == SP_TRUE) {
		dmsync(y);
	    }

	    flist->times(x->instance, y->instance, MIN(x->length, y->length));

	    if (dmisplugincomputable(x) == SP_TRUE && dmisplugincomputable(y) == SP_TRUE) {
		return;
	    }
	}
    }

    /* bring the host-side arrays up to date before using them */
    if (dmislocked(x) == SP_FALSE) {
	dmsync(x);
    }
    if (dmislocked(y) == SP_FALSE) {
	dmsync(y);
    }
#endif

    for (k = 0; k < x->length; k++) {
	if (k >= y->length) {
	    /* no partner element in y: the product is zero */
	    x->data[k] = 0.0;
	    if (x->imag != NULL) {
		x->imag[k] = 0.0;
	    }
	    continue;
	}

	if (x->imag == NULL) {
	    x->data[k] = x->data[k] * y->data[k];
	} else if (y->imag == NULL) {
	    x->data[k] = x->data[k] * y->data[k];
	    x->imag[k] = x->imag[k] * y->data[k];
	} else {
	    /* both parts are needed for both results, so go through temporaries */
	    re = x->data[k] * y->data[k] - x->imag[k] * y->imag[k];
	    im = x->data[k] * y->imag[k] + x->imag[k] * y->data[k];
	    x->data[k] = re;
	    x->imag[k] = im;
	}
    }

#ifdef SP_USE_MATRIX_ENGINE
    if (dmislocked(x) == SP_FALSE) {
	dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    }
#endif
}

/*
 * In-place element-wise division.
 *
 *   reverse == 0:  x[k] = x[k] / y[k]
 *   reverse != 0:  x[k] = y[k] / x[k]
 *
 * Complex operands are divided as complex numbers (numerator times the
 * conjugate of the denominator, divided by CSQUARE() of the denominator).
 * A zero denominator emits a warning through spwarning(); each part of the
 * numerator is then kept at 0.0 if it is zero and divided by
 * SP_TINY_NUMBER otherwise.  Elements of x with no partner in y
 * (k >= y->length) are set to zero in both directions.
 *
 * If y has an imaginary part and x does not, x first receives a zero
 * imaginary part.  With SP_USE_MATRIX_ENGINE, the plugin's divide() is
 * tried first; the host loops are skipped when both operands remain plugin
 * computable after that call.
 */
void dmdivide(spDMatrix x, spDMatrix y, spBool reverse)
{
    long k;
    double a;
    double xr, xi;
#ifdef SP_USE_MATRIX_ENGINE
    spMatrixPluginInternalFuncList *flist;
#endif
    
    if (y->imag != NULL && x->imag == NULL) {
	dmizeros(x, x->length);
    }

#ifdef SP_USE_MATRIX_ENGINE
    if (dmisplugincomputable(x) == SP_TRUE && y->instance != NULL
        && (flist = SpGetDMatrixPluginInternalFuncList(x)) != NULL && flist->divide != NULL) {
	if (dmislocked(x) == SP_FALSE && dmislocked(y) == SP_TRUE) {
	    dmsync(y);
	}

	flist->divide(x->instance, y->instance, MIN(x->length, y->length), reverse);
	
	if (dmisplugincomputable(x) == SP_TRUE && dmisplugincomputable(y) == SP_TRUE) {
	    return;
	}
    }

    if (dmislocked(x) == SP_FALSE) {
        dmsync(x);
    }
    if (dmislocked(y) == SP_FALSE) {
        dmsync(y);
    }
#endif
    
    if (reverse) {
	/* x = y / x */
	for (k = 0; k < x->length; k++) {
	    if (k < y->length) {
		if (x->imag != NULL) {
		    if (x->data[k] == 0.0 && x->imag[k] == 0.0) {
			/* complex denominator is exactly zero */
			spwarning("warning: dmoper: divide by zero\n");

			if (y->data[k] == 0.0) {
			    x->data[k] = 0.0;
			} else {
			    x->data[k] = y->data[k] / SP_TINY_NUMBER;
			}
			if (y->imag != NULL) {
			    if (y->imag[k] == 0.0) {
				x->imag[k] = 0.0;
			    } else {
				x->imag[k] = y->imag[k] / SP_TINY_NUMBER;
			    }
			} else {
			    x->imag[k] = 0.0;
			}
		    } else {
			/* y / x = y * conj(x) / CSQUARE(x) */
			a = CSQUARE(x->data[k], x->imag[k]);
			if (y->imag != NULL) {
			    xr = x->data[k] * y->data[k] + x->imag[k] * y->imag[k];
			    xi = x->data[k] * y->imag[k] - x->imag[k] * y->data[k];
			    x->data[k] = xr / a;
			    x->imag[k] = xi / a;
			} else {
			    /* real numerator: only conj(x) scaled by y / a remains */
			    x->data[k] =  x->data[k] * y->data[k] / a;
			    x->imag[k] = -x->imag[k] * y->data[k] / a;
			}
		    }
		} else {
		    /* purely real division */
		    if (x->data[k] != 0.0) {
			x->data[k] = y->data[k] / x->data[k];
		    } else {
			spwarning("warning: dmoper: divide by zero\n");

			if (y->data[k] == 0.0) {
			    x->data[k] = 0.0;
			} else {
			    x->data[k] = y->data[k] / SP_TINY_NUMBER;
			}
		    }
		}
	    } else {
		/* no partner element in y */
		x->data[k] = 0.0;
		if (x->imag != NULL) {
		    x->imag[k] = 0.0;
		}
	    }
	}
    } else {
	/* x = x / y */
	for (k = 0; k < x->length; k++) {
	    if (k < y->length) {
		if (x->imag != NULL && y->imag != NULL) {
		    if (y->data[k] == 0.0 && y->imag[k] == 0.0) {
			/* complex denominator is exactly zero */
			spwarning("warning: dmoper: divide by zero\n");

			if (x->data[k] == 0.0) {
			    x->data[k] = 0.0;
			} else {
			    x->data[k] = x->data[k] / SP_TINY_NUMBER;
			}
			if (x->imag[k] == 0.0) {
			    x->imag[k] = 0.0;
			} else {
			    x->imag[k] = x->imag[k] / SP_TINY_NUMBER;
			}
		    } else {
			/* x / y = x * conj(y) / CSQUARE(y) */
			a = CSQUARE(y->data[k], y->imag[k]);
			xr = x->data[k] * y->data[k] + x->imag[k] * y->imag[k];
			xi = -x->data[k] * y->imag[k] + x->imag[k] * y->data[k];
			x->data[k] = xr / a;
			x->imag[k] = xi / a;
		    }
		} else {
		    /* y is real here; x may be real or complex */
		    if (y->data[k] == 0.0) {
			spwarning("warning: dmoper: divide by zero\n");

			if (x->data[k] == 0.0) {
			    x->data[k] = 0.0;
			} else {
			    x->data[k] = x->data[k] / SP_TINY_NUMBER;
			}
			if (x->imag != NULL) {
			    if (x->imag[k] == 0.0) {
				x->imag[k] = 0.0;
			    } else {
				x->imag[k] = x->imag[k] / SP_TINY_NUMBER;
			    }
			}
		    } else {
			x->data[k] = x->data[k] / y->data[k];
			if (x->imag != NULL) {
			    x->imag[k] = x->imag[k] / y->data[k];
			}
		    }
		}
	    } else {
		/* no partner element in y */
		x->data[k] = 0.0;
		if (x->imag != NULL) {
		    x->imag[k] = 0.0;
		}
	    }
	}
    }

#ifdef SP_USE_MATRIX_ENGINE
    if (dmislocked(x) == SP_FALSE) {
        dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    }
#endif
    
    return;
}

/*
 * In-place element-wise power.
 *
 *   reverse == 0:  x[k] = x[k] ^ y[k]
 *   reverse != 0:  x[k] = y[k] ^ x[k]
 *
 * When both the base and the exponent have a zero imaginary part, pow() is
 * used; otherwise the result is computed as exp(exponent * log(base)) with
 * the library's in-place clog()/cexp() helpers.  A base that is exactly zero
 * yields zero.  Elements of x with no partner in y (k >= y->length) become
 * 1 (that is 1 + 0i for complex x) in the forward direction and 0 in the
 * reverse direction.
 *
 * If y has an imaginary part and x does not, x first receives a zero
 * imaginary part.  With SP_USE_MATRIX_ENGINE, the plugin's power() is tried
 * first; the host loops are skipped when both operands remain plugin
 * computable after that call.
 */
void dmpower(spDMatrix x, spDMatrix y, spBool reverse)
{
    long k;
    double xr, xi;
    double yr, yi;
#ifdef SP_USE_MATRIX_ENGINE
    spMatrixPluginInternalFuncList *flist;
#endif
    
    if (y->imag != NULL && x->imag == NULL) {
	dmizeros(x, x->length);
    }

#ifdef SP_USE_MATRIX_ENGINE
    if (dmisplugincomputable(x) == SP_TRUE && y->instance != NULL
        && (flist = SpGetDMatrixPluginInternalFuncList(x)) != NULL && flist->power != NULL) {
	if (dmislocked(x) == SP_FALSE && dmislocked(y) == SP_TRUE) {
	    dmsync(y);
	}

	flist->power(x->instance, y->instance, MIN(x->length, y->length), reverse);
	
	if (dmisplugincomputable(x) == SP_TRUE && dmisplugincomputable(y) == SP_TRUE) {
	    return;
	}
    }

    if (dmislocked(x) == SP_FALSE) {
        dmsync(x);
    }
    if (dmislocked(y) == SP_FALSE) {
        dmsync(y);
    }
#endif
    
    if (reverse) {
	/* x = y ^ x */
	if (x->imag != NULL) {
	    for (k = 0; k < x->length; k++) {
		if (k < y->length) {
		    if (y->imag == NULL) {
			yr = y->data[k];
			yi = 0.0;
		    } else {
			yr = y->data[k];
			yi = y->imag[k];
		    }
		    if (yr == 0.0 && yi == 0.0) {
			/* zero base */
			x->data[k] = 0.0;
			x->imag[k] = 0.0;
		    } else if (x->imag[k] == 0.0 && yi == 0.0) {
			/* both operands are effectively real */
			x->data[k] = pow(y->data[k], x->data[k]);
		    } else {
			/* y^x = exp(x * log(y)); clog() works in place on (yr, yi) */
			clog(&yr, &yi);
			xr = x->data[k] * yr - x->imag[k] * yi;
			xi = x->data[k] * yi + x->imag[k] * yr;
			cexp(&xr, &xi);
			x->data[k] = xr;
			x->imag[k] = xi;
		    }
		} else {
		    x->data[k] = 0.0;
		    x->imag[k] = 0.0;
		}
	    }
	} else {
	    for (k = 0; k < x->length; k++) {
		if (k < y->length) {
		    x->data[k] = pow(y->data[k], x->data[k]);
		} else {
		    x->data[k] = 0.0;
		}
	    }
	}
    } else {
	/* x = x ^ y */
	if (x->imag != NULL) {
	    for (k = 0; k < x->length; k++) {
		if (k < y->length) {
		    if (x->data[k] == 0.0 && x->imag[k] == 0.0) {
			/* zero base */
			x->data[k] = 0.0;
			x->imag[k] = 0.0;
		    } else {
			if (y->imag == NULL) {
			    yr = y->data[k];
			    yi = 0.0;
			} else {
			    yr = y->data[k];
			    yi = y->imag[k];
			}
			if (x->imag[k] == 0.0 && yi == 0.0) {
			    /* both operands are effectively real */
			    x->data[k] = pow(x->data[k], y->data[k]);
			} else {
			    /* x^y = exp(y * log(x)); clog() overwrites x[k] with its log */
			    clog(&x->data[k], &x->imag[k]);
			    xr = x->data[k] * yr - x->imag[k] * yi;
			    xi = x->data[k] * yi + x->imag[k] * yr;
			    cexp(&xr, &xi);
			    x->data[k] = xr;
			    x->imag[k] = xi;
			}
		    }
		} else {
		    /*
		     * No partner element in y: the exponent counts as zero, so
		     * the result is 1 + 0i, matching the real branch below
		     * (the imaginary part used to be set to 1.0 by mistake).
		     */
		    x->data[k] = 1.0;
		    x->imag[k] = 0.0;
		}
	    }
	} else {
	    for (k = 0; k < x->length; k++) {
		if (k < y->length) {
		    x->data[k] = pow(x->data[k], y->data[k]);
		} else {
		    x->data[k] = 1.0;
		}
	    }
	}
    }

#ifdef SP_USE_MATRIX_ENGINE
    if (dmislocked(x) == SP_FALSE) {
        dmunlockcore(x, SP_MATRIX_LOCK_SWITCH_SYNC_ONLY);
    }
#endif
    
    return;
}

/*
 * Apply the element-wise operation named by op to x in place, using y as
 * the second operand.
 *
 * op is one of "+", "-", "*", "/", "^", optionally prefixed with "!" to
 * swap the operand order (passed on as the reverse flag to dmminus(),
 * dmdivide() and dmpower()).  An unrecognised operator prints a message to
 * stderr and terminates the process with exit(1).
 */
void dmoper(spDMatrix x, const char *op, spDMatrix y)
{
    const char *opname = op;
    spBool reverse = 0;

    if (strveq(opname, "!")) {
	reverse = 1;
	opname++;
    }

    /* a complex y forces x to carry an imaginary part as well */
    if (x->imag == NULL && y->imag != NULL) {
	dmizeros(x, x->length);
    }

    if (strveq(opname, "+")) {
	dmplus(x, y);
	return;
    }
    if (strveq(opname, "-")) {
	dmminus(x, y, reverse);
	return;
    }
    if (strveq(opname, "*")) {
	dmtimes(x, y);
	return;
    }
    if (strveq(opname, "/")) {
	dmdivide(x, y, reverse);
	return;
    }
    if (strveq(opname, "^")) {
	dmpower(x, y, reverse);
	return;
    }

    fprintf(stderr, "dmoper: unknouwn operation: %s\n", opname);
    exit(1);
}

/*
 * Non-destructive variant of dmoper(): clones a, applies op with b to the
 * clone and returns the clone.  a itself is not passed to dmoper().
 */
spDMatrix xdmoper(spDMatrix a, const char *op, spDMatrix b)
{
    spDMatrix result = xdmclone(a);

    dmoper(result, op, b);

    return result;
}
#endif


/*
 * Apply a scalar operation to every element of the matrix x in place.
 *
 * op is one of "+", "-", "*", "/", "^", optionally prefixed with "!" to
 * swap the operand order (e.g. "!-" computes t - x, "!/" computes t / x,
 * "!^" computes t ^ x).  t is a real scalar; when x has an imaginary part
 * it is updated as required by the operation.
 *
 * Division by zero does not produce inf: a zero numerator part stays 0.0
 * and a non-zero one is divided by SP_TINY_NUMBER.  The reverse direction
 * additionally emits a warning through spwarning().
 *
 * An unrecognised operator prints a message to stderr and terminates the
 * process with exit(1).
 */
void dmscoper(spDMatrix x, const char *op, double t)
{
    long k, l;
    int reverse = 0;
    const char *op2 = op;

    if (strveq(op2, "!")) {
	reverse = 1;
	op2++;
    }

    if (strveq(op2, "+")) {
	for (k = 0; k < x->row; k++) {
#ifdef USE_SPL
	    nspdbAdd1(t, x->data[k], x->col);
#else
	    for (l = 0; l < x->col; l++) {
		/* index by (row, column); this used to hit x->data[k][k] */
		x->data[k][l] = x->data[k][l] + t;
	    }
#endif
	}
    } else if (strveq(op2, "-")) {
	for (k = 0; k < x->row; k++) {
	    for (l = 0; l < x->col; l++) {
		if (reverse) {
		    /* t - x: the imaginary part just changes sign */
		    x->data[k][l] = t - x->data[k][l];
		    if (x->imag != NULL) {
			x->imag[k][l] = -x->imag[k][l];
		    }
		} else {
		    x->data[k][l] = x->data[k][l] - t;
		}
	    }
	}
    } else if (strveq(op2, "*")) {
	for (k = 0; k < x->row; k++) {
#ifdef USE_SPL
	    nspdbMpy1(t, x->data[k], x->col);
	    if (x->imag != NULL) {
		nspdbMpy1(t, x->imag[k], x->col);
	    }
#else
	    for (l = 0; l < x->col; l++) {
		x->data[k][l] = x->data[k][l] * t;
		if (x->imag != NULL) {
		    x->imag[k][l] = x->imag[k][l] * t;
		}
	    }
#endif
	}
    } else if (strveq(op2, "/")) {
	double a;
	for (k = 0; k < x->row; k++) {
	    for (l = 0; l < x->col; l++) {
		if (reverse) {
		    /* t / x */
		    if (x->imag != NULL) {
			if (x->data[k][l] == 0.0 && x->imag[k][l] == 0.0) {
			    spwarning("warning: dmscoper: divide by zero\n");

			    if (t == 0.0) {
				x->data[k][l] = 0.0;
			    } else {
				x->data[k][l] = t / SP_TINY_NUMBER;
			    }
			    x->imag[k][l] = 0.0;
			} else {
			    /* t / x = t * conj(x) / CSQUARE(x) */
			    a = CSQUARE(x->data[k][l], x->imag[k][l]);
			    x->data[k][l] = x->data[k][l] * t / a;
			    x->imag[k][l] = -x->imag[k][l] * t / a;
			}
		    } else {
			if (x->data[k][l] != 0.0) {
			    x->data[k][l] = t / x->data[k][l];
			} else {
			    spwarning("warning: dmscoper: divide by zero\n");

			    if (t == 0.0) {
				x->data[k][l] = 0.0;
			    } else {
				x->data[k][l] = t / SP_TINY_NUMBER;
			    }
			}
		    }
		} else {
		    /* x / t */
		    if (t != 0.0) {
			x->data[k][l] = x->data[k][l] / t;
			if (x->imag != NULL) {
			    x->imag[k][l] = x->imag[k][l] / t;
			}
		    } else {
			if (x->data[k][l] == 0.0) {
			    x->data[k][l] = 0.0;
			} else {
			    x->data[k][l] = x->data[k][l] / SP_TINY_NUMBER;
			}
			if (x->imag != NULL) {
			    if (x->imag[k][l] == 0.0) {
				x->imag[k][l] = 0.0;
			    } else {
				x->imag[k][l] = x->imag[k][l] / SP_TINY_NUMBER;
			    }
			}
		    }
		}
	    }
	}
    } else if (strveq(op2, "^")) {
	double a; 
	for (k = 0; k < x->row; k++) {
	    for (l = 0; l < x->col; l++) {
		if (reverse) {
		    /* t ^ x */
		    if (x->imag != NULL && x->imag[k][l] != 0.0) {
			/* t^x = exp(x * log(t)) */
			a = log(t);
			x->data[k][l] *= a;
			x->imag[k][l] *= a;
			cexp(&x->data[k][l], &x->imag[k][l]);
		    } else {
			/* exponent is element (k, l); this used to read x->data[k][k] */
			x->data[k][l] = pow(t, x->data[k][l]);
		    }
		} else {
		    /* x ^ t */
		    if (x->imag != NULL && x->imag[k][l] != 0.0) {
			/* x^t = exp(t * log(x)); clog() overwrites the element with its log */
			clog(&x->data[k][l], &x->imag[k][l]);
			x->data[k][l] *= t;
			x->imag[k][l] *= t;
			cexp(&x->data[k][l], &x->imag[k][l]);
		    } else {
			x->data[k][l] = pow(x->data[k][l], t);
		    }
		}
	    }
	}
    } else {
	fprintf(stderr, "dmscoper: unknouwn operation: %s\n", op2);
	exit(1);
    }

    return;
}

/*
 * Non-destructive variant of dmscoper(): clones x, applies the scalar
 * operation op with t to the clone and returns the clone.
 */
spDMatrix xdmscoper(spDMatrix x, const char *op, double t)
{
    spDMatrix result = xdmclone(x);

    dmscoper(result, op, t);

    return result;
}

/*
 * Return dvabssum(a) minus the absolute value of element k (the complex
 * magnitude when a has an imaginary part).
 */
static double dvabssumexcept(spDVector a, long k)
{
    double total;
    double excluded;

    total = dvabssum(a);

    if (a->imag == NULL) {
        excluded = FABS(a->data[k]);
    } else {
        excluded = spPreciseCplxAbs(a->data[k], a->imag[k]);
    }

    return total - excluded;
}

/*
 * Balance the square matrix A in place by a diagonal similarity scaling.
 *
 * For each index i, the off-diagonal absolute sums of column i (c) and
 * row i (r) are compared; column i is multiplied and row i divided by a
 * power of beta (= 2) until c and r are within a factor beta of each
 * other.  Sweeps over all i are repeated until no index reduces c + r
 * below 95% of its value before scaling.
 *
 * A: input/output square matrix (real or complex).
 * D: optional output (may be NODATA); reset to the identity and then
 *    updated with the accumulated scale factors on its diagonal, so that
 *    outputA = inv(D) * A * D.  Must be at least as large as A.
 *
 * Returns SP_FALSE when A is not square or D is too small, SP_TRUE
 * otherwise.
 */
spBool dmbalance(spDMatrix A /* input/output */, spDMatrix D /* outputA=inv(D)*A*D */)
{
    long k, i;
    double beta;
    double r, s, c, f;
    spBool converged = SP_FALSE;
    spDVector ac, ar;
    
    if (A->row != A->col || (D != NODATA && (D->row < A->row || D->col < A->col))) {
        return SP_FALSE;
    }

    if (A->imag != NULL) {
        ac = xdvrialloc(A->row);
        ar = xdvrialloc(A->col);
    } else {
        ac = xdvalloc(A->row);
        ar = xdvalloc(A->col);
    }

    if (D != NODATA) dmeye(D);

    k = 0;
    beta = 2.0;
    
    while (converged == SP_FALSE) {
        converged = SP_TRUE;
        spDebug(100, "dmbalance", "k = %ld\n", k);

        for (i = 0; i < A->row; i++) {
            dmextractcol(A, i, ac);
            c = dvabssumexcept(ac, i);
            
            dmextractrow(A, i, ar);
            r = dvabssumexcept(ar, i);
            
            s = c + r;
            spDebug(100, "dmbalance", "k = %ld, i = %ld, original: c = %f, r = %f, s = %f\n", k, i, c, r, s);
            
            f = 1.0;

            /*
             * Scale only when both off-diagonal norms are strictly positive.
             * With c == 0 and r == 0 (e.g. a 1x1 or diagonal matrix) the
             * second loop below would never terminate, and with only one of
             * them zero (or a tiny negative rounding residue) f would run
             * off to inf or 0.  Such rows/columns cannot be balanced, so
             * they are left untouched.
             */
            if (c > 0.0 && r > 0.0) {
                while (c < r / beta) {
                    c *= beta;
                    r /= beta;
                    f *= beta;
                }
                while (c >= r * beta) {
                    c /= beta;
                    r *= beta;
                    f /= beta;
                }
            }

            spDebug(100, "dmbalance", "k = %ld, i = %ld, updated: c = %f, r = %f, f = %f\n", k, i, c, r, f);
            
            if (c + r < 0.95 * s) {
                converged = SP_FALSE;
                spDebug(80, "dmbalance", "k = %ld, i = %ld, not converged\n", k, i);
                
                if (D != NODATA) {
                    D->data[i][i] *= f;
                }

                /* column i is multiplied by f ... */
                dvscoper(ac, "*", f);
                dmpastecol(A, i, ac, 0, ac->length, 0);

                /*
                 * ... and row i divided by f.  The diagonal entry is taken
                 * from the already scaled column so that it ends up
                 * unchanged after the division.
                 */
                ar->data[i] = ac->data[i];
                if (ar->imag != NULL) {
                    ar->imag[i] = ac->imag[i];
                }
                dvscoper(ar, "*", 1.0 / f);
                dmpasterow(A, i, ar, 0, ar->length, 0);
            } else {
                spDebug(80, "dmbalance", "k = %ld, i = %ld, converged\n", k, i);
            }
        }

        ++k;
    }

    xdvfree(ac);
    xdvfree(ar);
    
    return SP_TRUE;
}

/*
 * Non-destructive variant of dmbalance(): returns a balanced clone of A,
 * or NODATA when A is not square.
 *
 * xoD: may be NULL.  Otherwise *xoD receives a newly allocated
 *      A->row x A->col matrix that dmbalance() fills as its D output
 *      (outputA = inv(D) * A * D); the caller must free it.
 */
spDMatrix xdmbalance(spDMatrix A, spDMatrix *xoD /* outputA=inv(D)*A*D. can be NULL, must be freed */)
{
    spDMatrix balanced;
    spDMatrix scale = NODATA;

    if (A->row != A->col) {
        return NODATA;
    }

    balanced = xdmclone(A);

    if (xoD != NULL) {
        scale = xdmalloc(A->row, A->col);
        *xoD = scale;
    }

    dmbalance(balanced, scale);

    return balanced;
}

/*
 * Functions for Householder matrices.
 * x --> v (normalized Householder vector)
 * H = eye(n) - 2 * v * v'; (H: Householder matrix)
 * H * x --> [x(0)*x0weight, 0, 0, ...];
 * dvhouse returns x0weight.
 * Complex vectors and matrices are supported.
 */
/*
 * Threshold for the Householder routines: a vector is processed only when
 * its norm is strictly greater than this value (tests of the form
 * `nrmx > SP_HOUSE_MIN_NORM`).  Currently 0.0; the commented-out 1.0e-12
 * is an alternative value kept in the source.
 */
#define SP_HOUSE_MIN_NORM /*1.0e-12*/0.0
/*
 * Replace x (real or complex) in place by a normalized Householder vector
 * and return the weight applied to x(0).
 *
 * With nrm = norm of x and x0 = x(0):
 *   - if |x0| != 0, weight = -nrm / |x0| and alpha = weight * x0;
 *   - if |x0| == 0, alpha = -nrm and the weight stays 1.0.
 * alpha is subtracted from x(0) and x is divided by the norm of the
 * result.  That norm is derived from quantities pre-divided by nrm so that
 * very small norms do not lose precision.
 *
 * When the norm is not greater than SP_HOUSE_MIN_NORM, x is zero-filled
 * via dvrizeros() and 0.0 is returned.
 */
static double dvcplxhouse(spDVector x)
{
    double norm;
    double weight = 1.0;
    double alpha_re, alpha_im;
    double x0_re, x0_im;
    double x0_mag;
    double u0_re, u0_im;

    norm = spPreciseCplxVectorNorm(x->data, x->imag, x->length);

    if (norm > SP_HOUSE_MIN_NORM) {
        x0_re = x->data[0];
        x0_im = (x->imag != NULL) ? x->imag[0] : 0.0;

        x0_mag = spPreciseCplxAbs(x0_re, x0_im);
        if (x0_mag != 0.0) {
            weight = -norm / x0_mag;
            alpha_re = weight * x0_re;
            alpha_im = weight * x0_im;
            spDebug(80, "dvcplxhouse", "x0abs = %f, x0weight = %f, alpha = %f + %fi\n",
                    x0_mag, weight, alpha_re, alpha_im);
        } else {
            alpha_re = -norm;
            alpha_im = 0.0;
        }

        /* u = x - alpha * e0 */
        x->data[0] -= alpha_re;
        u0_re = x->data[0];
        u0_im = 0.0;
        if (x->imag != NULL) {
            x->imag[0] -= alpha_im;
            u0_im = x->imag[0];
        }

        /* for very small norm */
        x0_re /= norm;
        x0_im /= norm;
        u0_re /= norm;
        u0_im /= norm;

        /* norm of u: swap the |x0|^2 contribution for |u0|^2 */
        norm *= sqrt(1.0 - x0_re * x0_re - x0_im * x0_im + u0_re * u0_re + u0_im * u0_im);
        spDebug(80, "dvcplxhouse", "nrmx = %g, alpha = %g + %gi, x0 = %g + %gi, u0 = %g + %gi\n",
                norm, alpha_re, alpha_im, x0_re, x0_im, u0_re, u0_im);
    }

    if (norm > SP_HOUSE_MIN_NORM) {
        dvscoper(x, "/", norm);
        return weight;
    }

    dvrizeros(x, x->length);
    return 0.0;
}

/*
 * Replace x in place by a normalized Householder vector.
 *
 * Complex vectors are forwarded to dvcplxhouse().  For real vectors,
 * alpha = -sign(x(0)) * norm(x) is subtracted from x(0) and x is divided by
 * the norm of the result (computed from norm-relative quantities so that
 * very small norms keep their precision).
 *
 * Returns alpha divided by the first element of the normalized vector, or
 * 0.0 (with x zero-filled) when the norm is not greater than
 * SP_HOUSE_MIN_NORM.
 *
 * NOTE(review): the complex path returns -norm/|x(0)| (a weight for the
 * original x(0)), whereas this real path divides by the normalized first
 * element; confirm which convention callers of the return value expect.
 */
double dvhouse(spDVector x)
{
    double norm;
    double alpha = 0.0;
    double first;
    double rel_alpha, rel_first;

    if (x->imag != NULL) {
        return dvcplxhouse(x);
    }

    norm = spPreciseVectorNorm(x->data, x->length);

    if (norm > SP_HOUSE_MIN_NORM) {
        first = x->data[0];
        /* alpha takes the sign opposite to x(0) */
        alpha = (first < 0) ? norm : -norm;

        x->data[0] -= alpha;

        /* for very small norm */
        rel_alpha = alpha / norm;
        rel_first = first / norm;
        spDebug(80, "dvhouse", "alpha = %g, alpha2 = %g, x02 = %g\n", alpha, rel_alpha, rel_first);
        norm *= sqrt(1.0 + rel_alpha * (rel_alpha - 2.0 * rel_first));
    }

    if (norm > SP_HOUSE_MIN_NORM) {
        dvscoper(x, "/", norm);
        spDebug(80, "dvhouse", "nrmx = %g, alpha = %g\n", norm, alpha);
    } else {
        spDebug(10, "dvhouse", "nrmx (%g) is too small\n", norm);
        x->data[0] = 0.0;
        alpha = 0.0;
    }

    if (x->data[0] != 0.0) {
        return alpha / x->data[0];
    }

    dvzeros(x, x->length);
    return 0.0;
}

/*
 * Non-destructive variant of dvhouse(): clones x, turns the clone into a
 * Householder vector and returns it.  When x0weight is not NULL it
 * receives the value returned by dvhouse().
 */
spDVector xdvhouse(spDVector x,
                 double *x0weight /* output weight for x(0) (conversion result of x(0)) */)
{
    spDVector house = xdvclone(x);
    double weight = dvhouse(house);

    if (x0weight != NULL) {
        *x0weight = weight;
    }

    return house;
}

/*
 * Householder-vector construction for a (possibly complex) vector x using
 * a real alpha.
 *
 * alpha = -sign(real(x(0))) * norm(x) is subtracted from the real part of
 * x(0) only (the imaginary part of x(0) is left as is), and x is then
 * divided by the norm of the modified vector.  In addition a complex
 * factor beta is computed from the norm-relative values of the original
 * and modified first elements and written to *orbeta / *oibeta (each may
 * be NULL).
 * NOTE(review): beta presumably replaces the fixed factor 2 of
 * H = I - 2*v*v' so that the reflection maps x onto a real multiple of
 * e0 -- confirm against the callers.
 *
 * When the norm is not greater than SP_HOUSE_MIN_NORM, x is zero-filled,
 * beta is reported as 2 + 0i and 0.0 is returned.
 *
 * Returns alpha.
 */
double dvcplxtorealhouse(spDVector x, double *orbeta, double*oibeta)
{
    double sqsumx_non0;
    double nrmx;
    double alpha;
    double xr0, xi0;
    double ur0, ui0;
    double rnum, inum, rden, iden, den;
    double rbeta, ibeta;

    alpha = 0.0;
    nrmx = spPreciseCplxVectorNorm(x->data, x->imag, x->length);
    if (nrmx > SP_HOUSE_MIN_NORM) {
        xr0 = x->data[0];
        if (x->imag != NULL) {
            xi0 = x->imag[0];
        } else {
            xi0 = 0.0;
        }
        /* alpha is real and takes the sign opposite to real(x(0)) */
        alpha = -nrmx;
        if (x->data[0] < 0) alpha = -alpha;

        /* u = x - alpha * e0; only the real part of element 0 changes */
        x->data[0] -= alpha;
        ur0 = x->data[0];
        if (x->imag != NULL) {
            ui0 = x->imag[0];
        } else {
            ui0 = 0.0;
        }

        /* for very small nrmx */
        xr0 /= nrmx;
        xi0 /= nrmx;
        ur0 /= nrmx;
        ui0 /= nrmx;

        /* relative squared sum of all elements except element 0 */
        sqsumx_non0 = 1.0 - xr0 * xr0 - xi0 * xi0;
        /* norm of u: the contribution of x(0) is replaced by that of u(0) */
        nrmx *= sqrt(sqsumx_non0 + ur0 * ur0 + ui0 * ui0);
        spDebug(80, "dvcplxtorealhouse", "nrmx = %g, alpha = %g, x0 = %g + %gi, u0 = %g + %gi\n",
                nrmx, alpha, xr0, xi0, ur0, ui0);

        if (nrmx > SP_HOUSE_MIN_NORM) {
            /* num and den are complex conjugates of each other (iden = -inum) */
            rnum = rden = sqsumx_non0 + xr0 * ur0 + xi0 * ui0;
            inum = xr0 * ui0 - xi0 * ur0;
            iden = -inum;
            den = rden * rden + iden * iden;
            /* beta = 1 + num / den, with the division expanded by hand */
            rbeta = 1.0 + (rden * rnum + iden * inum) / den;
            ibeta = 2.0 * rnum * inum / den; // (rden * inum - iden * rnum) / den;
            spDebug(80, "dvcplxtorealhouse", "num = %g + %gi, den = %g + %gi, beta = %g + %gi\n",
                    rnum, inum, rden, iden, rbeta, ibeta);

            if (orbeta != NULL) *orbeta = rbeta;
            if (oibeta != NULL) *oibeta = ibeta;
        }
    }
        
    if (nrmx > SP_HOUSE_MIN_NORM) {
        dvscoper(x, "/", nrmx);
    } else {
        /* degenerate input: zero vector, alpha = 0, beta = 2 + 0i */
        dvrizeros(x, x->length);
        alpha = 0.0;
        if (orbeta != NULL) *orbeta = 2.0;
        if (oibeta != NULL) *oibeta = 0.0;
    }

    return alpha;
}

/*
 * Fill the leading v->length x v->length part of H with the complex
 * Householder matrix I - 2 * v * v' (v' = conjugate transpose).
 *
 * H receives an imaginary part via dmialloc() if it has none.  When v has
 * no imaginary part the imaginary entries of H are set to zero.  The size
 * of H is not checked here.  Always returns SP_TRUE.
 */
static spBool dmcplxhouse(spDMatrix H /* output householder matrix */,
                          spDVector v /* input hyperplane normal vector */)
{
    long r, c;
    double re, im;

    if (H->imag == NULL) {
        dmialloc(H);
    }

    for (r = 0; r < v->length; r++) {
        for (c = 0; c < v->length; c++) {
            if (v->imag != NULL) {
                /* -2.0 * v * v' */
                re = -2.0 * (v->data[r] * v->data[c] + v->imag[r] * v->imag[c]);
                im = -2.0 * (-v->data[r] * v->imag[c] + v->imag[r] * v->data[c]);
            } else {
                re = -2.0 * v->data[r] * v->data[c];
                im = 0.0;
            }

            /* identity contribution on the diagonal */
            if (r == c) {
                re += 1.0;
            }

            H->data[r][c] = re;
            H->imag[r][c] = im;
        }
    }

    return SP_TRUE;
}

/*
 * Fill the leading v->length x v->length part of H with the Householder
 * matrix I - 2 * v * v'.
 *
 * Returns SP_FALSE when H has fewer rows or columns than v has elements.
 * A complex v is delegated to dmcplxhouse().
 */
spBool dmhouse(spDMatrix H /* output householder matrix */,
               spDVector v /* input hyperplane normal vector */)
{
    long r, c;
    double entry;

    if (H->row < v->length || H->col < v->length) {
        return SP_FALSE;
    }

    if (v->imag != NULL) {
        return dmcplxhouse(H, v);
    }

    for (r = 0; r < v->length; r++) {
        for (c = 0; c < v->length; c++) {
            entry = -2.0 * v->data[r] * v->data[c];
            if (r == c) {
                /* identity contribution on the diagonal */
                entry += 1.0;
            }
            H->data[r][c] = entry;
        }
    }

    return SP_TRUE;
}

/*
 * Allocate and return the v->length x v->length Householder matrix for v.
 * The result has an imaginary part exactly when v has one.
 */
spDMatrix xdmhouse(/* output householder matrix */
                 spDVector v /* input hyperplane normal vector */)
{
    spDMatrix house;

    house = (v->imag != NULL) ? xdmrialloc(v->length, v->length)
                              : xdmalloc(v->length, v->length);
    dmhouse(house, v);

    return house;
}

/*
 * Core of the Hessenberg reduction by Householder reflections.
 *
 * For each column j = 0 .. n-3, a Householder vector is built from the
 * part of column j below the diagonal, and A is replaced by W' * A * W',
 * where W' is the identity with the reflection W = I - 2*x*x' embedded in
 * the trailing (n-j-1) x (n-j-1) block.  When P is given, the same
 * reflections are accumulated into P (P = P * W').
 *
 * A: input/output square matrix (n x n), real or complex.
 * B: work matrix, at least n x n, with an imaginary part if A has one.
 * x: work vector with room for n elements (and an imaginary part if A has
 *    one); its length field is overwritten.
 * P: optional output (may be NODATA) with inputA = P * outputA * P'.  It is
 *    reset to the identity first and must have an imaginary part when A
 *    has one.
 */
static void dmhesscore(spDMatrix A /* input/output */, spDMatrix B /* buffer only */, spDVector x /* buffer only */,
                       spDMatrix P /* inputA=P*outputA*P' */)
{
    long j, k, l, m, u, n;
    long row_offset, col_offset;
    long nloop;
    long xlen;
    double wre, wim;
    double bre, bim;
    double pre, pim;
    
    n = A->row;
    nloop = n - 2;
    
    /* P is optional: every other use below is guarded the same way */
    if (P != NODATA) {
        dmeye(P);
    }

    for (j = 0; j < nloop; j++) {
        xlen = n - (j + 1);
        spDebug(80, "dmhesscore", "j = %ld, xlen = %ld\n", j, xlen);
        /* Householder vector from the sub-diagonal part of column j */
        dmcutcol(A, j, j + 1, xlen, x);
        x->length = xlen;
        dvhouse(x);

        /* B = W' * A: rows 0..j are copied unchanged ... */
        for (k = 0; k <= j; k++) {
            for (l = 0; l < n; l++) {
                B->data[k][l] = A->data[k][l];
                if (A->imag != NULL) {
                    B->imag[k][l] = A->imag[k][l];
                }
            }
        }
        /* ... and rows j+1..n-1 are reflected */
        for (k = j + 1; k < n; k++) {
            u = k - (j + 1);
            if (A->imag != NULL) {
                for (l = 0; l < n; l++) {
                    bre = bim = 0.0;
                    for (m = 0; m < xlen; m++) {
                        /* -2*x*x' */
                        wre = -2.0 * (x->data[u] * x->data[m] + x->imag[u] * x->imag[m]);
                        wim = -2.0 * (-x->data[u] * x->imag[m] + x->imag[u] * x->data[m]);
                        if (u == m) {
                            wre += 1.0;
                        }
                        spDebug(100, "dmhesscore", "first loop: w[%ld][%ld] = %f + %fi\n", u, m, wre, wim);
                        /* W*A */
                        bre += wre * A->data[j + 1 + m][l] - wim * A->imag[j + 1 + m][l];
                        bim += wre * A->imag[j + 1 + m][l] + wim * A->data[j + 1 + m][l];
                    }
                    B->data[k][l] = bre;
                    B->imag[k][l] = bim;
                }
            } else {
                for (l = 0; l < n; l++) {
                    bre = 0.0;
                    for (m = 0; m < xlen; m++) {
                        wre = -2.0 * x->data[u] * x->data[m];
                        if (u == m) {
                            wre += 1.0;
                        }
                        /* W*A */
                        bre += wre * A->data[j + 1 + m][l];
                    }
                    B->data[k][l] = bre;
                }
            }
        }
        
        for (k = 0; k < n; k++) {
            /* since B->data[k][0] (and B->imag[k][0]) is used for buffer */
            A->data[k][0] = B->data[k][0];
            if (A->imag != NULL) {
                A->imag[k][0] = B->imag[k][0];
            }
        }

        col_offset = j + 1;
        
        /* A = B * W': columns 0..j are copied, columns j+1..n-1 are reflected */
        for (k = 0; k < n; k++) {
            /*
             * The new row k of P is staged in B before being written back,
             * because P[k][*] is still being read while it is computed: row
             * k-1 of B (already consumed) is used, or column 0 of B for
             * k == 0.
             */
            row_offset = k - 1;
            
            for (l = 1; l <= j; l++) {
                A->data[k][l] = B->data[k][l];
                if (A->imag != NULL) {
                    A->imag[k][l] = B->imag[k][l];
                }
            }
            if (A->imag != NULL) {
                for (l = col_offset; l < n; l++) {
                    u = l - col_offset;
                    bre = bim = pre = pim = 0.0;
                    for (m = 0; m < xlen; m++) {
                        /* -2*x*x' */
                        wre = -2.0 * (x->data[m] * x->data[u]  + x->imag[m] * x->imag[u]);
                        wim = -2.0 * (-x->data[m] * x->imag[u] + x->imag[m] * x->data[u]);
                        if (u == m) {
                            wre += 1.0;
                        }
                        spDebug(100, "dmhesscore", "second loop: w[%ld][%ld] = %f + %fi\n", m, u, wre, wim);

                        /* B*W */
                        bre += B->data[k][col_offset + m] * wre - B->imag[k][col_offset + m] * wim;
                        bim += B->data[k][col_offset + m] * wim + B->imag[k][col_offset + m] * wre;
                        if (P != NODATA) {
                            /* P*W */
                            pre += P->data[k][col_offset + m] * wre - P->imag[k][col_offset + m] * wim;
                            pim += P->data[k][col_offset + m] * wim + P->imag[k][col_offset + m] * wre;
                        }
                    }
                    A->data[k][l] = bre;
                    A->imag[k][l] = bim;
                    if (P != NODATA) {
                        if (row_offset < 0) {
                            B->data[l][0] = pre;
                            B->imag[l][0] = pim;
                        } else {
                            B->data[row_offset][l] = pre;
                            B->imag[row_offset][l] = pim;
                        }
                    }
                }
            } else {
                for (l = col_offset; l < n; l++) {
                    u = l - col_offset;
                    bre = pre = 0.0;
                    for (m = 0; m < xlen; m++) {
                        wre = -2.0 * x->data[u] * x->data[m];
                        if (u == m) {
                            wre += 1.0;
                        }
                        /* B*W */
                        bre += B->data[k][col_offset + m] * wre;
                        if (P != NODATA) {
                            /* P*W */
                            pre += P->data[k][col_offset + m] * wre;
                        }
                    }
                    A->data[k][l] = bre;
                    if (P != NODATA) {
                        if (row_offset < 0) {
                            B->data[l][0] = pre;
                        } else {
                            B->data[row_offset][l] = pre;
                        }
                    }
                }
            }
            
            /* write the staged row of P back */
            if (P != NODATA) {
                for (l = col_offset; l < n; l++) {
                    if (row_offset < 0) {
                        P->data[k][l] = B->data[l][0];
                        if (A->imag != NULL) {
                            P->imag[k][l] = B->imag[l][0];
                        }
                    } else {
                        P->data[k][l] = B->data[row_offset][l];
                        if (A->imag != NULL) {
                            P->imag[k][l] = B->imag[row_offset][l];
                        }
                    }
                }
            }
        }
    }

    return;
}

/*
 * Reduce the square matrix A in place with dmhesscore() (Householder-based
 * Hessenberg reduction), allocating and freeing the work buffers it needs.
 *
 * P may be NODATA; otherwise it must be at least as large as A and is
 * passed to dmhesscore() as the transform output (inputA = P*outputA*P').
 *
 * Returns SP_FALSE when A is not square or P is too small, SP_TRUE
 * otherwise.
 */
spBool dmhess(spDMatrix A /* input/output */, spDMatrix P /* inputA=P*outputA*P' */)
{
    spDMatrix work;
    spDVector house;

    if (A->row != A->col) {
        return SP_FALSE;
    }
    if (P != NODATA && (P->row < A->row || P->col < A->col)) {
        return SP_FALSE;
    }

    /* buffers follow A: complex when A is complex, real otherwise */
    if (A->imag == NULL) {
        work = xdmalloc(A->row, A->col);
        house = xdvalloc(A->row);
    } else {
        work = xdmrialloc(A->row, A->col);
        house = xdvrialloc(A->row);
    }

    dmhesscore(A, work, house, P);

    xdmfree(work);
    xdvfree(house);

    return SP_TRUE;
}

/*
 * Non-destructive variant of dmhess(): returns a reduced clone of A, or
 * NODATA when A is not square.
 *
 * xoP: may be NULL.  Otherwise *xoP receives a newly allocated
 *      A->row x A->col matrix (complex when A is complex) that dmhess()
 *      fills as its P output (A = P * output * P').
 */
spDMatrix xdmhess(spDMatrix A, spDMatrix *xoP /* A=P*output*P' */)
{
    spDMatrix reduced;
    spDMatrix transform = NODATA;

    if (A->row != A->col) {
        return NODATA;
    }

    if (xoP != NULL) {
        transform = (A->imag != NULL) ? xdmrialloc(A->row, A->col)
                                      : xdmalloc(A->row, A->col);
        *xoP = transform;
    }

    reduced = xdmclone(A);
    dmhess(reduced, transform);

    return reduced;
}

/*
 * dmbidiagcore: in-place reduction of A by alternating Householder
 * reflections -- one built from a column of A, then one built from a row of
 * the intermediate result.  The public wrapper documents the outcome as
 * inputA = U*outputA*V'.
 *
 *   A     : input/output matrix; the complex code paths run when A->imag != NULL.
 *   B     : scratch with A's dimensions; holds the result of each column step.
 *   x     : scratch vector; x->length is overwritten on every step.
 *   U, V  : optional accumulators (NODATA to skip); both are reset with dmeye().
 *   UVbuf : scratch holding the pre-update columns of U (resp. V); only
 *           touched when U (resp. V) is given.
 *
 * Each reflector is formed explicitly, element by element, as
 * w[u][m] = delta(u,m) - 2*x[u]*conj(x[m]) from the vector left in x by dvhouse().
 *
 * NOTE(review): the complex branches store into U->imag / V->imag without a
 * NULL check, so a complex A appears to require complex U and V.
 * NOTE(review): when A->col > A->row, xlen = A->row - j is <= 0 for late j;
 * what dmcutcol()/dvhouse() do with such a length is not visible here.
 */
static void dmbidiagcore(spDMatrix A /* input/output */, spDMatrix B /* buffer only */, spDVector x /* buffer only */,
                         spDMatrix U, spDMatrix V, spDMatrix UVbuf)
{
    long j, k, l, m, u, n;
    long row;
    long xlen;
    long offset;
    long maxrowcol;
    double xrr, xii, xri, xir;
    double wre, wim;
    double bre, bim;
    double ure, uim;
    double vre, vim;
    
    n = A->col;
    maxrowcol = MAX(A->row, A->col);

    if (U != NODATA) {
        dmeye(U);
    }
    if (V != NODATA) {
        dmeye(V);
    }
    
    for (j = 0; j < n; j++) {
        /* ---- column step: reflector from column j, rows j.. of A ---- */
        xlen = A->row - j;
        spDebug(80, "dmbidiagcore", "first: j = %ld, xlen = %ld\n", j, xlen);
        x->length = xlen;
        dmcutcol(A, j, j, xlen, x);
        //dvdump(x);
        dvhouse(x);
        //dvdump(x);

        if (U != NODATA) {
            /* snapshot columns j.. of U: they are read while U is overwritten below */
            for (l = 0; l < A->row; l++) {
                for (m = 0; m < xlen; m++) {
                    UVbuf->data[l][j + m] = U->data[l][j + m];
                    if (U->imag != NULL) {
                        UVbuf->imag[l][j + m] = U->imag[l][j + m];
                    }
                }
            }
        }

        /* rows above j are not touched by this reflector: copy them to B unchanged */
        row = MIN(j, A->row);
        for (k = 0; k < row; k++) {
            for (l = 0; l < A->col; l++) {
                B->data[k][l] = A->data[k][l];
                if (A->imag != NULL) {
                    B->imag[k][l] = A->imag[k][l];
                }
            }
        }
        /* rows j..: B = W*A, and column k of U = (old U)*W, sharing one loop over l */
        for (k = j; k < A->row; k++) {
            u = k - j;
            if (A->imag != NULL) {
                /* l runs to max(row, col) so the same loop serves both B (l < col) and U (l < row) */
                for (l = 0; l < maxrowcol; l++) {
                    bre = bim = 0.0;
                    ure = uim = 0.0;
                    for (m = 0; m < xlen; m++) {
                        /* -2*x*x' */
                        xrr = x->data[u] * x->data[m];
                        xii = x->imag[u] * x->imag[m];
                        xri = x->data[u] * x->imag[m];
                        xir = x->imag[u] * x->data[m];
                        wre = -2.0 * (xrr + xii);
                        wim = -2.0 * (-xri + xir);
                        if (u == m) {
                            wre += 1.0;
                        }
                        spDebug(100, "dmbidiagcore", "first loop: w[%ld][%ld] = %f + %fi\n", u, m, wre, wim);

                        if (l < A->col) {
                            /* W*A */
                            bre += wre * A->data[j + m][l] - wim * A->imag[j + m][l];
                            bim += wre * A->imag[j + m][l] + wim * A->data[j + m][l];
                        }
                        
                        if (U != NODATA && l < A->row) {
                            /* U*W */
                            ure += UVbuf->data[l][j + m] * wre - UVbuf->imag[l][j + m] * wim;
                            uim += UVbuf->data[l][j + m] * wim + UVbuf->imag[l][j + m] * wre;
                        }
                    }
                    if (l < A->col) {
                        B->data[k][l] = bre;
                        B->imag[k][l] = bim;
                    }
                    if (U != NODATA && l < A->row) {
                        U->data[l][k] = ure;
                        U->imag[l][k] = uim;
                    }
                }
            } else {
                /* real variant of the loop above */
                for (l = 0; l < maxrowcol; l++) {
                    bre = 0.0;
                    ure = 0.0;
                    for (m = 0; m < xlen; m++) {
                        wre = -2.0 * x->data[u] * x->data[m];
                        if (u == m) {
                            wre += 1.0;
                        }
                        
                        if (l < A->col) {
                            bre += wre * A->data[j + m][l];
                        }
                        if (U != NODATA && l < A->row) {
                            ure += UVbuf->data[l][j + m] * wre;
                        }
                    }
                    if (l < A->col) {
                        B->data[k][l] = bre;
                    }
                    if (U != NODATA && l < A->row) {
                        U->data[l][k] = ure;
                    }
                }
            }
        }
        //dmdump(B);

        if (j < n - 2) {
            /* ---- row step: reflector from row j, columns j+1.. of B ---- */
            offset = j + 1;
            xlen = A->col - offset;
            spDebug(80, "dmbidiagcore", "second: j = %ld, offset = %ld, xlen = %ld\n", j, offset, xlen);
            x->length = xlen;
            dmcutrow(B, j, offset, xlen, x);
            //dvdump(x);
            dvhouse(x);
            //dvdump(x);
        
            if (V != NODATA) {
                /* snapshot columns offset.. of V before they are overwritten */
                for (k = 0; k < A->col; k++) {
                    for (m = 0; m < xlen; m++) {
                        UVbuf->data[k][offset + m] = V->data[k][offset + m];
                        if (V->imag != NULL) {
                            UVbuf->imag[k][offset + m] = V->imag[k][offset + m];
                        }
                    }
                }
            }
        
            /* k runs to max(row, col): rows of A need k < row, rows of V need k < col */
            for (k = 0; k < maxrowcol; k++) {
                if (k < A->row) {
                    /* columns 0..j are not touched by this reflector: copy back from B */
                    for (l = 0; l <= j; l++) {
                        A->data[k][l] = B->data[k][l];
                        if (A->imag != NULL) {
                            A->imag[k][l] = B->imag[k][l];
                        }
                    }
                }
                
                if (A->imag != NULL) {
                    for (l = offset; l < A->col; l++) {
                        u = l - offset;
                        bre = bim = 0.0;
                        vre = vim = 0.0;
                        for (m = 0; m < xlen; m++) {
                            /* -2*x*x' */
                            xrr = x->data[u] * x->data[m];
                            xii = x->imag[u] * x->imag[m];
                            xri = x->data[u] * x->imag[m];
                            xir = x->imag[u] * x->data[m];
                            wre = -2.0 * (xrr + xii);
                            wim = -2.0 * (-xri + xir);
                            if (u == m) {
                                wre += 1.0;
                            }
                            spDebug(100, "dmbidiagcore", "second loop: w[%ld][%ld] = %f + %fi\n", u, m, wre, wim);

                            if (k < A->row) {
                                /* B*W' */
                                bre += B->data[k][offset + m] * wre - B->imag[k][offset + m] * wim;
                                bim += B->data[k][offset + m] * wim + B->imag[k][offset + m] * wre;
                            }
                            if (V != NODATA && k < A->col) {
                                /* V*W */
                                /* uses the conjugate of w (imaginary part negated) */
                                vre += UVbuf->data[k][offset + m] * wre - UVbuf->imag[k][offset + m] * (-wim);
                                vim += UVbuf->data[k][offset + m] * (-wim) + UVbuf->imag[k][offset + m] * wre;
                            }
                        }
                        if (k < A->row) {
                            A->data[k][l] = bre;
                            A->imag[k][l] = bim;
                            spDebug(100, "dmbidiagcore", "second loop: A[%ld][%ld] = %f + %fi\n",
                                    k, l, A->data[k][l], A->imag[k][l]);
                        }
                        if (V != NODATA && k < A->col) {
                            V->data[k][l] = vre;
                            V->imag[k][l] = vim;
                        }
                    }
                } else {
                    /* real variant of the loop above */
                    for (l = offset; l < A->col; l++) {
                        u = l - offset;
                        bre = 0.0;
                        vre = 0.0;
                        for (m = 0; m < xlen; m++) {
                            wre = -2.0 * x->data[u] * x->data[m];
                            if (u == m) {
                                wre += 1.0;
                            }
                            spDebug(100, "dmbidiagcore", "second loop: w[%ld][%ld] = %f\n", u, m, wre);
                            
                            if (k < A->row) {
                                bre += B->data[k][offset + m] * wre;
                            }
                            if (V != NODATA && k < A->col) {
                                vre += UVbuf->data[k][offset + m] * wre;
                            }
                        }
                        if (k < A->row) {
                            A->data[k][l] = bre;
                        }
                        if (V != NODATA && k < A->col) {
                            V->data[k][l] = vre;
                        }
                    }
                }
            }

            //dmdump(A);
            //dmdump(V);
        } else {
            /* last two columns: no row step, just take the column-step result back into A */
            dmcopy(A, B);
        }
    }

    /* disabled debug dump; note it references `i`, which is not declared in this function */
#if 0
    {
        for (i = 0; i < n; i++) {
            spDebug(10, "bmbidiagcore", "result diag %ld = %g\n", i, A->data[i][i]);
        }
        for (i = 0; i < n - 1; i++) {
            spDebug(10, "bmbidiagcore", "result supdiag %ld = %g\n", i, A->data[i][i + 1]);
        }
    }
#endif

    //dmdump(A);
    
    return;
}

/*
 * dmbidiag: reduce A in place with dmbidiagcore() so that
 * inputA = U*outputA*V'.  U and V can be NODATA.
 *
 *   U : optional, at least A->row x A->row.
 *   V : optional, at least A->col x A->col.
 *
 * Returns SP_FALSE when a supplied U or V is too small, SP_TRUE otherwise.
 */
spBool dmbidiag(spDMatrix A /* input/output */, spDMatrix U, spDMatrix V) /* inputA = U*outputA*V'. U and V can be NODATA */
{
    spDMatrix B;
    spDMatrix UVbuf;
    spDVector x;
    spBool uvbuf_cplx;
    
    if ((U != NODATA && (U->col < A->row || U->row < A->row))
        || (V != NODATA && (V->row < A->col || V->col < A->col))) {
        return SP_FALSE;
    }

    UVbuf = NODATA;

    if (A->imag != NULL) {
        /* The core writes U->imag / V->imag unconditionally for complex A;
         * give real-only accumulators an imaginary part first (same call
         * dmqr() uses for its Q and R). */
        if (U != NODATA && U->imag == NULL) dmizeros(U, 0, 0);
        if (V != NODATA && V->imag == NULL) dmizeros(V, 0, 0);
    }

    /* The core copies U->imag / V->imag into UVbuf->imag whenever they exist,
     * even for real A, so the buffer must be complex if any operand is. */
    uvbuf_cplx = SP_FALSE;
    if (A->imag != NULL
        || (U != NODATA && U->imag != NULL)
        || (V != NODATA && V->imag != NULL)) {
        uvbuf_cplx = SP_TRUE;
    }
    
    if (A->imag != NULL) {
        B = xdmrialloc(A->row, A->col);
        x = xdvrialloc(MAX(A->row, A->col));
    } else {
        B = xdmalloc(A->row, A->col);
        x = xdvalloc(MAX(A->row, A->col));
    }

    if (U != NODATA || V != NODATA) {
        /* square buffer of side max(row, col): large enough for either U or V */
        if (uvbuf_cplx == SP_TRUE) {
            UVbuf = xdmrialloc(x->length, x->length);
        } else {
            UVbuf = xdmalloc(x->length, x->length);
        }
    }
    
    dmbidiagcore(A, B, x, U, V, UVbuf);

    if (UVbuf != NODATA) xdmfree(UVbuf);
    xdmfree(B);
    xdvfree(x);

    return SP_TRUE;
}

/*
 * xdmbidiag: allocating variant of dmbidiag(); inputA = U*outputA*V'.
 *
 * Works on a clone of A and returns it.  For each non-NULL output pointer a
 * square matrix (A->row for U, A->col for V; complex when A is complex) is
 * allocated and returned through it.  xoU and xoV can be NULL; whatever is
 * returned must be freed by the caller.
 */
spDMatrix xdmbidiag(spDMatrix A, spDMatrix *xoU, spDMatrix *xoV) /* inputA = U*outputA*V'. xoU and xoV can be NULL, must be freed */
{
    spDMatrix left = NODATA;
    spDMatrix right = NODATA;
    spDMatrix reduced;

    if (xoU != NULL) {
        left = (A->imag != NULL) ? xdmrialloc(A->row, A->row)
                                 : xdmalloc(A->row, A->row);
        *xoU = left;
    }

    if (xoV != NULL) {
        right = (A->imag != NULL) ? xdmrialloc(A->col, A->col)
                                  : xdmalloc(A->col, A->col);
        *xoV = right;
    }

    /* reduce a private copy so the caller's A stays intact */
    reduced = xdmclone(A);
    dmbidiag(reduced, left, right);

    return reduced;
}

/*
 * dmqr: QR factorisation of A by successive Householder reflections.
 *
 *   A : input (not modified here).
 *   Q : output, at least A->row x A->row.  The reflections are accumulated
 *       from the left and the result is conjugate-transposed at the end.
 *   R : output, at least A->row x A->col; starts as a copy of A and is
 *       reduced column by column.
 *
 * For complex A, real-only Q/R are given an imaginary part first.
 * Returns SP_FALSE on NODATA arguments or undersized Q/R, SP_TRUE otherwise.
 *
 * The scratch copies Rp (n x A->col) and Qp (n x n) are sized from A, so all
 * column indices are bounded by A->col and n rather than by the possibly larger
 * R->col / Q->col; this avoids reading past the scratch rows when the caller
 * passes oversized outputs.
 */
spBool dmqr(spDMatrix A, spDMatrix Q, spDMatrix R)
{
    long i, j, p;
    long n;
    long k;
    long loop_end;
    long maxrowcol;
    long rcol;                  /* number of valid columns in Rp */
    long qcol;                  /* number of valid columns in Qp */
    double rbeta, ibeta;
    double v, a, u;
    double tre, tim;
    double vre, vim, are, aim, ure, uim;
    spDVector x;
    spDMatrix Rp, Qp;
    spBool cplx_flag;

    if (A == NODATA || Q == NODATA || R == NODATA
        || Q->row < A->row || Q->col < A->row 
        || R->row < A->row || R->col < A->col) return SP_FALSE;

    n = A->row;
    maxrowcol = MAX(A->row, A->col);
    rcol = A->col;
    qcol = n;

    if (A->imag != NULL) {
        cplx_flag = SP_TRUE;
        if (Q->imag == NULL) dmizeros(Q, 0, 0);
        if (R->imag == NULL) dmizeros(R, 0, 0);
        Rp = xdmrialloc(n, A->col);
        Qp = xdmrialloc(n, n);
        x = xdvrialloc(n);

        /* complex case also processes the last column (k = n - 1) */
        loop_end = n - 1;
    } else {
        cplx_flag = SP_FALSE;
        Rp = xdmalloc(n, A->col);
        Qp = xdmalloc(n, n);
        x = xdvalloc(n);
        
        loop_end = n - 2;
    }

    dmeye(Q);
    dmcopy(R, A);
    
    for (k = 0; k <= loop_end; k++) {
        spDebug(100, "dmqr", "k = %ld\n", k);
        /* work from snapshots so R and Q can be overwritten in place */
        dmcopy(Rp, R);
        dmcopy(Qp, Q);
        
        x->length = n - k;
        dmcutcol(Rp, k, k, n - k, x);
        //dvdump(x);

        rbeta = ibeta = 0.0;
        
        if (cplx_flag) {
            dvcplxtorealhouse(x, &rbeta, &ibeta);
        } else {
            dvhouse(x);
        }

        for (i = k; i < n; i++) {
            if (cplx_flag) {
                /* one loop over j serves both R (k <= j < rcol) and Q (j < qcol) */
                for (j = 0; j < maxrowcol; j++) {
                    are = aim = 0.0;
                    ure = uim = 0.0;

                    for (p = 0; p < x->length; p++) {
                        /* -beta * x * x' */
                        tre = x->data[i - k] * x->data[p] + x->imag[i - k] * x->imag[p];
                        tim = -x->data[i - k] * x->imag[p] + x->imag[i - k] * x->data[p];
                        vre = -(rbeta * tre - ibeta * tim);
                        vim = -(rbeta * tim + ibeta * tre);
                        if (p + k == i) {
                            vre += 1.0;
                        }
                        spDebug(100, "dmqr", "%ld, %ld: k = %ld, p = %ld / %ld, t = %f + %fi, v = %f + %fi\n",
                                i, j, k, p, x->length, tre, tim, vre, vim);
                    
                        if (j >= k && j < rcol) {
                            /* P * Rp */
                            are += vre * Rp->data[k + p][j] - vim * Rp->imag[k + p][j];
                            aim += vre * Rp->imag[k + p][j] + vim * Rp->data[k + p][j];
                        }
                        if (j < qcol) {
                            /* row i of the reflector times column j of Qp */
                            spDebug(100, "dmqr", "%ld, %ld: k = %ld, p = %ld / %ld, Q(%ld, %ld) = %f + %fi, v = %f + %fi\n",
                                    i, j, k, p, x->length, k + p, j, Qp->data[k + p][j], Qp->imag[k + p][j], vre, vim);
                            ure += Qp->data[k + p][j] * vre - Qp->imag[k + p][j] * vim;
                            uim += Qp->data[k + p][j] * vim + Qp->imag[k + p][j] * vre;
                        }
                    }
                    
                    spDebug(100, "dmqr", "i = %ld, j = %ld, a = %f + %fi, u = %f + %fi\n",
                            i, j, are, aim, ure, uim);
                
                    if (j >= k && j < rcol) {
                        R->data[i][j] = are;
                        R->imag[i][j] = aim;
                    }
                    if (j < qcol) {
                        Q->data[i][j] = ure;
                        Q->imag[i][j] = uim;
                    }
                }
            } else {
                for (j = 0; j < maxrowcol; j++) {
                    a = 0.0;
                    u = 0.0;

                    for (p = 0; p < x->length; p++) {
                        /* element (i - k, p) of I - 2*x*x' */
                        v = -2.0 * x->data[i - k] * x->data[p];
                        if (p + k == i) {
                            v += 1.0;
                        }
                    
                        if (j >= k && j < rcol) {
                            a += v * Rp->data[k + p][j];
                        }
                        if (j < qcol) {
                            u += v * Qp->data[k + p][j];
                        }
                    }
                
                    if (j >= k && j < rcol) {
                        R->data[i][j] = a;
                    }
                    if (j < qcol) {
                        Q->data[i][j] = u;
                    }
                }
            }
        }
    }

    xdmfree(Rp);
    xdmfree(Qp);
    xdvfree(x);

    /* the loop accumulated the reflections from the left; flip to obtain Q */
    dmconjtranspose(Q);

    return SP_TRUE;
}

/* reference: http://www.cs.cornell.edu/~bindel/class/cs6210-f09/lec29.pdf */
/*
 * dsqrpolycplx: coefficients of z^2 + b*z + c for the trailing 2x2 block of
 * the complex matrix H (rows/columns n-2 and n-1):
 *   b = -trace, c = determinant, both complex.
 * H->imag must be non-NULL.
 */
static void dsqrpolycplx(spDMatrix H, long n, double *br, double *bi, double *cr, double *ci)
{
    long hi = n - 2;    /* index of the upper-left entry of the block */
    long lo = n - 1;    /* index of the lower-right entry of the block */
    double p_re, p_im;  /* H[hi][hi] */
    double q_re, q_im;  /* H[hi][lo] */
    double r_re, r_im;  /* H[lo][hi] */
    double s_re, s_im;  /* H[lo][lo] */
    double diag_re, diag_im;
    double anti_re, anti_im;

    p_re = H->data[hi][hi];  p_im = H->imag[hi][hi];
    q_re = H->data[hi][lo];  q_im = H->imag[hi][lo];
    r_re = H->data[lo][hi];  r_im = H->imag[lo][hi];
    s_re = H->data[lo][lo];  s_im = H->imag[lo][lo];

    /* product of the diagonal entries and of the off-diagonal entries */
    diag_re = p_re * s_re - p_im * s_im;
    diag_im = p_re * s_im + p_im * s_re;
    anti_re = q_re * r_re - q_im * r_im;
    anti_im = q_re * r_im + q_im * r_re;

    *br = -(p_re + s_re);
    *bi = -(p_im + s_im);
    *cr = diag_re - anti_re;
    *ci = diag_im - anti_im;
}

/*
 * dsqrpolyreal: shift polynomial z^2 + b*z + c from the real parts of the
 * trailing 2x2 block of H (rows/columns n-2 and n-1).
 *
 * - Block has two distinct real eigenvalues (trace^2 > 4*det): the one closer
 *   to H(n-1,n-1) is used twice, and *nonreal_flag is set to SP_FALSE.
 * - Otherwise b = -trace and c = det (characteristic polynomial of the block)
 *   and *nonreal_flag is set to SP_TRUE.
 * nonreal_flag may be NULL.
 */
static void dsqrpolyreal(spDMatrix H, long n, double *b, double *c, spBool *nonreal_flag)
{
    double corner;
    double trace, det;
    double root;
    double eig_plus, eig_minus;

    corner = H->data[n - 1][n - 1];
    trace = H->data[n - 2][n - 2] + corner;
    det = H->data[n - 2][n - 2] * corner - H->data[n - 2][n - 1] * H->data[n - 1][n - 2];
    spDebug(80, "dsqrpolyreal", "n = %ld, trHH = %f, detHH = %f\n", n, trace, det);

    if (!(trace * trace > 4.0 * det)) {
        /* no pair of distinct real eigenvalues: use the block's own char poly */
        *b = -trace; /* -(summation of eigenvalues) */
        *c = det;    /* product of eigenvalues */
        if (nonreal_flag != NULL) *nonreal_flag = SP_TRUE;
        spDebug(80, "dsqrpolyreal", "complex eigenvalues: b = %f, c = %f\n", *b, *c);
        return;
    }

    /* real eigenvalues of the 2x2 block */
    root = sqrt(trace * trace - 4.0 * det);
    eig_plus = (trace + root) / 2.0;
    eig_minus = (trace - root) / 2.0;
    spDebug(80, "dsqrpolyreal", "real eigenvalues: original lHH1 = %f, lHH2 = %f, H->data[%ld][%ld] = %f\n",
            eig_plus, eig_minus, n - 1, n - 1, corner);

    /* keep only the eigenvalue nearer to the corner entry, as a double shift */
    if (FABS(eig_plus - corner) < FABS(eig_minus - corner)) {
        eig_minus = eig_plus;
    } else {
        eig_plus = eig_minus;
    }
    spDebug(80, "dsqrpolyreal", "real eigenvalues: modified lHH1 = %f, lHH2 = %f\n", eig_plus, eig_minus);

    /* z^2 + bz + c = (z-sigma_1)(z-sigma_2) */
    *b = -eig_plus - eig_minus;
    *c = eig_plus * eig_minus;
    if (nonreal_flag != NULL) *nonreal_flag = SP_FALSE;
    spDebug(80, "dsqrpolyreal", "real eigenvalues: b = %f, c = %f\n", *b, *c);
}

/*
 * dsqrpoly: pick the shift polynomial z^2 + b*z + c for the trailing 2x2
 * block of H (rows/columns n-2 and n-1).
 *
 * The complex routine is used when H is complex and either trailing diagonal
 * entry has an imaginary part of magnitude >= SP_EPSILON; *nonreal_flag is
 * then set to SP_TRUE.  Otherwise the real routine decides the flag and the
 * imaginary outputs *bi, *ci are zero.  nonreal_flag may be NULL.
 */
static void dsqrpoly(spDMatrix H, long n, double *br, double *bi, double *cr, double *ci, spBool *nonreal_flag)
{
    /* both diagonal entries are tested by magnitude: a negative imaginary
     * part is just as non-real as a positive one */
    if (H->imag != NULL
        && (FABS(H->imag[n - 2][n - 2]) >= SP_EPSILON || FABS(H->imag[n - 1][n - 1]) >= SP_EPSILON)) {
        dsqrpolycplx(H, n, br, bi, cr, ci);
        if (nonreal_flag != NULL) *nonreal_flag = SP_TRUE;
    } else {
        dsqrpolyreal(H, n, br, cr, nonreal_flag);
        *bi = *ci = 0.0;
    }

    return;
}

/*
% Implicit QR step using a Francis double shift
% (there should really be some re-scalings for floating point)
% Compute double-shift poly and initial column of H^2 + b*H + c*I
*/
/*
 * dsqrstepcore: apply the reflector W = I - 2*v*v' as a similarity to the
 * leading n x n part of ioH, on rows/columns offset .. offset+v->length-1,
 * and accumulate it into ioQ from the right.
 *
 *   ioH    : matrix updated in place (H := W*H*W').
 *   ioQ    : optional accumulator (NODATA to skip); columns offset.. are
 *            multiplied by W' from the right.
 *   n      : size of the active part of ioH.
 *   P      : scratch that receives the outer product v*v' via dmvvtimes().
 *   v      : reflector vector; the fixed 3-element stack buffers below assume
 *            v->length <= 3.
 *   vc     : scratch for the conjugate of v; only used when ioH is complex.
 *   offset : first row/column touched.
 *
 * NOTE(review): in the complex branch ioQ->imag is written without a NULL
 * check, so complex ioH appears to require a complex ioQ.
 */
static void dsqrstepcore(spDMatrix ioH, spDMatrix ioQ, long n, spDMatrix P, spDVector v, spDVector vc,
                         long offset)
{
    long i, j, u, m;
    long n2;
    long end;
    double Hr, Hi;
    double Qr, Qi;
    double Wr, Wi;
    double Hbufr[3], Hbufi[3];
    double Qbufr[3], Qbufi[3];
    
    spDebug(100, "dsqrstepcore", "in\n");
    //dmdump(ioH);
    
    /* P = v * conj(v)^T for complex data, v * v^T for real data */
    if (ioH->imag != NULL) {
        vc->length = v->length;
        dvcopy(vc, v);
        dvconj(vc);
        dmvvtimes(P, v, vc);
    } else {
        dmvvtimes(P, v, v);
    }
    spDebug(100, "dsqrstepcore", "after dmvvtimes\n");
    //dmdump(P);

    end = offset + v->length - 1;

    /* H(j+1:k,:) = W * H(j+1:k,:); */
    for (j = 0; j < n; j++) {
        /* buffer the affected slice of column j so the update can be done in place */
        for (u = 0; u < v->length; u++) {
            Hbufr[u] = ioH->data[offset + u][j];
            if (ioH->imag != NULL) {
                Hbufi[u] = ioH->imag[offset + u][j];
            } else {
                Hbufi[u] = 0.0;
            }
        }
        
        for (i = offset; i <= end; i++) {
            m = i - offset;
            Hr = Hi = 0;
            if (ioH->imag != NULL) {
                for (u = 0; u < v->length; u++) {
                    /* W[m][u] = delta(m,u) - 2*P[m][u] */
                    Wr = -2.0 * P->data[m][u];
                    Wi = -2.0 * P->imag[m][u];
                    if (u == m) {
                        Wr += 1.0;
                    }
                    Hr += Wr * Hbufr[u] - Wi * Hbufi[u];
                    Hi += Wr * Hbufi[u] + Wi * Hbufr[u];
                }
                ioH->data[i][j] = Hr;
                ioH->imag[i][j] = Hi;
            } else {
                for (u = 0; u < v->length; u++) {
                    Wr = -2.0 * P->data[m][u];
                    if (u == m) {
                        Wr += 1.0;
                    }
                    Hr += Wr * Hbufr[u];
                }
                ioH->data[i][j] = Hr;
            }
        }
    }
    spDebug(100, "dsqrstepcore", "after first loop\n");
    //dmdump(ioH);

    /* the column update walks every row of ioQ as well as the n rows of ioH */
    if (ioQ != NODATA) {
        n2 = MAX(n, ioQ->row);
    } else {
        n2 = n;
    }
    
    /* H(:,j+1:k) = H(:,j+1:k) * W'; */
    for (i = 0; i < n2; i++) {
        /* buffer the affected slice of row i of ioH (only for i < n) and of ioQ */
        for (u = 0; u < v->length; u++) {
            if (i < n) {
                Hbufr[u] = ioH->data[i][offset + u];
                if (ioH->imag != NULL) {
                    Hbufi[u] = ioH->imag[i][offset + u];
                } else {
                    Hbufi[u] = 0.0;
                }
            }
            if (ioQ != NODATA) {
                Qbufr[u] = ioQ->data[i][offset + u];
                if (ioQ->imag != NULL) {
                    Qbufi[u] = ioQ->imag[i][offset + u];
                } else {
                    Qbufi[u] = 0.0;
                }
            }
        }
        
        for (j = offset; j <= end; j++) {
            m = j - offset;
            Hr = Hi = 0;
            Qr = Qi = 0;
            if (ioH->imag != NULL) {
                for (u = 0; u < v->length; u++) {
                    Wr = -2.0 * P->data[m][u];
                    Wi = 2.0 * P->imag[m][u]; /* conjugate transpose */
                    if (u == m) {
                        Wr += 1.0;
                    }
                    if (i < n) {
                        Hr += Hbufr[u] * Wr - Hbufi[u] * Wi;
                        Hi += Hbufr[u] * Wi + Hbufi[u] * Wr;
                    }
                    if (ioQ != NODATA) {
                        Qr += Qbufr[u] * Wr - Qbufi[u] * Wi;
                        Qi += Qbufr[u] * Wi + Qbufi[u] * Wr;
                    }
                }
                if (i < n) {
                    ioH->data[i][j] = Hr;
                    ioH->imag[i][j] = Hi;
                }
                if (ioQ != NODATA) {
                    ioQ->data[i][j] = Qr;
                    ioQ->imag[i][j] = Qi;
                }
            } else {
                for (u = 0; u < v->length; u++) {
                    Wr = -2.0 * P->data[m][u];
                    if (u == m) {
                        Wr += 1.0;
                    }
                    if (i < n) {
                        Hr += Hbufr[u] * Wr;
                    }
                    if (ioQ != NODATA) {
                        Qr += Qbufr[u] * Wr;
                    }
                }
                if (i < n) {
                    ioH->data[i][j] = Hr;
                }
                if (ioQ != NODATA) {
                    ioQ->data[i][j] = Qr;
                }
            }
        }
    }

    spDebug(100, "dsqrstepcore", "after second loop\n");
    //dmdump(ioH);

    /* disabled: would force the entry left of the last touched row to exact zero */
#if 0
    if (offset >= 1) {
        ioH->data[end][offset - 1] = 0.0;
        if (ioH->imag != NULL) {
            ioH->imag[end][offset - 1] = 0.0;
        }
    }
#endif

    return;
}

/*
 * dsqrstep: one double-shift step on the leading n x n part of ioH.
 *
 *   1. dsqrpoly() supplies the shift polynomial z^2 + b*z + c from the
 *      trailing 2x2 block.
 *   2. v is set to the first column of H^2 + b*H + c*I (first v->length
 *      entries) and turned into a reflector by dvhouse(); if both b and c are
 *      below SP_EPSILON in magnitude a constant vector is used instead.
 *   3. The reflector is applied at offset 0, then "bulge chasing" applies a
 *      new reflector at each offset j = 1 .. n-2.
 *
 *   ioQ               : optional accumulator, passed through to dsqrstepcore().
 *   P, v, vc          : scratch; v->length is overwritten.
 *   poly_nonreal_flag : forwarded to dsqrpoly().
 *
 * NOTE(review): the code reads ioH[n-2][..] (via dsqrpoly) and ioH[1][0], so it
 * appears to require n >= 2 -- confirm against callers.
 */
static void dsqrstep(spDMatrix ioH, spDMatrix ioQ, long n, spDMatrix P, spDVector v, spDVector vc, spBool *poly_nonreal_flag)
{
    long i, j, k;
    double br, bi, cr, ci;

    v->length = MIN(3, n);

    dsqrpoly(ioH, n, &br, &bi, &cr, &ci, poly_nonreal_flag);
    spDebug(50, "dsqrstep", "b = %g + %gi, c = %g + %gi, n = %ld, v->length = %ld\n", br, bi, cr, ci,
            n, v->length);

    if (CABS(br, bi) < SP_EPSILON && CABS(cr, ci) < SP_EPSILON) {
        /* degenerate polynomial: fall back to a constant vector of value 1/sqrt(length) */
        spDebug(50, "dsqrstep", "b and c is too small: b = %g + %gi, c = %g + %gi\n", br, bi, cr, ci);
        dvnums(v, v->length, 1.0 / sqrt((double)v->length));
    } else {
        /* v = (H*H)(:,0); only columns 0 and 1 of H contribute to the sum */
        for (i = 0; i < v->length; i++) {
            v->data[i] = 0.0;
            if (v->imag != NULL) {
                v->imag[i] = 0.0;
            }
            for (j = 0; j < 2; j++) {
                if (ioH->imag != NULL) {
                    v->data[i] += ioH->data[i][j] * ioH->data[j][0] - ioH->imag[i][j] * ioH->imag[j][0];
                    v->imag[i] += ioH->data[i][j] * ioH->imag[j][0] + ioH->imag[i][j] * ioH->data[j][0];
                } else {
                    v->data[i] += ioH->data[i][j] * ioH->data[j][0];
                }
            }
        }
        /* v += b * H(:,0); only the first two entries of column 0 are used */
        for (i = 0; i < 2; i++) {
            if (ioH->imag != NULL) {
                v->data[i] += br * ioH->data[i][0] - bi * ioH->imag[i][0];
                v->imag[i] += br * ioH->imag[i][0] + bi * ioH->data[i][0];
            } else {
                v->data[i] += br * ioH->data[i][0];
            }
        }
        /* v += c * e_0 */
        v->data[0] += cr;
        if (ioH->imag != NULL) {
            v->imag[0] += ci;
        }
        spDebug(100, "dsqrstep", "before first dvhouse\n");
        //dvdump(v);

        /*% Apply a similarity associated with the first step of QR on C */
        dvhouse(v);
    }

    spDebug(100, "dsqrstep", "after first dvhouse\n");
    //dvdump(v);
    
    dsqrstepcore(ioH, ioQ, n, P, v, vc, 0);

    spDebug(100, "dsqrstep", "after first dsqrstepcore\n");
    //dmdump(ioH);
    
    /*
     *% Do "bulge chasing" to return to Hessenberg form
     */
    
    for (j = 1; j <= n - 2; j++) {
        /* window of up to three rows starting at row j, clipped at n */
        k = MIN(j + 3, n);
        v->length = k - j;
        spDebug(100, "dsqrstep", "j = %ld, k = %ld, v->length = %ld\n", j, k, v->length);
        /* take the window from column j - 1 */
        dmcutcol(ioH, j - 1, j, v->length, (v));

        /*% -- Find W = I-2vv' to put zeros below H(j+1,j), H := WHW' */
        dvhouse(v);
        //dvdump(v);
        
        dsqrstepcore(ioH, ioQ, n, P, v, vc, j);
        spDebug(100, "dsqrstep", "j = %ld: after dsqrstepcore\n", j);
        //dmdump(ioH);
    }
    
    spDebug(80, "dsqrstep", "done: j = %ld\n", j);
    
    return;
}

/*
 * dsqrgetpreciseabs: magnitude of the entry H(r, c).
 * Uses spPreciseCplxAbs() when H has an imaginary part, FABS() of the real
 * part otherwise.
 */
static double dsqrgetpreciseabs(spDMatrix H, long r, long c)
{
    if (H->imag == NULL) {
        return FABS(H->data[r][c]);
    }

    return spPreciseCplxAbs(H->data[r][c], H->imag[r][c]);
}

/*
 * dsqrdeflatecheck: test whether |H(r, c)| is negligible.
 *
 * The threshold is tol scaled by |H(n-2,n-2)| + |H(n-1,n-1)|.  Returns SP_TRUE
 * when |H(r, c)| is strictly below the scaled threshold.
 *
 *   ohabsvalue : if not NULL, receives |H(r, c)|.
 *   otol       : if not NULL, receives the scaled threshold.
 */
static spBool dsqrdeflatecheck(spDMatrix H, long n, long r, long c, double tol, double *ohabsvalue, double *otol)
{
    double magnitude;
    double scale;

    if (H->imag != NULL) {
        scale = spPreciseCplxAbs(H->data[n - 2][n - 2], H->imag[n - 2][n - 2])
            + spPreciseCplxAbs(H->data[n - 1][n - 1], H->imag[n - 1][n - 1]);
        tol *= scale;

        magnitude = spPreciseCplxAbs(H->data[r][c], H->imag[r][c]);
    } else {
        scale = FABS(H->data[n - 2][n - 2]) + FABS(H->data[n - 1][n - 1]);
        tol *= scale;

        magnitude = FABS(H->data[r][c]);
        /* the trace message is emitted for real matrices only */
        spDebug(100, "dsqrdeflatecheck", "|H(%ld, %ld)| = %g, tol = %g\n",
                r, c, magnitude, tol);
    }

    if (ohabsvalue != NULL) *ohabsvalue = magnitude;
    if (otol != NULL) *otol = tol;

    return (magnitude < tol) ? SP_TRUE : SP_FALSE;
}

/*
 * xdmdsqrloop: iterate double-shift QR steps on a copy of A with deflation.
 *
 *   A             : square input; only A->row is consulted for the size.
 *   max_iteration : maximum number of dsqrstep() calls; <= 0 means no limit.
 *   tolerance     : relative threshold passed to dsqrdeflatecheck().
 *   xoQ           : if not NULL, receives a newly allocated matrix that starts
 *                   as the identity and accumulates every step's transform.
 *
 * Returns the newly allocated working matrix H; the caller owns H and *xoQ.
 *
 * The active size dn shrinks by 1 or 2 whenever the corresponding subdiagonal
 * entry is negligible.  The loop stops when dn < 2, when the iteration limit
 * is reached, or when a remaining 2x2 block was reported as having non-real
 * eigenvalues by the last step.
 *
 * A step needs at least a 2x2 active block (it reads row/column dn - 2), so
 * inputs with n < 2 are returned as a plain copy without iterating.
 */
static spDMatrix xdmdsqrloop(spDMatrix A, long max_iteration /* 0: no limit */, double tolerance,
                           spDMatrix *xoQ)
{
    long n, dn, k;
    spDMatrix H, P, Q;
    spDVector v, vc;
    spBool poly_nonreal_flag;

    n = A->row;

    Q = NODATA;

    if (A->imag != NULL) {
        H = xdmrialloc(n, n);
        if (xoQ != NULL) Q = xdmrialloc(n, n);
        P = xdmrialloc(3, 3);
        v = xdvrizeros(3);
        vc = xdvrizeros(3);
    } else {
        H = xdmalloc(n, n);
        if (xoQ != NULL) Q = xdmalloc(n, n);
        P = xdmalloc(3, 3);
        v = xdvzeros(3);
        vc = NODATA;
    }
    if (xoQ != NULL) *xoQ = Q;

    if (Q != NODATA) dmeye(Q);
    dmpastemat(H, 0, 0, A, n, n, 0);
    //dmdump(H);
    
    spDebug(50, "xdmdsqrloop", "tolerance = %f\n", tolerance);

    k = 0;
    dn = n;
    poly_nonreal_flag = SP_FALSE;

    /* dn >= 2 is required before any step: dsqrstep() indexes H[dn - 2] */
    while (dn >= 2 && (max_iteration <= 0 || k < max_iteration)) {
        spDebug(100, "xdmdsqrloop", "n = %ld, dn = %ld, k = %ld, poly_nonreal_flag = %d\n",
                n, dn, k, poly_nonreal_flag);
        if (k > 0 && dn == 2 && poly_nonreal_flag == SP_TRUE) {
            /* a 2x2 block with a non-real pair cannot be reduced further here */
            spDebug(50, "xdmdsqrloop", "2x2 matrix with complex eigenvalue: n = %ld, dn = %ld, k = %ld\n",
                    n, dn, k);
            break;
        } else if (k > 0 && dsqrdeflatecheck(H, dn, dn - 1, dn - 2, tolerance, NULL, NULL) == SP_TRUE) {
            spDebug(10, "xdmdsqrloop", "At step %ld (n = %ld): Deflated 1-by-1 block\n", k, dn);
#if 0
            H->data[dn - 1][dn - 2] = 0.0;
            if (H->imag != NULL) {
                H->imag[dn - 1][dn - 2] = 0.0;
            }
#endif
            dn = dn - 1;
            poly_nonreal_flag = SP_FALSE;
        } else if (dn >= 3 && k > 0 && dsqrdeflatecheck(H, dn, dn - 2, dn - 3, tolerance, NULL, NULL) == SP_TRUE) {
            spDebug(10, "xdmdsqrloop", "At step %ld (n = %ld): Deflated 2-by-2 block\n", k, dn);
#if 0
            H->data[dn - 2][dn - 3] = 0.0;
            if (H->imag != NULL) {
                H->imag[dn - 2][dn - 3] = 0.0;
            }
#endif
            dn = dn - 2;
            poly_nonreal_flag = SP_FALSE;
        } else {
            /* nothing to deflate (or first pass): do one more step on the active block */
            poly_nonreal_flag = SP_FALSE;
            dsqrstep(H, Q, dn, P, v, vc, &poly_nonreal_flag);
            k = k + 1;
        }
#if 0
        spDebug(10, "xdmdsqrloop", "---- H ----\n");
        dmdump(H);
        spDebug(10, "xdmdsqrloop", "---- Q ----\n");
        dmdump(Q);
#endif
    }

    spDebug(50, "xdmdsqrloop", "loop end: k = %ld / %ld\n", k, max_iteration);
    
    xdmfree(P);
    xdvfree(v);
    if (vc != NODATA) xdvfree(vc);
    
    return H;
}

/*
 * calcQuadraticFormulaSolutions: both roots of x^2 + b*x + c = 0 for complex
 * b = br + bi*i and c = cr + ci*i, i.e. x = (-b +/- sqrt(b^2 - 4c)) / 2.
 *
 * The "+" root is returned in (*ox1r, *ox1i), the "-" root in (*ox2r, *ox2i).
 * The square root of the discriminant is taken with a non-negative real part
 * and an imaginary part whose sign follows the discriminant's imaginary part.
 */
static void calcQuadraticFormulaSolutions(double br, double bi, double cr, double ci,
                                          double *ox1r, double *ox1i, double *ox2r, double *ox2i)
{
    double disc_re, disc_im;
    double disc_abs;
    double root_re, root_im;

    /* discriminant b^2 - 4c */
    disc_re = br * br - bi * bi - 4.0 * cr;
    disc_im = 2.0 * br * bi - 4.0 * ci;

    /* complex square root via the half-angle identities */
    disc_abs = CABS(disc_re, disc_im);
    root_re = sqrt((disc_abs + disc_re) / 2.0);
    root_im = sqrt((disc_abs - disc_re) / 2.0);
    if (disc_im < 0.0) {
        root_im = -root_im;
    }

    *ox1r = (-br + root_re) / 2.0;
    *ox1i = (-bi + root_im) / 2.0;

    *ox2r = (-br - root_re) / 2.0;
    *ox2i = (-bi - root_im) / 2.0;
}

/*
 * calcQuasiEigenVectorSpecialElement: complex quotient -ts / td, with
 * ts = tsr + tsi*i and td = tdr + tdi*i, written to (*ovr, *ovi).
 * When |td| is below SP_EPSILON the result is set to 0 instead of dividing.
 */
static void calcQuasiEigenVectorSpecialElement(double tsr, double tsi, double tdr, double tdi, 
                                               double *ovr, double *ovi)
{
    double denom_sq;    /* |td|^2 */
    double denom_abs;   /* |td|   */

    spDebug(100, "calcQuasiEigenVectorSpecialElement", "ts = %f + %fi, td = %f + %fi\n",
            tsr, tsi, tdr, tdi);

    denom_sq = tdr * tdr + tdi * tdi;
    denom_abs = sqrt(denom_sq);
    spDebug(100, "calcQuasiEigenVectorSpecialElement", "tds = %f, tda = %f, SP_EPSILON = %g\n",
            denom_sq, denom_abs, SP_EPSILON);

    if (denom_abs < SP_EPSILON) {
        /* divisor is numerically zero */
        *ovi = 0.0;
        *ovr = 0.0;
        return;
    }

    /* -(ts * conj(td)) / |td|^2 */
    *ovr = -(tdr * tsr + tdi * tsi) / denom_sq;
    *ovi = -(tdr * tsi - tdi * tsr) / denom_sq;
}

/*
 * solve2x2LinearEquations: solve (M - lambda*I) * [x1 x2]' = [y1 y2]' where M
 * is the real 2x2 block of H whose upper-left entry is H(i_offset, j_offset).
 *
 * Returns SP_FALSE, leaving *ox1 and *ox2 untouched, when the determinant of
 * the shifted block is exactly zero; otherwise stores the solution and returns
 * SP_TRUE.  Only H->data is read.
 */
static spBool solve2x2LinearEquations(spDMatrix H, long i_offset, long j_offset, double lambda,
                                      double y1, double y2, double *ox1, double *ox2)
{
    double m11, m12, m21, m22;
    double determinant;

    /* shifted block entries */
    m11 = H->data[i_offset][j_offset] - lambda;
    m12 = H->data[i_offset][j_offset + 1];
    m21 = H->data[i_offset + 1][j_offset];
    m22 = H->data[i_offset + 1][j_offset + 1] - lambda;

    determinant = m11 * m22 - m12 * m21;
    if (determinant == 0.0) {
        return SP_FALSE;
    }

    /* multiply the right-hand side by the adjugate and divide by the determinant */
    *ox1 = (m22 * y1 - m12 * y2) / determinant;
    *ox2 = (-m21 * y1 + m11 * y2) / determinant;

    return SP_TRUE;
}

/*
 * Solve the complex 2x2 system
 *
 *     | a  b | |x1|   |y1|        a = H[i][j]     - lambda,  b = H[i][j+1]
 *     | c  d | |x2| = |y2|        c = H[i+1][j],             d = H[i+1][j+1] - lambda
 *
 * with i = i_offset, j = j_offset and lambda = lambdar + i*lambdai.
 * H->imag may be NULL, in which case the entries of H are taken as real.
 *
 * The solution is  x1 = (d*y1 - b*y2) / det,  x2 = (a*y2 - c*y1) / det  with
 * det = a*d - b*c, i.e. the same adjugate formula as the real-valued
 * solve2x2LinearEquations().
 *
 * Returns SP_FALSE (outputs untouched) when det is exactly 0 + 0i,
 * SP_TRUE otherwise.
 */
static spBool solve2x2LinearEquationsCplx(spDMatrix H, long i_offset, long j_offset, double lambdar, double lambdai,
                                          double y1r, double y1i, double y2r, double y2i,
                                          double *ox1r, double *ox1i, double *ox2r, double *ox2i)
{
    double ar, ai;      /* a = H[i][j] - lambda */
    double br, bi;      /* b = H[i][j+1] */
    double cr, ci;      /* c = H[i+1][j] */
    double dr, di;      /* d = H[i+1][j+1] - lambda */
    double detr, deti;  /* det = a*d - b*c */
    double det2;        /* |det|^2 */
    double n1r, n1i;    /* numerator of x1: d*y1 - b*y2 */
    double n2r, n2i;    /* numerator of x2: a*y2 - c*y1 */

    ar = H->data[i_offset][j_offset] - lambdar;
    ai = -lambdai;
    br = H->data[i_offset][j_offset + 1];
    bi = 0.0;
    cr = H->data[i_offset + 1][j_offset];
    ci = 0.0;
    dr = H->data[i_offset + 1][j_offset + 1] - lambdar;
    di = -lambdai;
    if (H->imag != NULL) {
        ai += H->imag[i_offset][j_offset];
        bi = H->imag[i_offset][j_offset + 1];
        ci = H->imag[i_offset + 1][j_offset];
        di += H->imag[i_offset + 1][j_offset + 1];
    }

    /* Im(b*c) = br*ci + bi*cr (both cross terms are added) */
    detr = (ar * dr - ai * di) - (br * cr - bi * ci);
    deti = (ar * di + ai * dr) - (br * ci + bi * cr);

    if (detr == 0.0 && deti == 0.0) return SP_FALSE;

    n1r = (dr * y1r - di * y1i) - (br * y2r - bi * y2i);
    n1i = (dr * y1i + di * y1r) - (br * y2i + bi * y2r);
    n2r = (ar * y2r - ai * y2i) - (cr * y1r - ci * y1i);
    n2i = (ar * y2i + ai * y2r) - (cr * y1i + ci * y1r);

    /* divide by det: n / det = n * conj(det) / |det|^2 */
    det2 = detr * detr + deti * deti;

    *ox1r = (n1r * detr + n1i * deti) / det2;
    *ox1i = (n1i * detr - n1r * deti) / det2;
    *ox2r = (n2r * detr + n2i * deti) / det2;
    *ox2i = (n2i * detr - n2r * deti) / det2;

    return SP_TRUE;
}

/*
 * Back-substitution for (H - lambda*I) x = y restricted to the leading
 * n-by-n part of the real matrix H, processed from row n-1 up to row 0.
 *
 * For each row m >= 1, dsqrdeflatecheck(H, m + 1, m, m - 1, tolerance, ...)
 * decides how the row is handled:
 *   - SP_FALSE: rows m-1 and m are solved together as a 2x2 system
 *     (solve2x2LinearEquations), and both rows are consumed;
 *   - otherwise (or for m == 0): the row is solved with its single
 *     diagonal element H[m][m] - lambda.
 *
 * Only x->data[0..n-1] is written; only y->data[0..n-1] is read.
 *
 * Returns SP_FALSE as soon as a diagonal element is exactly 0.0 or the
 * 2x2 solver fails, SP_TRUE when every row has been solved.
 */
static spBool solveQuasiTriangleLinearEquations(spDMatrix H, long n, double lambda, spDVector y, double tolerance, 
                                                spDVector x)
{
    long row, col;
    double sum_upper, sum_lower;
    double pivot;

    row = n - 1;
    while (row >= 0) {
        spDebug(100, "solveQuasiTriangleLinearEquations", "m = %ld, n = %ld\n", row, n);

        if (row < 1 || dsqrdeflatecheck(H, row + 1, row, row - 1, tolerance, NULL, NULL) != SP_FALSE) {
            /* subdiagonal is small enough: ordinary triangular step */
            pivot = H->data[row][row] - lambda;
            spDebug(100, "solveQuasiTriangleLinearEquations", "T[%ld][%ld] = %f, y->data[%ld] = %f\n",
                    row, row, pivot, row, y->data[row]);

            if (pivot == 0.0) {
                return SP_FALSE;
            }

            if (row == n - 1) {
                /* last row: nothing solved yet to subtract */
                x->data[row] = y->data[row] / pivot;
            } else {
                sum_upper = 0.0;
                for (col = row + 1; col < n; col++) {
                    sum_upper += H->data[row][col] * x->data[col];
                }
                x->data[row] = (y->data[row] - sum_upper) / pivot;
            }
            spDebug(100, "solveQuasiTriangleLinearEquations", "x->data[%ld] = %f\n",
                    row, x->data[row]);

            row -= 1;
            continue;
        }

        /* rows (row - 1, row) form a coupled 2x2 block */
        sum_upper = 0.0;
        sum_lower = 0.0;
        for (col = row + 1; col < n; col++) {
            sum_upper += H->data[row - 1][col] * x->data[col];
            sum_lower += H->data[row][col] * x->data[col];
        }
        spDebug(100, "solveQuasiTriangleLinearEquations", "y->data[%ld] = %f, c = %f, y->data[%ld] = %f, c2 = %f\n",
                row - 1, y->data[row - 1], sum_upper, row, y->data[row], sum_lower);

        if (solve2x2LinearEquations(H, row - 1, row - 1, lambda,
                                    y->data[row - 1] - sum_upper, y->data[row] - sum_lower,
                                    &x->data[row - 1], &x->data[row]) == SP_FALSE) {
            spDebug(10, "solveQuasiTriangleLinearEquations", "solve2x2LinearEquations failed\n");
            return SP_FALSE;
        }
        spDebug(100, "solveQuasiTriangleLinearEquations", "x->data[%ld] = %f, x->data[%ld] = %f\n",
                row - 1, x->data[row - 1], row, x->data[row]);

        /* two rows were consumed */
        row -= 2;
    }

    return SP_TRUE;
}

/*
 * Complex counterpart of solveQuasiTriangleLinearEquations():
 * back-substitution for (H - lambda*I) x = y on the leading n-by-n part of
 * H, with lambda = lambdar + i*lambdai, processed from row n-1 up to row 0.
 *
 * H->imag may be NULL (H is then treated as real).  x->imag and y->imag
 * are dereferenced unconditionally, so both vectors must carry an
 * imaginary part.
 *
 * For each row m >= 1, dsqrdeflatecheck(H, m + 1, m, m - 1, tolerance, ...)
 * returning SP_FALSE makes rows m-1 and m be solved together as a 2x2
 * system; otherwise the row is divided by its diagonal H[m][m] - lambda.
 *
 * Returns SP_FALSE as soon as a diagonal element is exactly 0 + 0i or the
 * 2x2 solver reports a singular block, SP_TRUE otherwise.
 */
static spBool solveQuasiTriangleLinearEquationsCplx(spDMatrix H, long n, double lambdar, double lambdai, spDVector y, 
                                                    double tolerance, spDVector x)
{
    long m, u;
    double cr, ci, c2r, c2i;
    double diagr, diagi;
    double den, numr, numi;

    for (m = n - 1; m >= 0; m--) {
        if (m >= 1 && dsqrdeflatecheck(H, m + 1, m, m - 1, tolerance, NULL, NULL) == SP_FALSE) {
            /* contribution of the already solved unknowns to rows m-1 and m */
            cr = ci = c2r = c2i = 0.0;
            for (u = m + 1; u < n; u++) {
                if (H->imag != NULL) {
                    cr += H->data[m - 1][u] * x->data[u] - H->imag[m - 1][u] * x->imag[u];
                    ci += H->data[m - 1][u] * x->imag[u] + H->imag[m - 1][u] * x->data[u];

                    c2r += H->data[m][u] * x->data[u] - H->imag[m][u] * x->imag[u];
                    c2i += H->data[m][u] * x->imag[u] + H->imag[m][u] * x->data[u];
                } else {
                    cr += H->data[m - 1][u] * x->data[u];
                    ci += H->data[m - 1][u] * x->imag[u];

                    c2r += H->data[m][u] * x->data[u];
                    c2i += H->data[m][u] * x->imag[u];
                }
            }

            /* a singular 2x2 block must be reported, exactly as the
             * real-valued variant does, instead of leaving x unsolved */
            if (solve2x2LinearEquationsCplx(H, m - 1, m - 1, lambdar, lambdai,
                                            y->data[m - 1] - cr, y->imag[m - 1] - ci,
                                            y->data[m] - c2r, y->imag[m] - c2i,
                                            &x->data[m - 1], &x->imag[m - 1],
                                            &x->data[m], &x->imag[m]) == SP_FALSE) {
                spDebug(10, "solveQuasiTriangleLinearEquationsCplx", "solve2x2LinearEquationsCplx failed\n");
                return SP_FALSE;
            }

            /* the 2x2 block consumed row m-1 as well */
            --m;
        } else {/* subdiagonal is small enough */
            diagr = H->data[m][m] - lambdar;
            diagi = -lambdai;
            if (H->imag != NULL) {
                diagi += H->imag[m][m];
            }

            if (diagr == 0.0 && diagi == 0.0) return SP_FALSE;

            /* division by diag is done as num * conj(diag) / |diag|^2 */
            den = diagr * diagr + diagi * diagi;

            if (m == n - 1) {
                x->data[m] = (diagr * y->data[m] + diagi * y->imag[m]) / den;
                x->imag[m] = (diagr * y->imag[m] - diagi * y->data[m]) / den;
            } else {
                for (u = m + 1, cr = ci = 0.0; u < n; u++) {
                    if (H->imag != NULL) {
                        cr += H->data[m][u] * x->data[u] - H->imag[m][u] * x->imag[u];
                        ci += H->data[m][u] * x->imag[u] + H->imag[m][u] * x->data[u];
                    } else {
                        cr += H->data[m][u] * x->data[u];
                        ci += H->data[m][u] * x->imag[u];
                    }
                }
                numr = y->data[m] - cr;
                numi = y->imag[m] - ci;
                x->data[m] = (diagr * numr + diagi * numi) / den;
                x->imag[m] = (diagr * numi - diagi * numr) / den;
            }
        }
    }

    return SP_TRUE;
}

/*
 * Map an eigenvector through Q and scale it.
 *
 *   Q         : matrix passed to dvmvtimes() together with ieigvec
 *   ieigvec   : input vector; overwritten and used as scratch
 *   oeigvec   : receives the result of dvmvtimes(oeigvec, Q, ieigvec),
 *               afterwards scaled in place
 *   real_flag : SP_FALSE enables the phase adjustment below
 *
 * The scale factor is 1 / sqrt(dvsum(dvsquare(copy of the result))).
 * When real_flag is SP_FALSE, the element selected by dvmax() on the
 * squared copy is additionally rotated onto the real axis: the whole
 * vector is multiplied by exp(-i*angle)/norm and the imaginary part of
 * that element is then forced to exactly 0.0.
 */
static void transformInitialEigenVector(spDMatrix Q, spDVector ieigvec, spDVector oeigvec, spBool real_flag)
{
    long peak = 0;
    double norm;
    double phase = 0.0;

    dvmvtimes(oeigvec, Q, ieigvec);

    /* ieigvec becomes a scratch copy holding the squared elements */
    dvcopy(ieigvec, oeigvec);
    dvsquare(ieigvec);
    norm = sqrt(dvsum(ieigvec));

    if (real_flag == SP_FALSE) {
        dvmax(ieigvec, &peak);

        if (!(oeigvec->data[peak] == 0.0 && oeigvec->imag[peak] == 0.0)) {
            phase = atan2(oeigvec->imag[peak], oeigvec->data[peak]);
        }
    }

    if (phase == 0.0) {
        /* no rotation needed: plain normalisation */
        dvscoper(oeigvec, "/", norm);
        return;
    }

    /* rotate by -phase and normalise in one complex multiplication */
    dvcplxoper(oeigvec, "*", cos(-phase) / norm, sin(-phase) / norm);
    oeigvec->imag[peak] = 0.0;
}

/*
 * Eigenvector for the eigenvalue lambda = lambdar + i*lambdai belonging to
 * the 2x2 block of H located at rows/columns k-1 and k.
 *
 *   H, Q      : matrices; H->imag may be NULL (real H)
 *   tolerance : forwarded to solveQuasiTriangleLinearEquationsCplx()
 *   vbuf      : work vector with an imaginary part; fully overwritten
 *   eigvec    : receives the final vector (must have an imaginary part)
 *
 * Steps:
 *   1. Elements k-1 and k of the vector are fixed from the 2x2 block: one
 *      of them is set to 1 and the other is obtained by
 *      calcQuasiEigenVectorSpecialElement().
 *   2. For rows 0..k-2 the right-hand side
 *      -(H[u][k-1]*v[k-1] + H[u][k]*v[k]) is stored in eigvec.
 *   3. The leading (k-1)-by-(k-1) system is solved into vbuf.
 *   4. transformInitialEigenVector() maps vbuf through Q into eigvec.
 */
static void calcQuasiEigenVector(spDMatrix H, spDMatrix Q, long k, double lambdar, double lambdai,
                                 double tolerance, spDVector vbuf, spDVector eigvec)
{
    const long kp = k - 1;              /* first row/column of the 2x2 block */
    const int cplx = (H->imag != NULL);
    long row;
    double dlo_r, dlo_i;                /* H[k-1][k-1] - lambda */
    double dhi_r, dhi_i;                /* H[k][k] - lambda */
    double off_r, off_i;                /* off-diagonal element of the block */
    double hlo_r, hlo_i;                /* H[row][k-1] */
    double hhi_r, hhi_i;                /* H[row][k] */
    double vlo_r, vlo_i, vhi_r, vhi_i;  /* fixed elements k-1 and k */
    double rhs_r, rhs_i;

    dvrizeros(vbuf, vbuf->length);

    dlo_r = H->data[kp][kp] - lambdar;
    dlo_i = -lambdai;
    dhi_r = H->data[k][k] - lambdar;
    dhi_i = -lambdai;
    if (cplx) {
        dlo_i += H->imag[kp][kp];
        dhi_i += H->imag[k][k];
    }
    spDebug(100, "calcQuasiEigenVector", "k = %ld, t22 = %f + %fi, t33 = %f + %fi\n",
            k, dlo_r, dlo_i, dhi_r, dhi_i);

    if (CABS(dlo_r, dlo_i) < SP_EPSILON) {
        /* upper diagonal vanishes: fix element k-1 and derive element k */
        off_r = H->data[k][kp];
        off_i = cplx ? H->imag[k][kp] : 0.0;

        vbuf->data[kp] = 1.0;
        vbuf->imag[kp] = 0.0;
        calcQuasiEigenVectorSpecialElement(off_r, off_i, dhi_r, dhi_i,
                                           &vbuf->data[k], &vbuf->imag[k]);
        spDebug(100, "calcQuasiEigenVector", "t32 = %f + %fi, vbuf[%ld] = %f + %fi\n",
                off_r, off_i, k, vbuf->data[k], vbuf->imag[k]);
    } else {
        /* fix element k and derive element k-1 */
        off_r = H->data[kp][k];
        off_i = cplx ? H->imag[kp][k] : 0.0;

        vbuf->data[k] = 1.0;
        vbuf->imag[k] = 0.0;
        calcQuasiEigenVectorSpecialElement(off_r, off_i, dlo_r, dlo_i,
                                           &vbuf->data[kp], &vbuf->imag[kp]);
        spDebug(100, "calcQuasiEigenVector", "t23 = %f + %fi, vbuf[%ld] = %f + %fi\n",
                off_r, off_i, kp, vbuf->data[kp], vbuf->imag[kp]);
    }

    vlo_r = vbuf->data[kp];
    vlo_i = vbuf->imag[kp];
    vhi_r = vbuf->data[k];
    vhi_i = vbuf->imag[k];

    /* right-hand side for the rows above the block */
    for (row = 0; row < kp; row++) {
        hlo_r = H->data[row][kp];
        hlo_i = cplx ? H->imag[row][kp] : 0.0;
        hhi_r = H->data[row][k];
        hhi_i = cplx ? H->imag[row][k] : 0.0;
        spDebug(100, "calcQuasiEigenVector", "u = %ld, t12 = %f + %fi, t13 = %f + %fi\n",
                row, hlo_r, hlo_i, hhi_r, hhi_i);

        rhs_r = -(vlo_r * hlo_r - vlo_i * hlo_i);
        rhs_i = -(vlo_r * hlo_i + vlo_i * hlo_r);
        rhs_r -= (vhi_r * hhi_r - vhi_i * hhi_i);
        rhs_i -= (vhi_r * hhi_i + vhi_i * hhi_r);
        eigvec->data[row] = rhs_r;
        eigvec->imag[row] = rhs_i;

        spDebug(100, "calcQuasiEigenVector", "eigvec[%ld] = %f + %fi\n", row, eigvec->data[row], eigvec->imag[row]);
    }

    /* solve for elements 0..k-2, then map the vector through Q */
    solveQuasiTriangleLinearEquationsCplx(H, kp, lambdar, lambdai, eigvec, tolerance, vbuf);

    transformInitialEigenVector(Q, vbuf, eigvec, SP_FALSE);
}

/*
 * Allocate and compute the eigenvector(s) associated with position k.
 *
 *   H, Q       : matrices handed on to the solvers / transformInitialEigenVector()
 *   e          : eigenvalues; e->data[k] (and e->imag[k] where used) is lambda
 *   k          : index of the eigenvalue; for a quasi block, the second
 *                index of the pair (k-1, k)
 *   vbuf       : work vector; gains an imaginary part when one is required
 *   quasi_flag : SP_FALSE for an isolated eigenvalue, otherwise the pair
 *                (k-1, k) is handled via calcQuasiEigenVector()
 *   conj_flag  : for a quasi block, SP_TRUE makes vector k-1 the conjugate
 *                of vector k instead of computing it separately
 *   eigvecs    : eigvecs->vector[k] (and [k-1] for a quasi block) is
 *                assigned a newly created vector
 */
static void calcEigenVectors(spDMatrix H, spDMatrix Q, spDVector e, long k, spDVector vbuf, 
                             double tolerance, spBool quasi_flag, spBool conj_flag,
                             spDVectors eigvecs)
{
    const long dim = H->row;
    const int cplx = (H->imag != NULL);

    if (quasi_flag != SP_FALSE) {
        /* pair of eigenvalues from a 2x2 block: always complex vectors */
        eigvecs->vector[k] = xdvrizeros(dim);
        if (vbuf->imag == NULL) {
            dvizeros(vbuf, vbuf->length);
        }

        calcQuasiEigenVector(H, Q, k, e->data[k], e->imag[k], tolerance, vbuf, eigvecs->vector[k]);

        if (conj_flag == SP_TRUE) {
            eigvecs->vector[k - 1] = xdvconj(eigvecs->vector[k]);
        } else {
            eigvecs->vector[k - 1] = xdvrizeros(dim);
            calcQuasiEigenVector(H, Q, k, e->data[k - 1], e->imag[k - 1], tolerance, vbuf, eigvecs->vector[k - 1]);
        }
        return;
    }

    /* isolated eigenvalue */
    if (cplx) {
        eigvecs->vector[k] = xdvrizeros(dim);
        if (vbuf->imag == NULL) {
            dvizeros(vbuf, vbuf->length);
        }
    } else {
        eigvecs->vector[k] = xdvzeros(dim);
    }

    if (e->length == 1) {
        eigvecs->vector[k]->data[0] = 1.0;
    } else if (k == 0) {
        eigvecs->vector[k]->data[0] = -1.0;
    } else {
        /* right-hand side taken from column k of H via dmcutcol() */
        dmcutcol(H, k, 0, k, vbuf);

        if (cplx) {
            spDebug(100, "calcEigenVectors", "lambda = %f + %fi\n", e->data[k], e->imag[k]);
            solveQuasiTriangleLinearEquationsCplx(H, k, e->data[k], e->imag[k], vbuf,
                                                  tolerance, eigvecs->vector[k]);
        } else {
            spDebug(100, "calcEigenVectors", "lambda = %f\n", e->data[k]);
            solveQuasiTriangleLinearEquations(H, k, e->data[k], vbuf,
                                              tolerance, eigvecs->vector[k]);
        }
        eigvecs->vector[k]->data[k] = -1.0;
        spDebug(100, "calcEigenVectors", "k = %ld, after solveQuasiTriangleLinearEquations\n", k);
    }

    if (e->length >= 2) {
        /* vbuf carries the untransformed vector into the Q mapping */
        dvcopy(vbuf, eigvecs->vector[k]);
        transformInitialEigenVector(Q, vbuf, eigvecs->vector[k], H->imag == NULL ? SP_TRUE : SP_FALSE);
    }
}

#define SP_DSQR_DEFAULT_TOLERANCE /*1.0e-10*/1.0e-12
/*
 * Eigenvalues (and optionally eigenvectors) of a square matrix by the
 * double shift QR algorithm.  Accepts a complex matrix.
 *
 *   A             : input matrix; NODATA is returned unless A->row == A->col
 *   max_iteration : forwarded to xdmdsqrloop() (0: no limit)
 *   tolerance     : values <= 0.0 select SP_DSQR_DEFAULT_TOLERANCE
 *   xoeigvecs     : when not NULL, receives a newly created set of
 *                   eigenvectors, one per eigenvalue
 *
 * Returns a newly created vector holding the eigenvalues, or NODATA when A
 * is not square or xdmdsqrloop() fails.  The vector gains an imaginary
 * part as soon as a 2x2 block that did not deflate is encountered.
 *
 * For such a block the eigenvalues are the roots of x^2 + b*x + c = 0 with
 * the coefficients delivered by dsqrpoly().  With real coefficients and a
 * non-positive discriminant the roots are written directly as the
 * conjugate pair -b/2 +/- i*sqrt(c - b^2/4); in every other case
 * (complex coefficients, or real coefficients with two distinct real
 * roots) calcQuadraticFormulaSolutions() is used so that no square root
 * of a negative number is taken.
 */
spDVector xdmeigdsqr(spDMatrix A, long max_iteration /* 0: no limit */, double tolerance,
                   spDVectors *xoeigvecs)
{
    long n;
    long k;
    double br, bi, cr, ci;
    double sigmar, sigmai;
    double sigmai2;
    double x1r, x1i, x2r, x2i;
    double habsvalue;
    spDMatrix H;
    spDMatrix Q;
    spDMatrix *pQ;
    spDVector e;
    spDVector vbuf;
    spDVectors eigvecs;
    spBool quasi_flag, conj_flag;

    if (A->row != A->col) return NODATA;

    if (tolerance <= 0.0) {
        tolerance = SP_DSQR_DEFAULT_TOLERANCE;
    }

    eigvecs = NODATA;
    Q = NODATA;
    vbuf = NODATA;
    
    /* Q is only requested from the QR loop when eigenvectors are wanted */
    if (xoeigvecs != NULL) {
        pQ = &Q;
    } else {
        pQ = NULL;
    }

    if (A->row <= 1) {
        H = xdmclone(A);
    } else {
        if ((H = xdmdsqrloop(A, max_iteration, tolerance, pQ)) == NODATA) {
            return NODATA;
        }
    }

    n = A->row;
    e = xdvdiag(H);
    if (xoeigvecs != NULL) {
        eigvecs = xdvsalloc(e->length);
        if (e->imag != NULL) {
            vbuf = xdvrialloc(n);
        } else {
            vbuf = xdvalloc(n);
        }
        *xoeigvecs = eigvecs;
    }
    
    k = 0;
    while (k < e->length) {
        spDebug(100, "xdmeigdsqr", "k = %ld, tolerance = %g\n", k, tolerance);

        quasi_flag = conj_flag = SP_FALSE;
        
        if (k < e->length - 1
            && dsqrdeflatecheck(H, k + 2, k + 1, k, tolerance, &habsvalue, NULL) == SP_FALSE
            && (k != e->length - 3 || habsvalue > dsqrgetpreciseabs(H, k + 2, k + 1))) {
            /* rows k and k+1 form a 2x2 block that did not deflate */
            quasi_flag = SP_TRUE;
            dsqrpoly(H, k + 2, &br, &bi, &cr, &ci, NULL);
            if (e->imag == NULL) {
                dvizeros(e, e->length);
            }

            sigmar = -br / 2.0;
            sigmai2 = cr - sigmar * sigmar;
                    
            if (bi == 0.0 && ci == 0.0 && sigmai2 >= 0.0) {
                conj_flag = SP_TRUE;
                sigmai = sqrt(sigmai2);
                spDebug(10, "xdmeigdsqr", "sigma = %f + %fi\n", sigmar, sigmai);

                spDebug(100, "xdmeigdsqr", "k = %ld: real diag, conjugate eigenvalues\n", k);

                e->data[k] = sigmar;
                e->imag[k++] = sigmai;
                        
                e->data[k] = sigmar;
                e->imag[k] = -sigmai;
            } else {
                /* complex coefficients, or real coefficients whose roots
                 * are real and distinct (sigmai2 < 0 would give NaN above) */
                spDebug(100, "xdmeigdsqr", "k = %ld: complex diag, two complex eigenvalues\n", k);
                
                calcQuadraticFormulaSolutions(br, bi, cr, ci, &x1r, &x1i, &x2r, &x2i);
                
                e->data[k] = x1r;
                e->imag[k++] = x1i;
                        
                e->data[k] = x2r;
                e->imag[k] = x2i;
            }
        }

        if (eigvecs != NODATA) {
            calcEigenVectors(H, Q, e, k, vbuf, tolerance, quasi_flag, conj_flag, eigvecs);
        }
        
        ++k;
    }
    
    spDebug(80, "xdmeigdsqr", "loop done: k = %ld, e->length = %ld\n", k, e->length);
    
    /* H, Q and vbuf are working storage owned by this function */
    if (H != NODATA) xdmfree(H);
    if (Q != NODATA) xdmfree(Q);
    if (vbuf != NODATA) xdvfree(vbuf);

    return e;
}

/*
 * SVD 
 * reference: http://www.math.pitt.edu/~sussmanm/2071Spring09/lab09/
 */
/*
 * Real plane rotation for the pair (f, g): produces c, s and r such that
 * r = sqrt(f^2 + g^2), c = f / r and s = g / r.
 *
 * The larger of |f| and |g| is factored out before the square root is
 * taken.  For f == 0 the result is c = 0, s = +/-1 (sign of g, +1 for
 * g == 0) and r = |g|.
 *
 *   oc, os, or : receive c, s and r
 */
static void svdrot(double f, double g, double *oc, double *os, double *or)
{
    double ratio;   /* smaller / larger component */
    double scale;   /* sqrt(1 + ratio^2) */
    double cosv, sinv;

    if (f == 0.0) {
        *oc = 0.0;
        *os = (g >= 0.0 ? 1.0 : -1.0);
        *or = FABS(g);
        return;
    }

    if (FABS(f) > FABS(g)) {
        /* f dominates */
        ratio = g / f;
        scale = sqrt(1.0 + ratio * ratio);
        cosv = (f >= 0.0 ? 1.0 : -1.0) / scale;
        *oc = cosv;
        *os = ratio * cosv;
        *or = FABS(f) * scale;
    } else {
        /* g dominates (or |f| == |g|) */
        ratio = f / g;
        scale = sqrt(1.0 + ratio * ratio);
        sinv = (g >= 0.0 ? 1.0 : -1.0) / scale;
        *os = sinv;
        *oc = ratio * sinv;
        *or = FABS(g) * scale;
    }
}

/*
 * Complex plane rotation for the pair (f, g) with f = fr + i*fi and
 * g = gr + i*gi.  Produces
 *
 *     r = sqrt(|f|^2 + |g|^2)   (real, so *ori is always 0.0)
 *     c = conj(f) / r
 *     s = conj(g) / r
 *
 * so that c*f + s*g = r.
 *
 * Special cases:
 *   - f == 0 (both parts zero): c = 0, s = conj(g)/|g|, r = |g|;
 *     when g is zero as well, s = 1 and r = 0, like svdrot(0, 0).
 *   - f and g both purely real: delegated to svdrot().
 */
static void svdrotcplx(double fr, double fi, double gr, double gi,
                       double *ocr, double *oci, double *osr, double *osi, double *orr, double *ori)
{
    double fa, ga;
    
    if (fr == 0.0 && fi == 0.0) {
        /* f vanishes only when both of its parts vanish */
        ga = sqrt(gr * gr + gi * gi);
        *ocr = *oci = 0.0;
        if (ga == 0.0) {
            /* nothing to rotate; avoid 0/0 */
            *osr = 1.0;
            *osi = 0.0;
        } else {
            /* s = conj(g)/|g| so that s*g = |g| = r */
            *osr = gr / ga;
            *osi = -gi / ga;
        }
        *orr = ga;
        *ori = 0.0;
    } else if (fi != 0.0 || gi != 0.0) {
        /* at least one operand has an imaginary part */
        fa = sqrt(fr * fr + fi * fi);
        ga = sqrt(gr * gr + gi * gi);
        *orr = sqrt(fa * fa + ga * ga);
        *ori = 0.0;
        *ocr = fr / *orr;
        *oci = -fi / *orr;
        *osr = gr / *orr;
        *osi = -gi / *orr;
    } else {
        /* purely real operands */
        *oci = *osi = *ori = 0.0;
        svdrot(fr, gr, ocr, osr, orr);
    }

    return;
}

/*
 * Apply a real plane rotation to two adjacent columns of A, in place.
 * For every row, with a1 = A[row][coffset] and a2 = A[row][coffset + 1]:
 *
 *     A[row][coffset]     =  a1 * c + a2 * s
 *     A[row][coffset + 1] = -a1 * s + a2 * c
 */
static void svdmultrot(spDMatrix A, long coffset, double c, double s)
{
    const long right = coffset + 1;
    long row = 0;
    double a_left, a_right;

    while (row < A->row) {
        /* both originals are read before either column is overwritten */
        a_left = A->data[row][coffset];
        a_right = A->data[row][right];

        A->data[row][coffset] = a_left * c + a_right * s;
        A->data[row][right] = a_left * (-s) + a_right * c;

        row++;
    }
}

/*
 * Apply a complex plane rotation (c = cr + i*ci, s = sr + i*si) to the
 * adjacent columns coffset and coffset + 1 of A, in place.  A must have an
 * imaginary part.  With a1, a2 the original entries of a row:
 *
 *   V_flag == SP_FALSE:  col0 = a1*conj(c) + a2*conj(s)
 *                        col1 = -a1*s      + a2*c
 *   otherwise:           col0 = a1*c       + a2*s
 *                        col1 = -a1*conj(s) + a2*conj(c)
 */
static void svdmultrotcplx(spDMatrix A, long coffset, double cr, double ci, double sr, double si, spBool V_flag)
{
    const long c0 = coffset;
    const long c1 = coffset + 1;
    long row;
    double pr, pi;  /* original entry of column c0 */
    double qr, qi;  /* original entry of column c1 */

    if (V_flag == SP_FALSE) {
        for (row = 0; row < A->row; row++) {
            pr = A->data[row][c0];
            pi = A->imag[row][c0];
            qr = A->data[row][c1];
            qi = A->imag[row][c1];

            /* p * conj(c) + q * conj(s) */
            A->data[row][c0] = (pr * cr + pi * ci) + (qr * sr + qi * si);
            A->imag[row][c0] = (-pr * ci + pi * cr) + (-qr * si + qi * sr);
            /* p * -s + q * c */
            A->data[row][c1] = -(pr * sr - pi * si) + (qr * cr - qi * ci);
            A->imag[row][c1] = -(pr * si + pi * sr) + (qr * ci + qi * cr);
        }
        return;
    }

    for (row = 0; row < A->row; row++) {
        pr = A->data[row][c0];
        pi = A->imag[row][c0];
        qr = A->data[row][c1];
        qi = A->imag[row][c1];

        /* p * c + q * s */
        A->data[row][c0] = (pr * cr - pi * ci) + (qr * sr - qi * si);
        A->imag[row][c0] = (pr * ci + pi * cr) + (qr * si + qi * sr);
        /* p * -conj(s) + q * conj(c) */
        A->data[row][c1] = -(pr * sr + pi * si) + (qr * cr + qi * ci);
        A->imag[row][c1] = -(-pr * si + pi * sr) + (-qr * ci + qi * cr);
    }
}

/*
 * One sweep of plane rotations over the rows start..end of the real upper
 * bidiagonal matrix B (diagonal B[i][i], superdiagonal B[i][i + 1]).
 *
 *   B          : bidiagonal matrix, updated in place
 *   start, end : first and last row/column index touched by the sweep;
 *                B[end - 1][end] is accessed, so end >= 1 is required
 *   U, V       : when not NODATA, the rotations are accumulated into
 *                column pairs (i, i + 1) of these matrices via svdmultrot()
 *
 * Each loop pass computes two rotations with svdrot(): the first one is
 * applied to V, the second one to U.  The values c, s, r, cold and sold
 * are carried from one pass to the next.
 */
static void svdvsweep(spDMatrix B, long start, long end, spDMatrix U, spDMatrix V)
{
    long i;
    double c, s, r;
    double cold, sold;
    double h;
    
    spDebug(80, "svdvsweep", "start = %ld, end = %ld\n", start, end);
#if 0
    if (U != NODATA) dmdump(U);
    if (V != NODATA) dmdump(V);
#endif
            
    /* initial state of the carried rotation values */
    cold = 1.0;
    c = 1.0;
    sold = 1.0;
    r = B->data[end][end];

    for (i = start; i <= end; i++) {
        /* column i + 1 does not exist: no superdiagonal element to process */
        if (i + 1 >= B->col) {
            break;
        }
        
        /* first rotation, from (c * d(i), e(i)); goes into V */
        svdrot(c * B->data[i][i], B->data[i][i + 1], &c, &s, &r);
        if (i != start) {
            /* superdiagonal of the previous row uses the previous sold */
            B->data[i - 1][i] = r * sold;
        }
        if (V != NODATA) {
            svdmultrot(V, i, c, s);
        }
    
        if (i == end) {
            break;
        }
        
        /* second rotation, from (cold * r, d(i + 1) * s); its r becomes the
         * new diagonal element d(i) and the rotation goes into U */
        svdrot(cold * r, B->data[i + 1][i + 1] * s, &cold, &sold, &B->data[i][i]);
        if (U != NODATA) {
            svdmultrot(U, i, cold, sold);
        }
    }
    if (end + 1 >= B->col) {
        /* end is the last column of B */
        h = c * B->data[end][end];
        B->data[end - 1][end] = h * sold;
        B->data[end][end] = h * cold;
    } else {
        B->data[end][end] = cold * r;
        B->data[end][end + 1] = 0.0;
    }

    /* make the last diagonal element non-negative: svdrot(f, 0) with f < 0
     * yields c = -1, which flips the sign of d(end), e(end - 1) and
     * column end of V */
    if (B->data[end][end] < 0.0) {
        svdrot(B->data[end][end], 0.0, &c, &s, &r);
        B->data[end][end] *= c;
        B->data[end - 1][end] *= c;
        
        if (V != NODATA) {
            for (i = 0; i < V->row; i++) {
                V->data[i][end] *= c;
            }
        }
    }
    
    spDebug(80, "svdvsweep", "done\n");
    
#if 0
    if (U != NODATA) dmdump(U);
    if (V != NODATA) dmdump(V);
#endif
    
    return;
}

/*
 * Complex counterpart of svdvsweep(): one sweep of plane rotations over
 * the rows start..end of the complex upper bidiagonal matrix B.
 *
 *   B          : bidiagonal matrix with an imaginary part, updated in place
 *   start, end : first and last row/column index touched by the sweep;
 *                B[end - 1][end] is accessed, so end >= 1 is required
 *   U, V       : when not NODATA, the rotations are accumulated into
 *                column pairs (i, i + 1) via svdmultrotcplx()
 *                (V with V_flag = SP_TRUE, U with V_flag = SP_FALSE)
 *
 * Every complex quantity is carried as a (real, imaginary) pair of
 * doubles; products with a conjugated factor are written out explicitly.
 */
static void svdvsweepcplx(spDMatrix B, long start, long end, spDMatrix U, spDMatrix V)
{
    long i;
    double cr, ci, sr, si, rr, ri;
    double coldr, coldi, soldr, soldi;
    double hr, hi;
    double cdr, cdi;
    double crr, cri;
    double dsr, dsi;
    
    spDebug(80, "svdvsweepcplx", "start = %ld, end = %ld\n", start, end);
#if 0
    if (U != NODATA) dmdump(U);
    if (V != NODATA) dmdump(V);
#endif
            
    /* initial state of the carried rotation values */
    coldr = 1.0; coldi = 0.0;
    cr = 1.0; ci = 0.0;
    soldr = 1.0; soldi = 0.0;
    rr = B->data[end][end]; ri = B->imag[end][end];

    for (i = start; i <= end; i++) {
        /* column i + 1 does not exist: no superdiagonal element to process */
        if (i + 1 >= B->col) {
            break;
        }
        
        /* conj(c)*d(i) */
        cdr = cr * B->data[i][i] + ci * B->imag[i][i];
        cdi = cr * B->imag[i][i] - ci * B->data[i][i];
        /* first rotation, from (conj(c)*d(i), e(i)); goes into V */
        svdrotcplx(cdr, cdi, B->data[i][i + 1], B->imag[i][i + 1],
                   &cr, &ci, &sr, &si, &rr, &ri);
        if (i != start) {
            /* e(i - 1) = r * sold */
            B->data[i - 1][i] = rr * soldr - ri * soldi;
            B->imag[i - 1][i] = rr * soldi + ri * soldr;
        }
        if (V != NODATA) {
            spDebug(80, "svdvsweepcplx", "%ld: before svdmultrotcplx\n", i);
            //if (V != NODATA) dmdump(V);
            svdmultrotcplx(V, i, cr, ci, sr, si, SP_TRUE);
            spDebug(80, "svdvsweepcplx", "%ld: after svdmultrotcplx\n", i);
            //if (V != NODATA) dmdump(V);
        }

        if (i == end) {
            break;
        }
        
        /* conj(cold)*r */
        crr = coldr * rr + coldi * ri;
        cri = coldr * ri - coldi * rr;
        /* d(i + 1)*s */
        dsr = B->data[i + 1][i + 1] * sr - B->imag[i + 1][i + 1] * si;
        dsi = B->data[i + 1][i + 1] * si + B->imag[i + 1][i + 1] * sr;
        /* second rotation; its r becomes the new d(i), the rotation goes into U */
        svdrotcplx(crr, cri, dsr, dsi, &coldr, &coldi, &soldr, &soldi, &B->data[i][i], &B->imag[i][i]);
        spDebug(80, "svdvsweepcplx", "%ld: d = %f + %fi\n", i, B->data[i][i], B->imag[i][i]);
        if (U != NODATA) {
            svdmultrotcplx(U, i, coldr, coldi, soldr, soldi, SP_FALSE);
        }
    }
    if (end + 1 >= B->col) {
        /* end is the last column of B */
        /* conj(c)*d(end) */
        hr = cr * B->data[end][end] + ci * B->imag[end][end];
        hi = cr * B->imag[end][end] - ci * B->data[end][end];
        /* e(end - 1) = h*sold */
        B->data[end - 1][end] = hr * soldr - hi * soldi;
        B->imag[end - 1][end] = hr * soldi + hi * soldr;
        /* h*conj(cold) */
        B->data[end][end] = hr * coldr + hi * coldi;
        B->imag[end][end] = -hr * coldi + hi * coldr;
    } else {
        /* conj(cold)*r */
        B->data[end][end] = coldr * rr + coldi * ri;
        B->imag[end][end] = coldr * ri - coldi * rr;
        B->data[end][end + 1] = 0.0;
        B->imag[end][end + 1] = 0.0;
    }

    /* when the last diagonal element is not (numerically) real and
     * non-negative, multiply d(end), e(end - 1) and column end of V by the
     * factor c obtained from svdrotcplx(d(end), 0) */
    if (FABS(B->imag[end][end]) >= SP_EPSILON || B->data[end][end] < 0.0) {
        double cer, cei;
        double cvr, cvi;
        
        svdrotcplx(B->data[end][end], B->imag[end][end], 0.0, 0.0,
                   &cr, &ci, &sr, &si, &rr, &ri);
        spDebug(80, "svdvsweepcplx", "convert to real for last diagonal: c = %f + %fi, s = %f + %fi, r = %f + %fi\n",
                cr, ci, sr, si, rr, ri);
        /* c*d(end) */
        cdr = cr * B->data[end][end] - ci * B->imag[end][end];
        cdi = cr * B->imag[end][end] + ci * B->data[end][end];
        B->data[end][end] = cdr;
        B->imag[end][end] = cdi;
        /* c*e(end - 1) */
        cer = cr * B->data[end - 1][end] - ci * B->imag[end - 1][end];
        cei = cr * B->imag[end - 1][end] + ci * B->data[end - 1][end];
        B->data[end - 1][end] = cer;
        B->imag[end - 1][end] = cei;
        
        if (V != NODATA) {
            /* c * V[:, end] */
            for (i = 0; i < V->row; i++) {
                cvr = cr * V->data[i][end] - ci * V->imag[i][end];
                cvi = cr * V->imag[i][end] + ci * V->data[i][end];
                V->data[i][end] = cvr;
                V->imag[i][end] = cvi;
            }
        }
    } else {
        /* imaginary part is below SP_EPSILON: drop it */
        B->imag[end][end] = 0.0;
    }
    
    spDebug(80, "svdvsweepcplx", "done\n");
    
#if 0
    if (U != NODATA) dmdump(U);
    if (V != NODATA) dmdump(V);
#endif
    
    return;
}

/*
 * Rotate the phase of each column of V so that its first element becomes
 * real, and apply the same unit-modulus factor to the column of U with the
 * same index.
 *
 *   U : may be NODATA; otherwise must have an imaginary part
 *   V : must not be NODATA and must have an imaginary part
 *
 * For column j the factor is conj(V[0][j]) / |V[0][j]|.  Columns whose
 * first element already has an imaginary part below SP_EPSILON are left
 * unscaled.  In every case V->imag[0][j] is finally set to exactly 0.0.
 *
 * U and V may have a different number of columns (callers in this file
 * document U as [M, M] and V as [N, N]); a column index that does not
 * exist in U is skipped for U instead of being accessed out of bounds.
 */
static void svdcplxshiftbasis(spDMatrix U, spDMatrix V)
{
    long i, j;
    double sr, si, va;
    double vr, vi;
    double ur, ui;

    for (j = 0; j < V->col; j++) {
        if (FABS(V->imag[0][j]) >= SP_EPSILON) {
            va = spPreciseCplxAbs(V->data[0][j], V->imag[0][j]);
            sr = V->data[0][j] / va;
            si = -V->imag[0][j] / va; /* conjugate */

            for (i = 0; i < V->row; i++) {
                vr = V->data[i][j] * sr - V->imag[i][j] * si;
                vi = V->data[i][j] * si + V->imag[i][j] * sr;
                V->data[i][j] = vr;
                V->imag[i][j] = vi;
            }

            /* the paired column exists in U only for j < U->col */
            if (U != NODATA && j < U->col) {
                for (i = 0; i < U->row; i++) {
                    ur = U->data[i][j] * sr - U->imag[i][j] * si;
                    ui = U->data[i][j] * si + U->imag[i][j] * sr;
                    U->data[i][j] = ur;
                    U->imag[i][j] = ui;
                }
            }
        }
        V->imag[0][j] = 0.0;
    }
    
    return;
}

/*
 * Convergence threshold for the bidiagonal SVD iteration.
 *
 *   B             : upper bidiagonal matrix (diagonal d, superdiagonal e)
 *   n             : number of diagonal elements to consider
 *   max_iteration : iteration limit, used for the lower bound below
 *   tolerance     : relative tolerance
 *   cplx_flag     : non-zero to take magnitudes with CABS() from B->data
 *                   and B->imag, zero to use FABS() on B->data only
 *
 * Two running recurrences of the form  v <- |d| * v / (v + |e|)  are
 * evaluated, one from the bottom right corner upwards and one from the
 * top left corner downwards; the smallest value seen in either is taken
 * as the estimate of the smallest singular value (the convergence
 * criterion discussed by Demmel and Kahan).
 *
 * Returns MAX(tolerance * estimate, max_iteration * SP_DBL_MIN).
 */
static double svdcalcthresh(spDMatrix B, long n, long max_iteration, double tolerance, spBool cplx_flag)
{
    long idx;
    double diag_abs, super_abs;
    double low_up, run_up, next_up;         /* bottom-to-top recurrence */
    double low_down, run_down, next_down;   /* top-to-bottom recurrence */
    double sigma_min;
    double limit;

    /* bottom-to-top pass, starting from |d(n-1)| */
    if (cplx_flag) {
        diag_abs = CABS(B->data[n - 1][n - 1], B->imag[n - 1][n - 1]);
    } else {
        diag_abs = FABS(B->data[n - 1][n - 1]);
    }
    low_up = run_up = diag_abs;
    idx = n - 2;
    while (idx >= 0) {
        if (cplx_flag) {
            diag_abs = CABS(B->data[idx][idx], B->imag[idx][idx]);
            super_abs = CABS(B->data[idx][idx + 1], B->imag[idx][idx + 1]);
        } else {
            diag_abs = FABS(B->data[idx][idx]);
            super_abs = FABS(B->data[idx][idx + 1]);
        }
        next_up = diag_abs * run_up / (run_up + super_abs);
        if (next_up < low_up) {
            low_up = next_up;
        }
        run_up = next_up;
        idx--;
    }

    /* top-to-bottom pass, starting from |d(0)| */
    if (cplx_flag) {
        diag_abs = CABS(B->data[0][0], B->imag[0][0]);
    } else {
        diag_abs = FABS(B->data[0][0]);
    }
    low_down = run_down = diag_abs;
    for (idx = 0; idx < n - 1; idx++) {
        if (cplx_flag) {
            diag_abs = CABS(B->data[idx + 1][idx + 1], B->imag[idx + 1][idx + 1]);
            super_abs = CABS(B->data[idx][idx + 1], B->imag[idx][idx + 1]);
        } else {
            diag_abs = FABS(B->data[idx + 1][idx + 1]);
            super_abs = FABS(B->data[idx][idx + 1]);
        }
        next_down = diag_abs * run_down / (run_down + super_abs);
        if (next_down < low_down) {
            low_down = next_down;
        }
        run_down = next_down;
    }

    sigma_min = MIN(low_up, low_down);
    limit = MAX(tolerance * sigma_min, (double)max_iteration * SP_DBL_MIN);
    spDebug(80, "svdcalcthresh", "lambda = %g, mu = %g, sigmaLower = %g, tolerance = %g, thresh = %g\n",
            low_up, low_down, sigma_min, tolerance, limit);

    return limit;
}

/*
 * Iterate sweeps on the upper bidiagonal matrix B until its superdiagonal
 * is negligible inside the active window.
 *
 *   B             : bidiagonal matrix, updated in place
 *   max_iteration : iteration limit; values <= 0 select 500 * n * n
 *   tolerance     : relative tolerance; values <= 0.0 select 100 * SP_EPSILON
 *   cplx_flag     : selects the complex sweep / magnitude computations
 *   U, V          : passed on to the sweep routines (and, for the complex
 *                   case with V != NODATA, to svdcplxshiftbasis())
 *
 * with n = MIN(B->row, B->col).
 *
 * Returns the iteration number at which convergence was detected (>= 1),
 * or -1 after issuing a warning when max_iteration sweeps were not enough.
 */
static long svdbidiag(spDMatrix B, long max_iteration, double tolerance, spBool cplx_flag,
                      spDMatrix U, spDMatrix V)
{
    long n;
    long i, j;
    long iteration;
    long iLower, iUpper;
    double ea;
    double thresh;

    n = MIN(B->row, B->col);
    
    if (max_iteration <= 0) {
        /*max_iteration = 75;*/
        max_iteration = 500 * n * n;
    }
    if (tolerance <= 0.0) {
        tolerance = 100.0 * SP_EPSILON;
    }

    /* superdiagonal magnitudes <= thresh are treated as zero */
    thresh = svdcalcthresh(B, n, max_iteration, tolerance, cplx_flag);
    spDebug(80, "svdbidiag", "n = %ld, max_iteration = %ld, tolerance = %g, thresh = %g\n",
            n, max_iteration, tolerance, thresh);

    /* active window of superdiagonal indices: [iLower, iUpper] */
    iUpper = n - 2;
    iLower = 0;

    for (iteration = 1; iteration <= max_iteration; iteration++) {
        /*% reduce problem size when some zeros are 
         *% on the superdiagonal */
        
        spDebug(80, "svdbidiag", "iteration = %ld / %ld\n", iteration, max_iteration);
        
        /* the window is only re-evaluated after the first sweep */
        if (iteration >= 2) {
            /*% how many zeros are near the bottom right? */
            /* move iUpper down to the last index whose superdiagonal
             * element exceeds thresh (or to 0 when there is none) */
            for (i = iUpper; i >= 0; i--) {
                iUpper = i;
                if (cplx_flag) {
                    ea = CABS(B->data[i][i + 1], B->imag[i][i + 1]);
                } else {
                    ea = FABS(B->data[i][i + 1]);
                }
                if (ea > thresh) {
                    break;
                }
            }

            /*% how many zeros are near the top left? */
            /* move iLower up to the first index whose superdiagonal
             * element exceeds thresh (or to iUpper when there is none) */
            j = iUpper;
            for (i = iLower; i <= iUpper; i++) {
                if (cplx_flag) {
                    ea = CABS(B->data[i][i + 1], B->imag[i][i + 1]);
                } else {
                    ea = FABS(B->data[i][i + 1]);
                }
                if (ea > thresh) {
                    j = i;
                    break;
                }
            }
            iLower = j;
        }

        /* with a single remaining index, test that element itself;
         * otherwise force the "not converged" value */
        if (iUpper == iLower) {
            if (cplx_flag) {
                ea = CABS(B->data[iUpper][iUpper + 1], B->imag[iUpper][iUpper + 1]);
            } else {
                ea = FABS(B->data[iUpper][iUpper + 1]);
            }
        } else {
            ea = thresh + 1.0;
        }
        spDebug(80, "svdbidiag", "iteration = %ld / %ld, ea = %g, thresh = %g, iUpper = %ld, iLower = %ld\n",
                iteration, max_iteration, ea, thresh, iUpper, iLower);
            
        /* converged (or empty window, e.g. n == 1): finish */
        if (ea <= thresh || iUpper < iLower) {
            if (cplx_flag && V != NODATA) {
                svdcplxshiftbasis(U, V);
            }
#if 0
            for (i = 0; i < n; i++) {
                spDebug(10, "svdbidiag", "result diag %ld = %g\n", i, B->data[i][i]);
            }
#endif
            return iteration;
        }

        /*% do a sweep */
        /* the sweep covers diagonal indices iLower .. iUpper + 1 */
        if (cplx_flag) {
            svdvsweepcplx(B, iLower, iUpper + 1, U, V);
        } else {
            svdvsweep(B, iLower, iUpper + 1, U, V);
#if 0
            for (i = 0; i < n; i++) {
                spDebug(10, "svdbidiag", "diag %ld = %g\n", i, B->data[i][i]);
            }
            for (i = 0; i < n - 1; i++) {
                spDebug(10, "svdbidiag", "supdiag %ld = %g\n", i, B->data[i][i + 1]);
            }
#endif
        }

    }
    
    /* iteration is max_iteration + 1 here */
    spWarning("svd: too many iterations: %ld\n", iteration);

    return -1;
}

/*
 * Second SVD stage: reduce an already bidiagonal matrix B to diagonal form,
 * in place, by handing it to svdbidiag().
 *
 * B is treated as complex when B->imag is non-NULL.  In that case every
 * supplied oU / oV that has no imaginary part yet is passed through
 * dmizeros(m, 0, 0) before the iteration starts.
 *
 * Returns SP_FALSE when svdbidiag() reports a value <= 0 (it returns -1
 * after exhausting its iterations); otherwise dmdiag(B) is applied and
 * SP_TRUE is returned.
 */
spBool dmsvdbidiag(spDMatrix B /* input: bidiagonal matrix, output: SVD diagonal matrix. size = [M, N] */,
                   long max_iteration, double tolerance,
                   spDMatrix oU /* can be NODATA. must be initialized. size = [M, M] */,
                   spDMatrix oV /* can be NODATA. must be initialized. size = [N, N] */) /* inputB = oU * outputB * oV' */
{
    spBool cplx_flag = (B->imag != NULL) ? SP_TRUE : SP_FALSE;

    if (cplx_flag == SP_TRUE) {
        /* the complex sweep needs imaginary storage in both basis matrices */
        if (oU != NODATA && oU->imag == NULL) {
            dmizeros(oU, 0, 0);
        }
        if (oV != NODATA && oV->imag == NULL) {
            dmizeros(oV, 0, 0);
        }
    }

    if (svdbidiag(B, max_iteration, tolerance, cplx_flag, oU, oV) <= 0) {
        return SP_FALSE;
    }

    dmdiag(B);
    return SP_TRUE;
}

/*
 * In-place SVD driver: bidiagonalize A with dmbidiag(), then diagonalize the
 * result with dmsvdbidiag().  Returns SP_FALSE as soon as either stage fails.
 */
spBool dmsvd(spDMatrix A /* output: SVD diagonal matrix. size = [M, N] */, long max_iteration, double tolerance,
             spDMatrix oU /* size = [M, M] */, spDMatrix oV /* size = [N, N] */) /* inputA = oU * outputA * oV' */
{
    spBool bidiag_ok = dmbidiag(A, oU, oV);

    if (bidiag_ok == SP_FALSE) {
        return SP_FALSE;
    }
    return dmsvdbidiag(A, max_iteration, tolerance, oU, oV);
}

/*
 * Allocating SVD: works on a clone of A and returns that clone after
 * dmbidiag() + dmsvdbidiag() have succeeded on it.
 *
 * xoU / xoV are optional outputs.  For each non-NULL pointer a square matrix
 * (A->row or A->col wide, with an imaginary part when A has one) is allocated
 * and stored through the pointer.  If either stage fails, the allocated
 * matrices and the clone are freed, the output pointers are reset to NODATA
 * and NODATA is returned.
 */
spDMatrix xdmsvd(spDMatrix A /* size = [M, N] */, long max_iteration, double tolerance,
               spDMatrix *xoU /* size = [M, M] */, spDMatrix *xoV /* size = [N, N] */) /* A = xoU * diag(output) * xoV' */
{
    spDMatrix left = NODATA;
    spDMatrix right = NODATA;
    spDMatrix work;
    spBool ok = SP_FALSE;

    if (xoU != NULL) {
        left = (A->imag != NULL) ? xdmrialloc(A->row, A->row) : xdmalloc(A->row, A->row);
        *xoU = left;
    }
    if (xoV != NULL) {
        right = (A->imag != NULL) ? xdmrialloc(A->col, A->col) : xdmalloc(A->col, A->col);
        *xoV = right;
    }

    work = xdmclone(A);

    if (dmbidiag(work, left, right) == SP_TRUE) {
        ok = dmsvdbidiag(work, max_iteration, tolerance, left, right);
    }

    if (ok != SP_FALSE) {
        return work;
    }

    /* failure: release everything that was allocated here */
    if (left != NODATA) {
        xdmfree(left);
        *xoU = NODATA;
    }
    if (right != NODATA) {
        xdmfree(right);
        *xoV = NODATA;
    }
    xdmfree(work);

    return NODATA;
}

/*
 * cholesky
 *   reference: http://nalab.mind.meiji.ac.jp/~mk/labo/text/cholesky.pdf
 */
/*
 * Real Cholesky factorization, upper form: fills the upper triangle of U
 * (diagonal included) so that a = U' * U.  Only entries a[i][j] with i <= j
 * are read; entries of U below the diagonal are left untouched.
 * Returns SP_FALSE, after a warning, when a pivot is not positive.
 */
static spBool cholesky_upper(long n, double **U, double **a)
{
    long row, col, k;
    double acc;
    
    for (row = 0; row < n; row++) {
        /* diagonal entry */
        acc = a[row][row];
        for (k = 0; k < row; k++) {
            acc -= SQUARE(U[k][row]);
        }
        if (acc <= 0) {
            spwarning("Cholesky decomposition: s <= 0\n");
            return SP_FALSE;
        }
        U[row][row] = sqrt(acc);

        /* remainder of this row */
        for (col = row + 1; col < n; col++) {
            acc = a[row][col];
            for (k = 0; k < row; k++) {
                acc -= U[k][col] * U[k][row];
            }
            U[row][col] = acc / U[row][row];
        }
    }
    
    return SP_TRUE;
}
 
/*
 * Real Cholesky factorization, lower form: fills the lower triangle of L
 * (diagonal included) so that a = L * L'.  Only entries a[i][j] with j <= i
 * are read; entries of L above the diagonal are left untouched.
 * Returns SP_FALSE, after a warning, when a pivot is not positive.
 */
static spBool cholesky_lower(long n, double **L, double **a)
{
    long row, col, k;
    double acc;

    for (row = 0; row < n; row++) {
        /* entries left of the diagonal */
        for (col = 0; col < row; col++) {
            acc = a[row][col];
            for (k = 0; k < col; k++) {
                acc -= L[row][k] * L[col][k];
            }
            L[row][col] = acc / L[col][col];
        }

        /* diagonal entry */
        acc = a[row][row];
        for (k = 0; k < row; k++) {
            acc -= SQUARE(L[row][k]);
        }
        if (acc <= 0) {
            spwarning("Cholesky decomposition: s <= 0\n");
            return SP_FALSE;
        }
        L[row][row] = sqrt(acc);
    }

    return SP_TRUE;
}

/*
 * Complex Cholesky factorization, upper form: fills the upper triangle of
 * U = Ur + j*Ui (real positive diagonal) so that A = U^H * U for the
 * Hermitian matrix A = ar + j*ai.
 *
 * Only entries of A with i <= j are read (the imaginary part of the diagonal
 * is ignored); entries of U below the diagonal are left untouched.
 * Returns SP_FALSE, after a warning, when a pivot is not positive.
 */
static spBool cholesky_upper_cplx(long n, double **Ur, double **Ui, double **ar, double **ai)
{
    long j, i, k;
    double sr, si;
    double pivot;
    
    for (i = 0; i < n; i++) {
        /* diagonal: a[i][i] - sum_k |U[k][i]|^2 must stay real and positive */
        sr = ar[i][i];
        for (k = 0; k < i; k++) {
            sr -= CSQUARE(Ur[k][i], Ui[k][i]);
        }
        if (sr <= 0) {
            spwarning("Cholesky decomposition: sr <= 0\n");
            return SP_FALSE;
        }
        pivot = sqrt(sr);
        Ur[i][i] = pivot;
        Ui[i][i] = 0.0;

        for (j = i + 1; j < n; j++) {
            sr = ar[i][j];
            si = ai[i][j];
            for (k = 0; k < i; k++) {
                /*
                 * A = U^H U gives a[i][j] = sum_k conj(U[k][i]) * U[k][j],
                 * so the conjugate (minus sign on the imaginary part)
                 * belongs to U[k][i], not to U[k][j].
                 */
                sr -= (Ur[k][i] * Ur[k][j] + Ui[k][i] * Ui[k][j]);
                si -= (Ur[k][i] * Ui[k][j] - Ui[k][i] * Ur[k][j]);
            }
            Ur[i][j] = sr / pivot;
            Ui[i][j] = si / pivot;
        }
    }
    
    return SP_TRUE;
}
 
/*
 * Complex Cholesky factorization, lower form: fills the lower triangle of
 * L = Lr + j*Li (real positive diagonal) so that A = L * L^H for
 * A = ar + j*ai.  Only entries of A with j <= i are read (the imaginary part
 * of the diagonal is ignored); entries of L above the diagonal are left
 * untouched.  Returns SP_FALSE, after a warning, on a non-positive pivot.
 */
static spBool cholesky_lower_cplx(long n, double **Lr, double **Li, double **ar, double **ai)
{
    long row, col, k;
    double accr, acci;

    for (row = 0; row < n; row++) {
        /* entries left of the diagonal */
        for (col = 0; col < row; col++) {
            accr = ar[row][col];
            acci = ai[row][col];

            for (k = 0; k < col; k++) {
                /* subtract L[row][k] * conj(L[col][k]) */
                accr -= (Lr[row][k] * Lr[col][k] + Li[row][k] * Li[col][k]);
                acci -= (-Lr[row][k] * Li[col][k] + Li[row][k] * Lr[col][k]);
            }
            Lr[row][col] = accr / Lr[col][col];
            Li[row][col] = acci / Lr[col][col];
        }

        /* diagonal entry: real by construction */
        accr = ar[row][row];
        for (k = 0; k < row; k++) {
            accr -= CSQUARE(Lr[row][k], Li[row][k]);
        }
        if (accr <= 0) {
            spwarning("Cholesky decomposition: sr <= 0\n");
            return SP_FALSE;
        }
        Lr[row][row] = sqrt(accr);
        Li[row][row] = 0.0;
    }

    return SP_TRUE;
}

/*
 * Cholesky factorization of the square matrix A into the caller-supplied
 * square matrix C of the same size.  `lower == SP_TRUE` selects the lower
 * factor, anything else the upper factor.
 *
 * When A has an imaginary part the complex kernels are used and C gets an
 * imaginary part via dmialloc() if it has none.  Returns SP_FALSE on invalid
 * arguments or when the selected kernel fails.
 * NOTE(review): the kernels only write one triangle of C; the other triangle
 * keeps whatever the caller put there.
 */
spBool dmchol(spDMatrix C/* size: (A->row, A->row) */, spDMatrix A, spBool lower)
{
    long n;

    if (A == NODATA || C == NODATA) return SP_FALSE;
    if (A->row != A->col || C->row != C->col || A->row != C->row) return SP_FALSE;

    n = A->row;

    if (A->imag == NULL) {
        /* real input */
        if (lower == SP_TRUE) {
            return cholesky_lower(n, C->data, A->data);
        }
        return cholesky_upper(n, C->data, A->data);
    }

    /* complex input */
    if (C->imag == NULL) {
        dmialloc(C);
    }
    if (lower == SP_TRUE) {
        return cholesky_lower_cplx(n, C->data, C->imag, A->data, A->imag);
    }
    return cholesky_upper_cplx(n, C->data, C->imag, A->data, A->imag);
}

/*
 * Allocating Cholesky factorization of the square matrix A.
 *
 * The result is created zero-filled (xdmrizeros() for complex A, xdmzeros()
 * otherwise) and then filled by the lower kernel when `lower == SP_TRUE`,
 * by the upper kernel otherwise.
 *
 * Returns the new factor, or NODATA when A is NODATA, is not square, or the
 * factorization fails (the partially filled result is freed in that case).
 */
spDMatrix xdmchol(spDMatrix A, spBool lower)
{
    spDMatrix C;
    long N;
    spBool flag;

    /* this function returns a matrix handle, so failure is NODATA, not SP_FALSE */
    if (A == NODATA
        || A->row != A->col) return NODATA;

    N = A->row;
    
    if (A->imag != NULL) {
        C = xdmrizeros(N, N);

        if (lower == SP_TRUE) {
            flag = cholesky_lower_cplx(N, C->data, C->imag, A->data, A->imag);
        } else {
            flag = cholesky_upper_cplx(N, C->data, C->imag, A->data, A->imag);
        }
    } else {
        C = xdmzeros(N, N);

        if (lower == SP_TRUE) {
            flag = cholesky_lower(N, C->data, A->data);
        } else {
            flag = cholesky_upper(N, C->data, A->data);
        }
    }

    if (flag == SP_FALSE) {
        xdmfree(C);
        return NODATA;
    }

    return C;
}

/*
 * Builds the inverse of A from its Cholesky factor C, one column at a time.
 * For each column k, the k-th unit vector is forward-substituted and then
 * back-substituted, with the result stored in column k of Ainv.
 *
 *   lower != 0: C is used as L; forward step uses C[i][j], backward step
 *               uses the (conjugate) transpose C[j][i].
 *   lower == 0: C is used as U; forward step uses the (conjugate) transpose
 *               C[j][i], backward step uses C[i][j].
 *
 * The complex path is selected by Ainv->imag != NULL.
 * NOTE(review): that path dereferences C->imag unconditionally, so it looks
 * like callers must not pass a complex Ainv together with a real C - confirm.
 *
 * Returns SP_FALSE when a diagonal entry of C->data is exactly zero (checked
 * while processing the first column), SP_TRUE otherwise.
 */
static spBool dmcholinvcore(spDMatrix Ainv, spDMatrix C, spBool lower)
{
    long i, j, k;
    long N;
    double t;
    double tr, ti;
    
    N = C->row;
    
    for (k = 0; k < N; k++) {
	/* forward substitution: right-hand side is the k-th unit vector */
	for (i = 0; i < N; i++) {
            /* the diagonal does not depend on k, so test it on the first pass only */
            if (k == 0 && C->data[i][i] == 0.0) {
                return SP_FALSE;
            }
            
	    if (Ainv->imag != NULL) {
		tr = (i == k ? 1.0 : 0.0);
		ti = 0.0;
                if (lower) {
                    /* subtract C[i][j] * Ainv[j][k] */
                    for (j = 0; j < i; j++) {
                        tr -= (C->data[i][j] * Ainv->data[j][k] - C->imag[i][j] * Ainv->imag[j][k]);
                        ti -= (C->data[i][j] * Ainv->imag[j][k] + C->imag[i][j] * Ainv->data[j][k]);
                    }
                } else {
                    /* subtract conj(C[j][i]) * Ainv[j][k] */
                    for (j = 0; j < i; j++) {
                        tr -= (C->data[j][i] * Ainv->data[j][k] + C->imag[j][i] * Ainv->imag[j][k]);
                        ti -= (C->data[j][i] * Ainv->imag[j][k] - C->imag[j][i] * Ainv->data[j][k]);
                    }
                }
		/* only the real part of the diagonal is used as divisor */
		Ainv->data[i][k] = tr / C->data[i][i];
		Ainv->imag[i][k] = ti / C->data[i][i];
	    } else {
		t = (i == k ? 1.0 : 0.0);
                if (lower) {
                    for (j = 0; j < i; j++) {
                        t -= C->data[i][j] * Ainv->data[j][k];
                    }
                } else {
                    for (j = 0; j < i; j++) {
                        t -= C->data[j][i] * Ainv->data[j][k];
                    }
                }
		Ainv->data[i][k] = t / C->data[i][i];
	    }
	}
	/* back substitution on the intermediate column, last row first */
	for (i = N - 1; i >= 0; i--) {
	    if (Ainv->imag != NULL) {
		tr = Ainv->data[i][k];
		ti = Ainv->imag[i][k];
                if (lower) {
                    /* subtract conj(C[j][i]) * Ainv[j][k] */
                    for (j = i + 1; j < N; j++) {
                        tr -= (C->data[j][i] * Ainv->data[j][k] + C->imag[j][i] * Ainv->imag[j][k]);
                        ti -= (C->data[j][i] * Ainv->imag[j][k] - C->imag[j][i] * Ainv->data[j][k]);
                    }
                } else {
                    /* subtract C[i][j] * Ainv[j][k] */
                    for (j = i + 1; j < N; j++) {
                        tr -= (C->data[i][j] * Ainv->data[j][k] - C->imag[i][j] * Ainv->imag[j][k]);
                        ti -= (C->data[i][j] * Ainv->imag[j][k] + C->imag[i][j] * Ainv->data[j][k]);
                    }
                }
		Ainv->data[i][k] = tr / C->data[i][i];
		Ainv->imag[i][k] = ti / C->data[i][i];
	    } else {
		t = Ainv->data[i][k];
                if (lower) {
                    for (j = i + 1; j < N; j++) {
                        t -= C->data[j][i] * Ainv->data[j][k];
                    }
                } else {
                    for (j = i + 1; j < N; j++) {
                        t -= C->data[i][j] * Ainv->data[j][k];
                    }
                }
		Ainv->data[i][k] = t / C->data[i][i];
	    }
	}
    }

    return SP_TRUE;
}

/*
 * Inverse from a Cholesky factor into caller-supplied storage.
 * C and Ainv must both be square and of equal size; Ainv gets an imaginary
 * part via dmialloc() when C is complex and Ainv is not.  The result of
 * dmcholinvcore() is returned; invalid arguments yield SP_FALSE.
 */
spBool dmcholinv(spDMatrix Ainv, spDMatrix C, spBool lower)
{
    if (C == NODATA || Ainv == NODATA) {
        return SP_FALSE;
    }
    if (C->row != C->col || Ainv->row != C->row || Ainv->row != Ainv->col) {
        return SP_FALSE;
    }

    if (Ainv->imag == NULL && C->imag != NULL) {
        dmialloc(Ainv);
    }

    return dmcholinvcore(Ainv, C, lower);
}

/*
 * Allocating inverse from a Cholesky factor.  The result has the size of C
 * and carries an imaginary part when C does.  Returns NODATA when C is
 * NODATA or not square, or when dmcholinvcore() fails (the result is freed).
 */
spDMatrix xdmcholinv(spDMatrix C, spBool lower)
{
    spDMatrix inverse;

    if (C == NODATA || C->row != C->col) {
        return NODATA;
    }

    inverse = (C->imag != NULL) ? xdmrialloc(C->row, C->row)
                                : xdmalloc(C->row, C->row);

    if (dmcholinvcore(inverse, C, lower) != SP_FALSE) {
        return inverse;
    }

    xdmfree(inverse);
    return NODATA;
}

/*
 * Solves A x = b for real data given the Cholesky factor C of A.
 * With `lower` set, C is used as L (forward step C[i][j], backward step
 * C[j][i]); otherwise C is used as U with the index roles swapped.
 * x receives the intermediate vector first and the solution afterwards.
 * Returns SP_FALSE when a diagonal entry of C is exactly zero.
 */
static spBool cholsolve(long n, double **C, double *b, spBool lower, double *x)
{
    long row, col;
    double acc;

    /* forward substitution */
    for (row = 0; row < n; row++) {
        if (C[row][row] == 0.0) return SP_FALSE;

        acc = b[row];
        for (col = 0; col < row; col++) {
            acc -= (lower ? C[row][col] : C[col][row]) * x[col];
        }
        x[row] = acc / C[row][row];
    }

    /* back substitution */
    for (row = n - 1; row >= 0; row--) {
        acc = x[row];
        for (col = row + 1; col < n; col++) {
            acc -= (lower ? C[col][row] : C[row][col]) * x[col];
        }
        x[row] = acc / C[row][row];
    }

    return SP_TRUE;
}

/*
 * Complex counterpart of cholsolve(): solves A x = b given the Cholesky
 * factor C = Cr + j*Ci of A, writing the solution to xr / xi.
 *
 *   bi == NULL: the right-hand side is treated as purely real.
 *   Ci == NULL: the factor is treated as purely real.
 *   lower != 0: forward step uses C[i][j], backward step conj(C[j][i]).
 *   lower == 0: forward step uses conj(C[j][i]), backward step C[i][j].
 *
 * xr and xi are written unconditionally, so both must be valid arrays of
 * length n.  Only the real part of the diagonal is used as divisor.
 * Returns SP_FALSE when a diagonal entry Cr[i][i] is exactly zero.
 */
static spBool cholsolvecplx(long n, double **Cr, double **Ci, double *br, double *bi,
                            spBool lower, double *xr, double *xi)
{
    long i, j;
    double tr, ti;
    double c;

    /* forward substitution */
    for (i = 0; i < n; i++) {
        if (Cr[i][i] == 0.0) return SP_FALSE;
        
	tr = br[i];
	if (bi != NULL) {
	    ti = bi[i];
	} else {
	    ti = 0.0;
	}
        if (lower) {
            if (Ci == NULL) {
                for (j = 0; j < i; j++) {
                    tr -= Cr[i][j] * xr[j];
                    ti -= Cr[i][j] * xi[j];
                }
            } else {
                /* subtract C[i][j] * x[j] */
                for (j = 0; j < i; j++) {
                    tr -= (Cr[i][j] * xr[j] - Ci[i][j] * xi[j]);
                    ti -= (Cr[i][j] * xi[j] + Ci[i][j] * xr[j]);
                }
            }
        } else {
            if (Ci == NULL) {
                for (j = 0; j < i; j++) {
                    tr -= Cr[j][i] * xr[j];
                    ti -= Cr[j][i] * xi[j];
                }
            } else {
                /* subtract conj(C[j][i]) * x[j] */
                for (j = 0; j < i; j++) {
                    tr -= (Cr[j][i] * xr[j] + Ci[j][i] * xi[j]);
                    ti -= (Cr[j][i] * xi[j] - Ci[j][i] * xr[j]);
                }
            }
        }
        c = Cr[i][i];
        xr[i] = tr / c;
        xi[i] = ti / c;
    }
    /* back substitution, last row first */
    for (i = n - 1; i >= 0; i--) {
        tr = xr[i];
        ti = xi[i];
        if (lower) {
            if (Ci == NULL) {
                for (j = i + 1; j < n; j++) {
                    tr -= Cr[j][i] * xr[j];
                    ti -= Cr[j][i] * xi[j];
                }
            } else {
                /* subtract conj(C[j][i]) * x[j] */
                for (j = i + 1; j < n; j++) {
                    tr -= (Cr[j][i] * xr[j] + Ci[j][i] * xi[j]);
                    ti -= (Cr[j][i] * xi[j] - Ci[j][i] * xr[j]);
                }
            }
        } else {
            if (Ci == NULL) {
                for (j = i + 1; j < n; j++) {
                    tr -= Cr[i][j] * xr[j];
                    ti -= Cr[i][j] * xi[j];
                }
            } else {
                /* subtract C[i][j] * x[j] */
                for (j = i + 1; j < n; j++) {
                    tr -= (Cr[i][j] * xr[j] - Ci[i][j] * xi[j]);
                    ti -= (Cr[i][j] * xi[j] + Ci[i][j] * xr[j]);
                }
            }
        }
	c = Cr[i][i];
	xr[i] = tr / c;
	xi[i] = ti / c;
    }

    return SP_TRUE;
}

/*
 * Dispatches a Cholesky solve to the real or the complex kernel.
 * The complex kernel is used as soon as either C or b has an imaginary part;
 * it writes x->imag, so x must carry one in that case.
 */
static spBool dvcholsolvecore(spDVector x, spDMatrix C, spDVector b, spBool lower)
{
    long n = C->row;

    if (C->imag == NULL && b->imag == NULL) {
        return cholsolve(n, C->data, b->data, lower, x->data);
    }

    return cholsolvecplx(n, C->data, C->imag, b->data, b->imag, lower, x->data, x->imag);
}

/*
 * Solves A x = b from the Cholesky factor C into the caller-supplied x.
 * C must be square and b and x must match its size.  x gets an imaginary
 * part via dvialloc() when C or b is complex and x is not.
 * Returns SP_FALSE on invalid arguments or when the solve fails.
 */
spBool dvcholsolve(spDVector x, spDMatrix C, spDVector b, spBool lower)
{
    spBool need_imag;

    if (C == NODATA || b == NODATA || x == NODATA) {
        return SP_FALSE;
    }
    if (C->row != C->col || C->col != b->length || b->length != x->length) {
        return SP_FALSE;
    }

    need_imag = (C->imag != NULL || b->imag != NULL) ? SP_TRUE : SP_FALSE;
    if (need_imag == SP_TRUE && x->imag == NULL) {
        dvialloc(x);
    }

    return dvcholsolvecore(x, C, b, lower);
}

/*
 * Allocating Cholesky solve: returns a new vector x with A x = b, where C is
 * the Cholesky factor of A (lower or upper according to `lower`).
 *
 * The result carries an imaginary part when C or b does, because
 * dvcholsolvecore() selects the complex kernel - which writes x->imag - in
 * either case.  Returns NODATA on invalid arguments or when the solve fails.
 */
spDVector xdvcholsolve(spDMatrix C, spDVector b, spBool lower)
{
    spDVector x;
    
    if (C == NODATA || b == NODATA
        || C->row != C->col || C->col != b->length) return NODATA;
    
    /* same condition as the kernel dispatch: a complex b alone needs x->imag */
    if (C->imag != NULL || b->imag != NULL) {
        x = xdvrialloc(C->row);
    } else {
        x = xdvalloc(C->row);
    }
    if (dvcholsolvecore(x, C, b, lower) == SP_FALSE) {
        xdvfree(x);
        return NODATA;
    }
    
    return x;
}

/*
 * lu, lusolve
 *   reference: ``C gengo ni yoru saishin algorithm jiten'' [in Japanese]
 *   see http://oku.edu.mie-u.ac.jp/~okumura/algo/
 */
#define SP_LU_EPSILON /*1.0e-30*//*SP_EPSILON*/0.0
/*
 * In-place real LU decomposition with implicit (scaled) partial pivoting.
 *
 * Rows are never moved: ip[] receives the row order, so row ip[k] of `a`
 * holds the k-th row of the factors (multipliers of the unit lower factor
 * left of column k, upper factor from column k on).  weightbuf[] receives
 * the reciprocal of the largest magnitude of each row.
 *
 * Returns the determinant.  The return value is 0 when a row has no entry
 * larger than SP_LU_EPSILON in magnitude, or when a pivot is that small (the
 * decomposition is abandoned at that point).
 */
static double lu(long n, double **a, long *ip,
                 double *weightbuf /* weightbuf[n] */)
{
    long i, j, k, ii, ik;
    long pivot;
    double t, u, det;
    
    /* pass 1: identity permutation and per-row scaling weights */
    det = 0;
    for (k = 0; k < n; k++) {
        ip[k] = k;
        u = 0.0;
        for (j = 0; j < n; j++) {
            t = FABS(a[k][j]);
            if (t > u) u = t;
        }
        spDebug(100, "lu", "weight calculation: k = %ld / %ld, u = %f\n", k, n, u);

        if (FABS(u) <= SP_LU_EPSILON) {
            spDebug(10, "lu", "k = %ld / %ld, error: u (%g) is too small in first loop\n", k, n, u);
            goto EXIT;
        }
        weightbuf[k] = 1 / u;
        spDebug(100, "lu", "weight calculation: weightbuf[%ld] = %f\n", k, weightbuf[k]);
    }

    /* pass 2: elimination, column by column */
    det = 1;
    for (k = 0; k < n; k++) {
        u = -1.0;
        /*
         * Start from the diagonal row: if every comparison below is false
         * (NaN entries) the pivot index must still be a valid position.
         */
        pivot = k;
        for (i = k; i < n; i++) {
            ii = ip[i];
            t = FABS(a[ii][k]) * weightbuf[ii];
            if (t > u) {
                u = t;
                pivot = i;
            }
        }
        ik = ip[pivot];
        if (pivot != k) {
            /* a row exchange flips the sign of the determinant */
            ip[pivot] = ip[k];
            ip[k] = ik;
            det = -det;
        }
        u = a[ik][k];
        det *= u;
        spDebug(100, "lu", "det calculation: k = %ld / %ld, u = %f, det = %f\n", k, n, u, det);

        if (FABS(u) <= SP_LU_EPSILON) {
            spDebug(10, "lu", "k = %ld / %ld, error: u (%g) is too small in second loop\n", k, n, u);
            goto EXIT;
        }
        for (i = k + 1; i < n; i++) {
            ii = ip[i];
            /* store the multiplier in place of the eliminated entry */
            t = (a[ii][k] /= u);
            for (j = k + 1; j < n; j++)
                a[ii][j] -= t * a[ik][j];
        }
    }
  EXIT:
    
    spDebug(80, "lu", "det = %g\n", det);
    return det;
}

/*
 * Solves A x = b from an in-place LU decomposition `a`.
 * ip[] maps factor row i to the storage row of `a` (and of b); a NULL ip
 * means the identity mapping.  The lower factor has an implicit unit
 * diagonal, so only the back substitution divides.
 */
static void lusolve(long n, double **a, double *b, long *ip, double *x)
{
    long row, col, src;
    double acc;

    /* forward substitution */
    for (row = 0; row < n; row++) {
        src = (ip != NULL) ? ip[row] : row;
        acc = b[src];
        for (col = 0; col < row; col++) {
            acc -= a[src][col] * x[col];
        }
        x[row] = acc;
    }

    /* back substitution, last row first */
    row = n;
    while (row-- > 0) {
        src = (ip != NULL) ? ip[row] : row;
        acc = x[row];
        for (col = row + 1; col < n; col++) {
            acc -= a[src][col] * x[col];
        }
        x[row] = acc / a[src][row];
    }
}

/*
 * In-place complex LU decomposition (ar + j*ai) with implicit (scaled)
 * partial pivoting; the complex counterpart of lu().
 *
 * ip[] receives the row order and weightbuf[] the reciprocal of the largest
 * magnitude of each row.  The determinant is written to *odetr / *odeti when
 * those pointers are non-NULL.
 *
 * Returns SP_FALSE when a row has no entry larger than SP_LU_EPSILON in
 * magnitude (determinant reported as 0) or when a pivot is that small (the
 * decomposition is abandoned at that point); SP_TRUE otherwise.
 */
static spBool lucplx(long n, double **ar, double **ai, long *ip, double *odetr, double *odeti,
                     double *weightbuf /* weightbuf[n] */)
{
    long i, j, k, ii, ik;
    long pivot;
    double t, u;
    double tr, ti;
    double ur, ui;
    double vr, vi;
    double detr, deti;
    spBool ok = SP_TRUE;

    /* pass 1: identity permutation and per-row scaling weights */
    detr = deti = 0.0;
    for (k = 0; k < n; k++) {
        ip[k] = k;
        u = 0.0;
        for (j = 0; j < n; j++) {
            t = spPreciseCplxAbs(ar[k][j], ai[k][j]);
            if (t > u) u = t;
        }
        spDebug(100, "lucplx", "weight calculation: k = %ld / %ld, u = %f\n", k, n, u);

        if (FABS(u) <= SP_LU_EPSILON) {
            spDebug(10, "lucplx", "k = %ld / %ld, error: u (%g) is too small in first loop\n", k, n, u);
            ok = SP_FALSE;
            goto EXIT;
        }
        weightbuf[k] = 1 / u;
        spDebug(100, "lucplx", "weight calculation: weightbuf[%ld] = %f\n", k, weightbuf[k]);
    }

    /* pass 2: elimination, column by column */
    detr = 1.0;
    deti = 0.0;
    for (k = 0; k < n; k++) {
        u = -1.0;
        /*
         * Start from the diagonal row: if every comparison below is false
         * (NaN entries) the pivot index must still be a valid position.
         */
        pivot = k;
        for (i = k; i < n; i++) {
            ii = ip[i];
            t = spPreciseCplxAbs(ar[ii][k], ai[ii][k]) * weightbuf[ii];
            if (t > u) {
                u = t;
                pivot = i;
            }
        }
        ik = ip[pivot];
        if (pivot != k) {
            /* a row exchange flips the sign of the determinant */
            ip[pivot] = ip[k];
            ip[k] = ik;
            detr = -detr;
            deti = -deti;
        }
        ur = ar[ik][k];
        ui = ai[ik][k];
        /* det *= pivot (complex product) */
        tr = detr * ur - deti * ui;
        ti = detr * ui + deti * ur;
        detr = tr;
        deti = ti;
        spDebug(100, "lucplx", "det calculation: k = %ld / %ld, u = %f + %fj, det = %f + %fj\n",
                k, n, ur, ui, detr, deti);

        if (spPreciseCplxAbs(ur, ui) <= SP_LU_EPSILON) {
            spDebug(10, "lucplx", "k = %ld / %ld, error: u (%g + %gi) is too small in second loop\n",
                    k, n, ur, ui);
            ok = SP_FALSE;
            goto EXIT;
        }
        for (i = k + 1; i < n; i++) {
            ii = ip[i];

            /* multiplier = a[ii][k] / pivot, stored in place of the eliminated entry */
            t = CSQUARE(ur, ui);
            tr = (ar[ii][k] * ur + ai[ii][k] * ui) / t;
            ti = (-ar[ii][k] * ui + ai[ii][k] * ur) / t;
            ar[ii][k] = tr;
            ai[ii][k] = ti;

            for (j = k + 1; j < n; j++) {
                vr = tr * ar[ik][j] - ti * ai[ik][j];
                vi = tr * ai[ik][j] + ti * ar[ik][j];
                ar[ii][j] -= vr;
                ai[ii][j] -= vi;
            }
        }
    }
  EXIT:
    
    spDebug(80, "lucplx", "det = %f + %fj\n", detr, deti);
    if (odetr != NULL) *odetr = detr;
    if (odeti != NULL) *odeti = deti;
    
    return ok;
}

/*
 * Complex counterpart of lusolve(): solves A x = b from an in-place LU
 * decomposition ar + j*ai, writing the solution to xr / xi.
 *
 *   ip == NULL: rows are used in natural order, otherwise ip[i] selects the
 *               storage row (of the factors and of b) for factor row i.
 *   bi == NULL: the right-hand side is treated as purely real.
 *   ai == NULL: the factors are treated as purely real.
 *
 * xr and xi are written unconditionally, so both must be valid arrays of
 * length n.  The lower factor has an implicit unit diagonal; only the back
 * substitution divides by the diagonal.
 */
static void lusolvecplx(long n, double **ar, double **ai, double *br, double *bi,
			long *ip, double *xr, double *xi)
{
    long i, j, ii;
    double tr, ti;
    double c;

    /* forward substitution */
    for (i = 0; i < n; i++) {
	if (ip == NULL) {
	    ii = i;
	} else {
	    ii = ip[i];
	}
	tr = br[ii];
	if (bi != NULL) {
	    ti = bi[ii];
	} else {
	    ti = 0.0;
	}
        if (ai == NULL) {
            for (j = 0; j < i; j++) {
                tr -= ar[ii][j] * xr[j];
                ti -= ar[ii][j] * xi[j];
            }
        } else {
            /* subtract a[ii][j] * x[j] (complex product) */
            for (j = 0; j < i; j++) {
                tr -= (ar[ii][j] * xr[j] - ai[ii][j] * xi[j]);
                ti -= (ar[ii][j] * xi[j] + ai[ii][j] * xr[j]);
            }
	}
        xr[i] = tr;
        xi[i] = ti;
    }
    /* back substitution, last row first */
    for (i = n - 1; i >= 0; i--) {
        tr = xr[i];
        ti = xi[i];
	if (ip == NULL) {
	    ii = i;
	} else {
	    ii = ip[i];
	}
        if (ai == NULL) {
            for (j = i + 1; j < n; j++) {
                tr -= ar[ii][j] * xr[j];
                ti -= ar[ii][j] * xi[j];
            }
            xr[i] = tr / ar[ii][i];
            xi[i] = ti / ar[ii][i];
        } else {
            for (j = i + 1; j < n; j++) {
                tr -= (ar[ii][j] * xr[j] - ai[ii][j] * xi[j]);
                ti -= (ar[ii][j] * xi[j] + ai[ii][j] * xr[j]);
            }
            /* complex division by the diagonal: multiply by its conjugate, divide by CSQUARE of it */
            c = CSQUARE(ar[ii][i], ai[ii][i]);
            xr[i] = (tr * ar[ii][i] + ti * ai[ii][i]) / c;
            xi[i] = (-tr * ai[ii][i] + ti * ar[ii][i]) / c;
        }
    }

    return;
}

/*
 * In-place LU decomposition of the real square matrix A.
 *
 * xoindex (optional): receives a newly allocated row-order vector that the
 *   caller must free; it is set to NODATA on every failure path.
 * det (optional): receives the determinant on success only.
 *
 * Returns SP_FALSE when A is NODATA, complex or not square, or when lu()
 * reports a zero determinant.
 */
spBool dmlu(spDMatrix A, spLVector *xoindex /* can be NULL, must be freed */, double *det)
{
    long N;
    double ldet;
    spLVector tindex;
    double *weight;
    spBool flag;
    
    if (A == NODATA || A->imag != NULL
        || A->row != A->col) {
        if (xoindex != NULL) *xoindex = NODATA;
        return SP_FALSE;
    }

    N = A->row;
    spDebug(80, "dmlu", "N = %ld\n", N);

    tindex = xlvalloc(N);
    weight = xalloc(N, double);

    ldet = lu(N, A->data, tindex->data, weight);
    xfree(weight);

    if (ldet == 0.0) {
        spDebug(10, "dmlu", "lu failed: ldet = %f\n", ldet);
        flag = SP_FALSE;
    } else {
        flag = SP_TRUE;
        if (det != NULL) *det = ldet;
    }

    /*
     * The index vector is released exactly once: either because the
     * decomposition failed or because the caller did not ask for it.
     */
    if (flag == SP_FALSE || xoindex == NULL) {
        xlvfree(tindex);
        tindex = NODATA;
    }
    if (xoindex != NULL) {
        *xoindex = tindex;
    }

    return flag;
}

/* real LU decomposition with buffer (no memory allocation in this function) */
/*
 * Real LU decomposition using caller-provided work buffers; the function
 * itself allocates nothing.  A must be real and square, and both buffers
 * must have A->row elements.  `det` (optional) receives the determinant on
 * success.  Returns SP_FALSE on invalid arguments or when lu() reports a
 * zero determinant.
 */
spBool dmluwb(spDMatrix A, spDVector weightbuf/* length: A->row (= A->col) */,
              spLVector index/* length: A->row (= A->col) */, double *det)
{
    long N;
    double ldet;

    if (A == NODATA || weightbuf == NODATA || index == NODATA) {
        return SP_FALSE;
    }
    if (A->imag != NULL || A->row != A->col) {
        return SP_FALSE;
    }
    if (weightbuf->length != A->row || index->length != A->row) {
        return SP_FALSE;
    }

    N = A->row;
    spDebug(80, "dmluwb", "N = %ld\n", N);

    ldet = lu(N, A->data, index->data, weightbuf->data);
    if (ldet == 0.0) {
        return SP_FALSE;
    }

    if (det != NULL) *det = ldet;
    return SP_TRUE;
}

/*
 * Allocating real LU decomposition: clones A and decomposes the clone with
 * dmlu().  `index` and `det` are forwarded to dmlu() unchanged.
 *
 * Returns the decomposed clone, or NODATA when A is NODATA or dmlu() fails
 * (the clone is freed and *index, when requested, is NODATA).
 */
spDMatrix xdmlu(spDMatrix A, spLVector *index, double *det)
{
    spDMatrix LU;

    /* reject a missing input before cloning, as xdmlucplx() does */
    if (A == NODATA) {
        if (index != NULL) *index = NODATA;
        return NODATA;
    }

    LU = xdmclone(A);

    if (dmlu(LU, index, det) == SP_FALSE) {
        xdmfree(LU);
        return NODATA;
    }

    return LU;
}

/*
 * In-place LU decomposition of the complex square matrix A.
 *
 * xoindex (optional): receives a newly allocated row-order vector that the
 *   caller must free; it is set to NODATA on every failure path.
 * detr / deti (optional): receive the determinant on success only.
 *
 * Returns SP_FALSE when A is NODATA, has no imaginary part or is not square,
 * or when lucplx() fails.
 */
spBool dmlucplx(spDMatrix A, spLVector *xoindex, double *detr, double *deti)
{
    long N;
    double ldetr, ldeti;
    spLVector tindex;
    double *weight;
    spBool flag;
    
    if (A == NODATA || A->imag == NULL || A->row != A->col) {
        /* same contract as dmlu(): never leave the caller's index unset */
        if (xoindex != NULL) *xoindex = NODATA;
        return SP_FALSE;
    }

    N = A->row;
    spDebug(80, "dmlucplx", "N = %ld\n", N);

    tindex = xlvalloc(N);
    weight = xalloc(N, double);

    if (!lucplx(N, A->data, A->imag, tindex->data, &ldetr, &ldeti, weight)) {
        spDebug(10, "dmlucplx", "lucplx failed: ldet = %f + %fi\n", ldetr, ldeti);
        flag = SP_FALSE;
    } else {
        flag = SP_TRUE;
        if (detr != NULL) *detr = ldetr;
        if (deti != NULL) *deti = ldeti;
    }

    xfree(weight);

    /*
     * The index vector is released exactly once: either because the
     * decomposition failed or because the caller did not ask for it.
     */
    if (flag == SP_FALSE || xoindex == NULL) {
        xlvfree(tindex);
        tindex = NODATA;
    }
    if (xoindex != NULL) {
        *xoindex = tindex;
    }

    return flag;
}

/*
 * Complex LU decomposition using caller-provided work buffers; the function
 * itself allocates nothing.  A must be complex and square, and both buffers
 * must have A->row elements.  detr / deti (optional) receive the determinant
 * on success.  Returns SP_FALSE on invalid arguments or when lucplx() fails.
 */
spBool dmlucplxwb(spDMatrix A, spDVector weightbuf/* length: A->row (= A->col) */,
                  spLVector index/* length: A->row (= A->col) */,
                  double *detr, double *deti)
{
    long N;
    double ldetr, ldeti;

    if (A == NODATA || weightbuf == NODATA || index == NODATA) {
        return SP_FALSE;
    }
    if (A->imag == NULL || A->row != A->col) {
        return SP_FALSE;
    }
    if (weightbuf->length != A->row || index->length != A->row) {
        return SP_FALSE;
    }

    N = A->row;
    spDebug(80, "dmlucplxwb", "N = %ld\n", N);

    if (!lucplx(N, A->data, A->imag, index->data, &ldetr, &ldeti, weightbuf->data)) {
        return SP_FALSE;
    }

    if (detr != NULL) *detr = ldetr;
    if (deti != NULL) *deti = ldeti;
    return SP_TRUE;
}

/*
 * Allocating complex LU decomposition: clones A and decomposes the clone
 * with dmlucplx().  Returns the decomposed clone, or NODATA when A is
 * NODATA, has no imaginary part, is not square, or the decomposition fails.
 */
spDMatrix xdmlucplx(spDMatrix A, spLVector *xoindex, double *detr, double *deti)
{
    spDMatrix factors;

    if (A == NODATA || A->imag == NULL || A->row != A->col) {
        return NODATA;
    }

    factors = xdmclone(A);

    if (dmlucplx(factors, xoindex, detr, deti) != SP_FALSE) {
        return factors;
    }

    xdmfree(factors);
    return NODATA;
}

/*
 * Extracts the upper factor from an in-place LU decomposition.
 *
 * Row i of U is taken from storage row index->data[i] of LU (row i itself
 * when index is NODATA); entries left of the diagonal are set to zero.
 * When U has an imaginary part it is filled from LU->imag, or with zeros if
 * LU has none.
 *
 * Returns SP_FALSE when an argument is NODATA, the sizes of U and LU differ,
 * or index does not have one entry per row of LU.
 */
spBool dmluupper(spDMatrix U, spDMatrix LU, spLVector index)
{
    long i, j, ip;

    if (U == NODATA || LU == NODATA
        || U->row != LU->row || U->col != LU->col) return SP_FALSE;

    /* same validation as dmlulower(): index is read once per row of LU */
    if (index != NODATA && index->length != LU->row) return SP_FALSE;

    for (i = 0; i < LU->row; i++) {
        ip = (index != NODATA) ? index->data[i] : i;

        /* strictly lower part (clipped to the column count) is zero */
        for (j = 0; j < i && j < LU->col; j++) {
            U->data[i][j] = 0.0;
            if (U->imag != NULL) {
                U->imag[i][j] = 0.0;
            }
        }

        /* diagonal and everything right of it comes from the factors */
        for (j = i; j < LU->col; j++) {
            U->data[i][j] = LU->data[ip][j];
            if (U->imag != NULL) {
                U->imag[i][j] = (LU->imag != NULL) ? LU->imag[ip][j] : 0.0;
            }
        }
    }

    return SP_TRUE;
}

/*
 * Extracts the unit lower factor from an in-place LU decomposition.
 *
 * Row i of L takes its entries left of the diagonal from storage row
 * index->data[i] of LU (row i itself when index is NODATA); the diagonal is
 * set to 1 and everything right of it to 0.  When L has an imaginary part it
 * is filled from LU->imag, or with zeros if LU has none.
 *
 * Returns SP_FALSE when an argument is NODATA, the sizes of L and LU differ,
 * or index does not have one entry per row of LU.
 */
spBool dmlulower(spDMatrix L, spDMatrix LU, spLVector index)
{
    long i, j, ip;

    if (L == NODATA || LU == NODATA
        || L->row != LU->row || L->col != LU->col) return SP_FALSE;

    if (index != NODATA && index->length != LU->row) return SP_FALSE;

    for (i = 0; i < LU->row; i++) {
        ip = (index != NODATA) ? index->data[i] : i;

        /* multipliers left of the diagonal */
        for (j = 0; j < i; j++) {
            if (j >= LU->col) break;

            L->data[i][j] = LU->data[ip][j];
            if (L->imag != NULL) {
                /* the imaginary part goes to L->imag; L->data keeps the real part */
                if (LU->imag != NULL) {
                    L->imag[i][j] = LU->imag[ip][j];
                } else {
                    L->imag[i][j] = 0.0;
                }
            }
        }

        /* j is now the diagonal column unless the row lies beyond the last column */
        if (j < LU->col) {
            if (L->imag != NULL) {
                L->imag[i][j] = 0.0;
            }
            L->data[i][j++] = 1.0;

            for (; j < LU->col; j++) {
                if (L->imag != NULL) {
                    L->imag[i][j] = 0.0;
                }
                L->data[i][j] = 0.0;
            }
        }
    }
    
    return SP_TRUE;
}

/*
 * Extracts the unit lower factor in permuted (storage) row order.
 *
 * For factor row i, storage row ip = index->data[i] (or i when index is
 * NODATA) of L1 is written: entries left of column i are copied from the
 * same storage row of LU, column i is set to 1 and the rest to 0.  An
 * imaginary part of L1 is zeroed except where LU supplies one.
 *
 * Returns SP_FALSE when an argument is NODATA, the sizes of L1 and LU
 * differ, or index does not have one entry per row of LU.
 */
spBool dmlupermlower(spDMatrix L1, spDMatrix LU, spLVector index)
{
    long frow, col, srow;

    if (L1 == NODATA || LU == NODATA) return SP_FALSE;
    if (L1->row != LU->row || L1->col != LU->col) return SP_FALSE;
    if (index != NODATA && index->length != LU->row) return SP_FALSE;

    for (frow = 0; frow < LU->row; frow++) {
        srow = (index != NODATA) ? index->data[frow] : frow;

        for (col = 0; col < LU->col; col++) {
            if (L1->imag != NULL) {
                L1->imag[srow][col] = 0.0;
            }

            if (col < frow) {
                /* stored multiplier */
                L1->data[srow][col] = LU->data[srow][col];
                if (L1->imag != NULL && LU->imag != NULL) {
                    L1->imag[srow][col] = LU->imag[srow][col];
                }
            } else {
                L1->data[srow][col] = (col == frow) ? 1.0 : 0.0;
            }
        }
    }
    
    return SP_TRUE;
}

/*
 * Allocating variant of dmluupper(): returns a new matrix of the size of LU
 * (with an imaginary part when LU has one) holding the upper factor.
 * Returns NODATA when LU is NODATA or index does not have one entry per row
 * of LU.
 */
spDMatrix xdmluupper(spDMatrix LU, spLVector index)
{
    spDMatrix U;

    if (LU == NODATA) return NODATA;

    /* same validation as xdmlulower(): index is read once per row of LU */
    if (index != NODATA && index->length != LU->row) return NODATA;

    if (LU->imag != NULL) {
        U = xdmrialloc(LU->row, LU->col);
    } else {
        U = xdmalloc(LU->row, LU->col);
    }
    dmluupper(U, LU, index);

    return U;
}

/*
 * Allocating variant of dmlulower(): returns a new matrix of the size of LU
 * (with an imaginary part when LU has one) holding the unit lower factor.
 * Returns NODATA when LU is NODATA or index does not have one entry per row
 * of LU.
 */
spDMatrix xdmlulower(spDMatrix LU, spLVector index)
{
    spDMatrix lower;

    if (LU == NODATA) {
        return NODATA;
    }
    if (index != NODATA && index->length != LU->row) {
        return NODATA;
    }

    lower = (LU->imag != NULL) ? xdmrialloc(LU->row, LU->col)
                               : xdmalloc(LU->row, LU->col);
    dmlulower(lower, LU, index);

    return lower;
}

/*
 * Allocating variant of dmlupermlower(): returns a new matrix of the size of
 * LU (with an imaginary part when LU has one) holding the permuted unit
 * lower factor.  Returns NODATA when LU is NODATA or index does not have one
 * entry per row of LU.
 */
spDMatrix xdmlupermlower(spDMatrix LU, spLVector index)
{
    spDMatrix permuted;

    if (LU == NODATA) {
        return NODATA;
    }
    if (index != NODATA && index->length != LU->row) {
        return NODATA;
    }

    permuted = (LU->imag != NULL) ? xdmrialloc(LU->row, LU->col)
                                  : xdmalloc(LU->row, LU->col);
    dmlupermlower(permuted, LU, index);

    return permuted;
}

/*
 * Returns a new matrix of the size of LU that is zero everywhere except on
 * the diagonal, where entry (i, i) is taken from storage row index->data[i]
 * (row i when index is NODATA), column i, of LU.  The result has an
 * imaginary part when LU has one.  Returns NODATA when LU is NODATA or index
 * does not have one entry per row of LU.
 */
spDMatrix xdmludiag(spDMatrix LU, spLVector index)
{
    long row, col, srow;
    spDMatrix D;

    if (LU == NODATA) return NODATA;
    if (index != NODATA && index->length != LU->row) return NODATA;

    D = (LU->imag != NULL) ? xdmrialloc(LU->row, LU->col)
                           : xdmalloc(LU->row, LU->col);

    for (row = 0; row < LU->row; row++) {
        /* clear the whole row first ... */
        for (col = 0; col < LU->col; col++) {
            D->data[row][col] = 0.0;
            if (D->imag != NULL) {
                D->imag[row][col] = 0.0;
            }
        }

        /* ... then place the diagonal entry, if this row has one */
        if (row < LU->col) {
            srow = (index != NODATA) ? index->data[row] : row;

            D->data[row][row] = LU->data[srow][row];
            if (D->imag != NULL) {
                D->imag[row][row] = (LU->imag != NULL) ? LU->imag[srow][row] : 0.0;
            }
        }
    }
    
    return D;
}

/*
 * Builds the inverse of A from its in-place LU decomposition, one column at
 * a time.  For column k the right-hand side is the k-th unit vector taken in
 * permuted order (entry i is 1 when the storage row ip of factor row i
 * equals k); it is forward-substituted through the unit lower factor and
 * back-substituted through the upper factor, and stored in column k of Ainv.
 *
 * index == NODATA means rows are used in natural order.
 * The complex path is selected by Ainv->imag != NULL.
 * NOTE(review): that path dereferences LU->imag unconditionally, so it looks
 * like callers must not pass a complex Ainv together with a real LU - confirm.
 * No check for a zero diagonal is made here.
 */
static void dmluinvcore(spDMatrix Ainv, spDMatrix LU, spLVector index)
{
    long i, ip, j, k;
    long N;
    double t;
    double tr, ti, c;
    
    N = LU->row;
    
    for (k = 0; k < N; k++) {
	/* forward substitution (unit diagonal, so no division) */
	for (i = 0; i < N; i++) {
	    if (index != NODATA) {
		ip = index->data[i];
	    } else {
		ip = i;
	    }
	    if (Ainv->imag != NULL) {
		tr = (ip == k ? 1.0 : 0.0);
		ti = 0.0;
		for (j = 0; j < i; j++) {
		    tr -= (LU->data[ip][j] * Ainv->data[j][k] - LU->imag[ip][j] * Ainv->imag[j][k]);
		    ti -= (LU->data[ip][j] * Ainv->imag[j][k] + LU->imag[ip][j] * Ainv->data[j][k]);
		}
		Ainv->data[i][k] = tr;
		Ainv->imag[i][k] = ti;
	    } else {
		t = (ip == k ? 1.0 : 0.0);
		for (j = 0; j < i; j++) {
		    t -= LU->data[ip][j] * Ainv->data[j][k];
		}
		Ainv->data[i][k] = t;
	    }
	}
	/* back substitution, last row first */
	for (i = N - 1; i >= 0; i--) {
	    if (index != NODATA) {
		ip = index->data[i];
	    } else {
		ip = i;
	    }
	    if (Ainv->imag != NULL) {
		tr = Ainv->data[i][k];
		ti = Ainv->imag[i][k];
		for (j = i + 1; j < N; j++) {
		    tr -= (LU->data[ip][j] * Ainv->data[j][k] - LU->imag[ip][j] * Ainv->imag[j][k]);
		    ti -= (LU->data[ip][j] * Ainv->imag[j][k] + LU->imag[ip][j] * Ainv->data[j][k]);
		}
		/* complex division by the diagonal: multiply by its conjugate, divide by CSQUARE of it */
		c = CSQUARE(LU->data[ip][i], LU->imag[ip][i]);
		Ainv->data[i][k] = (tr * LU->data[ip][i] + ti * LU->imag[ip][i]) / c;
		Ainv->imag[i][k] = (-tr * LU->imag[ip][i] + ti * LU->data[ip][i]) / c;
	    } else {
		t = Ainv->data[i][k];
		for (j = i + 1; j < N; j++) {
		    t -= LU->data[ip][j] * Ainv->data[j][k];
		}
		Ainv->data[i][k] = t / LU->data[ip][i];
	    }
	}
    }

    return;
}

/*
 * Inverse from an LU decomposition into caller-supplied storage.
 * LU and Ainv must both be square and of equal size, and index (when given)
 * must have one entry per row.  Ainv gets an imaginary part via dmialloc()
 * when LU is complex and Ainv is not.  Returns SP_FALSE on invalid
 * arguments, SP_TRUE otherwise.
 */
spBool dmluinv(spDMatrix Ainv, spDMatrix LU, spLVector index)
{
    if (LU == NODATA || Ainv == NODATA) {
        return SP_FALSE;
    }
    if (LU->row != LU->col || Ainv->row != LU->row || Ainv->row != Ainv->col) {
        return SP_FALSE;
    }
    if (index != NODATA && index->length != LU->row) {
        return SP_FALSE;
    }

    if (Ainv->imag == NULL && LU->imag != NULL) {
        dmialloc(Ainv);
    }

    dmluinvcore(Ainv, LU, index);

    return SP_TRUE;
}

/*
 * Allocating inverse from an LU decomposition.  The result has the size of
 * LU and carries an imaginary part when LU does.  Returns NODATA when LU is
 * NODATA or not square, or when index does not have one entry per row.
 */
spDMatrix xdmluinv(spDMatrix LU, spLVector index)
{
    spDMatrix inverse;

    if (LU == NODATA || LU->row != LU->col) {
        return NODATA;
    }
    if (index != NODATA && index->length != LU->row) {
        return NODATA;
    }

    inverse = (LU->imag != NULL) ? xdmrialloc(LU->row, LU->row)
                                 : xdmalloc(LU->row, LU->row);

    dmluinvcore(inverse, LU, index);

    return inverse;
}

/*
 * Dispatch an LU solve to the real or complex kernel.  No argument checking
 * is done here; callers validate sizes and make sure x has the storage the
 * chosen kernel writes to.
 *
 * The real kernel lusolve() is used only when both LU and b are real;
 * otherwise lusolvecplx() is called with whatever imaginary pointers LU and
 * b carry (either may be NULL).  A missing index is passed on as NULL.
 */
static void dvlusolvecore(spDVector x, spDMatrix LU, spLVector index, spDVector b)
{
    if (LU->imag == NULL && b->imag == NULL) {
        lusolve(LU->row, LU->data, b->data,
                index == NODATA ? NULL : index->data, x->data);
        return;
    }

    lusolvecplx(LU->row, LU->data, LU->imag, b->data, b->imag,
                index == NODATA ? NULL : index->data, x->data, x->imag);
}

/*
 * Solve a linear system from its LU factorization into a caller-supplied
 * vector.
 *
 *   x     : destination; length must equal b->length.  An imaginary part is
 *           attached with dvialloc() when LU or b is complex and x is real.
 *   LU    : square LU-factored matrix whose order equals b->length.
 *   index : permutation from the factorization, or NODATA; when given its
 *           length must equal LU->row.
 *   b     : right-hand side.
 *
 * Returns SP_TRUE after the solve, SP_FALSE on missing or mismatched
 * arguments.
 */
spBool dvlusolve(spDVector x, spDMatrix LU, spLVector index, spDVector b)
{
    if (LU == NODATA || b == NODATA || x == NODATA) {
        return SP_FALSE;
    }
    if (LU->row != LU->col || LU->col != b->length) {
        return SP_FALSE;
    }
    if (b->length != x->length) {
        return SP_FALSE;
    }
    if (index != NODATA && index->length != LU->row) {
        return SP_FALSE;
    }

    /* A complex system needs somewhere to put the imaginary solution. */
    if (x->imag == NULL && (LU->imag != NULL || b->imag != NULL)) {
        dvialloc(x);
    }

    dvlusolvecore(x, LU, index, b);

    return SP_TRUE;
}

/*
 * Allocating variant of dvlusolve(): returns a new vector of length LU->row
 * holding the solution, complex when LU or b is complex.
 *
 * Returns NODATA when LU or b is missing, LU is not square, LU->col differs
 * from b->length, or index is given with a length other than LU->row.
 * The caller owns the returned vector.
 */
spDVector xdvlusolve(spDMatrix LU, spLVector index, spDVector b)
{
    spDVector solution;

    if (LU == NODATA || b == NODATA) {
        return NODATA;
    }
    if (LU->row != LU->col || LU->col != b->length) {
        return NODATA;
    }
    if (index != NODATA && index->length != LU->row) {
        return NODATA;
    }

    if (LU->imag == NULL && b->imag == NULL) {
        solution = xdvalloc(LU->row);
    } else {
        solution = xdvrialloc(LU->row);
    }

    dvlusolvecore(solution, LU, index, b);

    return solution;
}

/*
 * Invert a square matrix: factor it with xdmlu() (real) or xdmlucplx()
 * (complex), then build the inverse with xdmluinv().
 *
 * Returns a newly allocated matrix owned by the caller, or NODATA when A is
 * missing, not square, or the factorization returns NODATA.
 */
spDMatrix xdminv(spDMatrix A)
{
    spLVector pivots;
    spDMatrix factored;
    spDMatrix inverse;

    if (A == NODATA) {
        return NODATA;
    }
    if (A->row != A->col) {
        return NODATA;
    }

    if (A->imag == NULL) {
        factored = xdmlu(A, &pivots, NULL);
    } else {
        factored = xdmlucplx(A, &pivots, NULL, NULL);
    }

    if (factored == NODATA) {
        return NODATA;
    }

    inverse = xdmluinv(factored, pivots);

    /* The factorization and its permutation are only temporaries. */
    xdmfree(factored);
    xlvfree(pivots);

    return inverse;
}

/*
 * Solve A x = b for a square A by LU factorization followed by
 * xdvlusolve().
 *
 * Returns a newly allocated solution vector, or NODATA when A is missing or
 * not square, when the factorization returns NODATA (a debug message is
 * logged), or when xdvlusolve() rejects its arguments.
 */
spDVector xdvmldivide(spDMatrix A, spDVector b)
{
    spLVector pivots;
    spDMatrix factored;
    spDVector solution;

    if (A == NODATA) {
        return NODATA;
    }
    if (A->row != A->col) {
        return NODATA;
    }

    if (A->imag == NULL) {
        factored = xdmlu(A, &pivots, NULL);
    } else {
        factored = xdmlucplx(A, &pivots, NULL, NULL);
    }

    if (factored == NODATA) {
        spDebug(10, "xdvmldivide", "xdmlu return NULL\n");
        return NODATA;
    }

    solution = xdvlusolve(factored, pivots, b);

    xdmfree(factored);
    xlvfree(pivots);

    return solution;
}

/*
 * Matrix product kernel: C = A * B, real or complex.
 *
 * No argument checking is done here.  C must already be A->row x B->col
 * and must carry an imaginary part whenever A or B does; the imaginary
 * parts of A and B are only consulted when C->imag is present.
 *
 * Each output row is cleared before accumulation, so the result is all
 * zeros when the inner dimension A->col is 0.  The loops run i-k-j so the
 * innermost loop walks rows of B and C in storage order.
 *
 * NOTE: C is written while A and B are still being read, so C should not
 * be the same matrix as A or B.
 */
static void dmtimescore(spDMatrix C, spDMatrix A, spDMatrix B)
{
    long i, j, k;

    for (i = 0; i < A->row; i++) {
        /* Start every element of this output row from zero. */
        for (j = 0; j < B->col; j++) {
            C->data[i][j] = 0.0;
            if (C->imag != NULL) {
                C->imag[i][j] = 0.0;
            }
        }

        for (k = 0; k < A->col; k++) {
            for (j = 0; j < B->col; j++) {
                C->data[i][j] += A->data[i][k] * B->data[k][j];
                if (C->imag != NULL) {
                    if (A->imag != NULL && B->imag != NULL) {
                        C->data[i][j] -= A->imag[i][k] * B->imag[k][j];
                    }
                    if (A->imag != NULL) {
                        C->imag[i][j] += A->imag[i][k] * B->data[k][j];
                    }
                    if (B->imag != NULL) {
                        C->imag[i][j] += A->data[i][k] * B->imag[k][j];
                    }
                }
            }
        }
    }

    return;
}

/*
 * Matrix product into a caller-supplied matrix: C = A * B.
 *
 * Requires A->col == B->row and C sized A->row x B->col.  When A or B is
 * complex and C is real, an imaginary part is attached to C with
 * dmialloc() before multiplying.
 *
 * Returns SP_TRUE on success, SP_FALSE when an argument is missing or the
 * sizes do not agree.
 */
spBool dmtimes(spDMatrix C/* size: (A->row, B->col) */, spDMatrix A, spDMatrix B)
{
    if (A == NODATA || B == NODATA || C == NODATA) {
        return SP_FALSE;
    }
    if (A->col != B->row) {
        return SP_FALSE;
    }
    if (C->row != A->row || C->col != B->col) {
        return SP_FALSE;
    }

    if (C->imag == NULL && (A->imag != NULL || B->imag != NULL)) {
        dmialloc(C);
    }

    dmtimescore(C, A, B);

    return SP_TRUE;
}

/*
 * Allocating matrix product: returns a new A->row x B->col matrix holding
 * A * B, complex when either operand is complex.
 *
 * Returns NODATA when an operand is missing or A->col != B->row.
 * The caller owns the returned matrix.
 */
spDMatrix xdmtimes(spDMatrix A, spDMatrix B)
{
    spDMatrix product;

    if (A == NODATA || B == NODATA) {
        return NODATA;
    }
    if (A->col != B->row) {
        return NODATA;
    }

    product = xdmalloc(A->row, B->col);
    if (A->imag != NULL || B->imag != NULL) {
        /* At least one complex operand: the result needs an imaginary part. */
        dmialloc(product);
    }

    dmtimescore(product, A, B);

    return product;
}

/*
 * Matrix-vector product kernel: y = A * x with x treated as a column vector.
 *
 * Sizes are clamped instead of checked: only the first
 * MIN(A->row, y->length) elements of y are written, and each is the sum
 * over the first MIN(A->col, x->length) columns.  Imaginary parts of A and
 * x contribute only when y has an imaginary part.
 */
static void dvmvtimescore(spDVector y, spDMatrix A, spDVector x/* column vector */)
{
    long r, c;
    long nrow, ncol;
    int y_cplx, a_cplx, x_cplx;

    nrow = MIN(A->row, y->length);
    ncol = MIN(A->col, x->length);

    y_cplx = (y->imag != NULL);
    a_cplx = (A->imag != NULL);
    x_cplx = (x->imag != NULL);

    for (r = 0; r < nrow; r++) {
        y->data[r] = 0.0;
        if (y_cplx) {
            y->imag[r] = 0.0;
        }

        for (c = 0; c < ncol; c++) {
            y->data[r] += A->data[r][c] * x->data[c];
            if (!y_cplx) {
                continue;
            }

            /* (ar + j ai)(xr + j xi): remaining real and imaginary terms */
            if (a_cplx && x_cplx) {
                y->data[r] -= A->imag[r][c] * x->imag[c];
            }
            if (a_cplx) {
                y->imag[r] += A->imag[r][c] * x->data[c];
            }
            if (x_cplx) {
                y->imag[r] += A->data[r][c] * x->imag[c];
            }
        }
    }
}

/*
 * Matrix-vector product into a caller-supplied vector: y = A * x.
 *
 * Only the presence of the three arguments is checked; the lengths of x
 * and y are deliberately not compared with A (dvmvtimescore() clamps to
 * the smaller sizes).  y gets an imaginary part via dvialloc() when A or x
 * is complex and y is real.
 *
 * Returns SP_FALSE when any argument is NODATA, SP_TRUE otherwise.
 */
spBool dvmvtimes(spDVector y, spDMatrix A, spDVector x/* column vector */)
{
    if (A == NODATA || x == NODATA || y == NODATA) {
        return SP_FALSE;
    }

    if (y->imag == NULL && (A->imag != NULL || x->imag != NULL)) {
        dvialloc(y);
    }

    dvmvtimescore(y, A, x);

    return SP_TRUE;
}

/*
 * Allocating matrix-vector product: returns a new vector of length A->row
 * holding A * x, complex when A or x is complex.
 *
 * x->length is not required to equal A->col; dvmvtimescore() sums over the
 * smaller of the two.  Returns NODATA when A or x is missing.
 */
spDVector xdvmvtimes(spDMatrix A, spDVector x/* column vector */)
{
    spDVector result;

    if (A == NODATA || x == NODATA) {
        return NODATA;
    }

    result = xdvalloc(A->row);
    if (A->imag != NULL || x->imag != NULL) {
        dvialloc(result);
    }

    dvmvtimescore(result, A, x);

    return result;
}

/*
 * Vector-matrix product kernel: y = x * A with x and y treated as row
 * vectors.
 *
 * Sizes are clamped instead of checked: only the first
 * MIN(A->col, y->length) elements of y are written, and each is the sum
 * over the first MIN(A->row, x->length) rows.  Imaginary parts of A and x
 * contribute only when y has an imaginary part.
 */
static void dvvmtimescore(spDVector y/* row vector */, spDVector x/* row vector */, spDMatrix A)
{
    long r, c;
    long nrow, ncol;
    int y_cplx, a_cplx, x_cplx;

    ncol = MIN(A->col, y->length);
    nrow = MIN(A->row, x->length);

    y_cplx = (y->imag != NULL);
    a_cplx = (A->imag != NULL);
    x_cplx = (x->imag != NULL);

    for (c = 0; c < ncol; c++) {
        y->data[c] = 0.0;
        if (y_cplx) {
            y->imag[c] = 0.0;
        }

        for (r = 0; r < nrow; r++) {
            y->data[c] += x->data[r] * A->data[r][c];
            if (!y_cplx) {
                continue;
            }

            /* (xr + j xi)(ar + j ai): remaining real and imaginary terms */
            if (a_cplx && x_cplx) {
                y->data[c] -= x->imag[r] * A->imag[r][c];
            }
            if (a_cplx) {
                y->imag[c] += x->data[r] * A->imag[r][c];
            }
            if (x_cplx) {
                y->imag[c] += x->imag[r] * A->data[r][c];
            }
        }
    }
}

/*
 * Vector-matrix product into a caller-supplied vector: y = x * A.
 *
 * Only the presence of the three arguments is checked; the lengths of x
 * and y are deliberately not compared with A (dvvmtimescore() clamps to
 * the smaller sizes).  y gets an imaginary part via dvialloc() when A or x
 * is complex and y is real.
 *
 * Returns SP_FALSE when any argument is NODATA, SP_TRUE otherwise.
 */
spBool dvvmtimes(spDVector y/* row vector */, spDVector x/* row vector */, spDMatrix A)
{
    if (x == NODATA || A == NODATA || y == NODATA) {
        return SP_FALSE;
    }

    if (y->imag == NULL && (A->imag != NULL || x->imag != NULL)) {
        dvialloc(y);
    }

    dvvmtimescore(y, x, A);

    return SP_TRUE;
}

/*
 * Allocating vector-matrix product: returns a new row vector holding
 * x * A, complex when A or x is complex.
 *
 * The result has one element per column of A, so it is allocated with
 * length A->col.  (Sizing it from A->row / x->length either truncated the
 * product or left trailing elements unwritten for non-square A, because
 * dvvmtimescore() fills MIN(A->col, y->length) elements.)
 *
 * x->length is not required to equal A->row; dvvmtimescore() sums over the
 * smaller of the two.  Returns NODATA when x or A is missing.  The caller
 * owns the returned vector.
 */
spDVector xdvvmtimes(spDVector x/* row vector */, spDMatrix A)
{
    spDVector y;

    if (x == NODATA || A == NODATA
	/*|| A->row != x->length*/) return NODATA;

    y = xdvalloc(A->col);
    if (A->imag != NULL || x->imag != NULL) {
	dvialloc(y);
    }
    
    dvvmtimescore(y, x, A);
    
    return y;
}

/*
 * Outer-product kernel: C[i][j] = a[i] * b[j] for i < a->length and
 * j < b->length, real or complex.
 *
 * No size checking is done here.  Imaginary parts of a and b contribute
 * only when C has an imaginary part.
 */
static void dmvvtimescore(spDMatrix C, spDVector a/* column vector */, spDVector b/* row vector */)
{
    long r, c;
    int a_cplx, b_cplx;

    a_cplx = (a->imag != NULL);
    b_cplx = (b->imag != NULL);

    for (r = 0; r < a->length; r++) {
        for (c = 0; c < b->length; c++) {
            C->data[r][c] = a->data[r] * b->data[c];
            if (C->imag == NULL) {
                continue;
            }

            C->imag[r][c] = 0.0;
            if (a_cplx && b_cplx) {
                C->data[r][c] -= a->imag[r] * b->imag[c];
            }
            if (a_cplx) {
                C->imag[r][c] += a->imag[r] * b->data[c];
            }
            if (b_cplx) {
                C->imag[r][c] += a->data[r] * b->imag[c];
            }
        }
    }
}

/*
 * Outer product into a caller-supplied matrix: C = a * b (column vector
 * times row vector).
 *
 * a and b must have equal length, and C must have at least a->length rows
 * and b->length columns (a larger C is accepted; only the leading block is
 * written).  C gets an imaginary part via dmialloc() when a or b is complex
 * and C is real.
 *
 * Returns SP_TRUE on success, SP_FALSE when an argument is missing or the
 * sizes are unacceptable.
 */
spBool dmvvtimes(spDMatrix C/* size: (a->length, b->length) */, spDVector a/* column vector */, spDVector b/* row vector */)
{
    if (a == NODATA || b == NODATA || C == NODATA) {
        return SP_FALSE;
    }
    if (a->length != b->length) {
        return SP_FALSE;
    }
    if (C->row < a->length || C->col < b->length) {
        return SP_FALSE;
    }

    if (C->imag == NULL && (a->imag != NULL || b->imag != NULL)) {
        dmialloc(C);
    }

    dmvvtimescore(C, a, b);

    return SP_TRUE;
}

/*
 * Allocating outer product: returns a new a->length x b->length matrix
 * holding a * b, complex when either vector is complex.
 *
 * Returns NODATA when a vector is missing or their lengths differ.
 * The caller owns the returned matrix.
 */
spDMatrix xdmvvtimes(spDVector a/* column vector */, spDVector b/* row vector */)
{
    spDMatrix outer;

    if (a == NODATA || b == NODATA) {
        return NODATA;
    }
    if (a->length != b->length) {
        return NODATA;
    }

    outer = xdmalloc(a->length, b->length);
    if (a->imag != NULL || b->imag != NULL) {
        dmialloc(outer);
    }

    dmvvtimescore(outer, a, b);

    return outer;
}

/*
 * Frobenius norm of A: the square root of the total of dvsqsum() taken over
 * every row.
 *
 * A scratch vector of A->col elements (complex when A is complex) is
 * allocated to hold one row at a time and released before returning.
 * A is dereferenced without a NODATA check.
 */
double dmfrobnorm(spDMatrix A)
{
    long r;
    double total;
    spDVector rowbuf;

    if (A->imag == NULL) {
        rowbuf = xdvalloc(A->col);
    } else {
        rowbuf = xdvrialloc(A->col);
    }

    total = 0.0;
    for (r = 0; r < A->row; r++) {
        dmextractrow(A, r, rowbuf);
        total += dvsqsum(rowbuf);
    }

    xdvfree(rowbuf);

    return sqrt(total);
}

/*
 * Infinity norm of A: the largest dvabssum() over the rows of A
 * (0.0 when A has no rows).
 *
 * A scratch vector of A->col elements (complex when A is complex) is
 * allocated to hold one row at a time and released before returning.
 * A is dereferenced without a NODATA check.
 */
double dminftynorm(spDMatrix A)
{
    long r;
    double rowsum;
    double largest;
    spDVector rowbuf;

    if (A->imag == NULL) {
        rowbuf = xdvalloc(A->col);
    } else {
        rowbuf = xdvrialloc(A->col);
    }

    largest = 0.0;
    for (r = 0; r < A->row; r++) {
        dmextractrow(A, r, rowbuf);
        rowsum = dvabssum(rowbuf);
        largest = (rowsum > largest) ? rowsum : largest;
    }

    xdvfree(rowbuf);

    return largest;
}

/*
 * 1-norm of A: the largest dvabssum() over the columns of A
 * (0.0 when A has no columns).
 *
 * A scratch vector of A->row elements (complex when A is complex) is
 * allocated to hold one column at a time and released before returning.
 * A is dereferenced without a NODATA check.
 */
double dmonenorm(spDMatrix A)
{
    long c;
    double colsum;
    double largest;
    spDVector colbuf;

    if (A->imag == NULL) {
        colbuf = xdvalloc(A->row);
    } else {
        colbuf = xdvrialloc(A->row);
    }

    largest = 0.0;
    for (c = 0; c < A->col; c++) {
        dmextractcol(A, c, colbuf);
        colsum = dvabssum(colbuf);
        largest = (colsum > largest) ? colsum : largest;
    }

    xdvfree(colbuf);

    return largest;
}

/*
 * 2-norm of A, read from element [0][0] of the matrix returned by
 * xdmsvd() (presumably the largest singular value -- confirm against
 * xdmsvd's ordering).
 *
 * Returns -1.0 when xdmsvd() returns NODATA.
 */
double dmtwonorm(spDMatrix A)
{
    spDMatrix S;
    double leading;

    S = xdmsvd(A, 0, 0.0, NULL, NULL);
    if (S == NODATA) {
        return -1.0;
    }

    leading = S->data[0][0];
    xdmfree(S);

    return leading;
}

/*
 * Matrix norm selected by p:
 *   p == -1 : infinity norm  (dminftynorm)
 *   p ==  0 : Frobenius norm (dmfrobnorm)
 *   p ==  1 : 1-norm         (dmonenorm)
 *   p ==  2 : 2-norm         (dmtwonorm)
 * Any other p yields -1.0.
 */
double dmnorm(spDMatrix A, long p)
{
    switch (p) {
    case -1:
        return dminftynorm(A);
    case 0:
        return dmfrobnorm(A);
    case 1:
        return dmonenorm(A);
    case 2:
        return dmtwonorm(A);
    default:
        return -1.0;
    }
}

/*
 * Condition number of A with respect to the norm selected by p
 * (same codes as dmnorm(): -1 infinity, 0 Frobenius, 1 one-norm, 2 two-norm).
 *
 *   p == 2         : ratio of element [0][0] to the last diagonal element
 *                    of the matrix returned by xdmsvd() (presumably the
 *                    largest and smallest singular values).
 *   p == -1, 0, 1  : dmnorm(A, p) * dmnorm(inverse of A, p).
 *
 * Returns 1.0 / SP_EPSILON when the decomposition or the inversion returns
 * NODATA, or when the last diagonal element of the SVD result is exactly
 * zero.  Returns -1.0 for an unsupported p; previously such a p multiplied
 * two -1.0 error values from dmnorm() and reported a condition number of 1.
 */
double dmcondp(spDMatrix A, long p)
{
    double cn;

    if (p == 2) {
        long minrowcol;
        double minsvd;
        spDMatrix S;

        if ((S = xdmsvd(A, 0, 0.0, NULL, NULL)) == NODATA) {
            spDebug(10, "dmcondp", "xdmsvd failed\n");
            return 1.0 / SP_EPSILON;
        }

        minrowcol = MIN(S->row, S->col);

        minsvd = S->data[minrowcol - 1][minrowcol - 1];
        spDebug(80, "dmcondp", "minsvd = %g, S->data[0][0] = %g\n", minsvd, S->data[0][0]);

        if (minsvd == 0.0) {
            /* avoid dividing by zero; report the same value as a failure */
            cn = 1.0 / SP_EPSILON;
        } else {
            cn = S->data[0][0] / minsvd;
        }
        spDebug(80, "dmcondp", "cn = %g\n", cn);

        xdmfree(S);
    } else if (p == -1 || p == 0 || p == 1) {
        spDMatrix Ainv;

        if ((Ainv = xdminv(A)) == NODATA) {
            return 1.0 / SP_EPSILON;
        }

        cn = dmnorm(A, p) * dmnorm(Ainv, p);

        xdmfree(Ainv);
    } else {
        /* dmnorm() does not know this norm; do not invert A for nothing. */
        spDebug(10, "dmcondp", "unsupported norm type: %ld\n", p);
        return -1.0;
    }

    return cn;
}

/*
 * Condition number of A using the 2-norm; shorthand for dmcondp(A, 2).
 */
double dmcond(spDMatrix A)
{
    const long two_norm = 2;

    return dmcondp(A, two_norm);
}

/*
 * Numerical rank of A: the number of leading diagonal elements of the
 * matrix returned by xdmsvd() that are strictly greater than tolerance.
 *
 *   tolerance : threshold for a singular value to count; when <= 0.0 the
 *               default MAX(A->row, A->col) * SP_EPSILON is used.
 *
 * Counting stops at the first diagonal element at or below the tolerance,
 * which assumes xdmsvd() delivers the singular values in descending order
 * (TODO confirm).
 *
 * Returns the rank as a double, or -1.0 when xdmsvd() returns NODATA.
 */
double dmrank(spDMatrix A, double tolerance)
{
    double r;
    long k, p;
    spDMatrix S;

    if ((S = xdmsvd(A, 0, 0.0, NULL, NULL)) == NODATA) {
        return -1.0;
    }
    
    if (tolerance <= 0.0) {
        tolerance = (double)MAX(A->row, A->col) * SP_EPSILON;
    }

    p = MIN(S->row, S->col);

    for (k = 0, r = 0.0; k < p; k++) {
        if (S->data[k][k] <= tolerance) {
            break;
        }
        /* count the singular value; do not accumulate its magnitude */
        r += 1.0;
    }

    xdmfree(S);

    return r;
}

/* Magnitude of coefficient k of u: CABS() for complex u, FABS() otherwise. */
static double xdmcompan_amp(spDVector u, long k)
{
    if (u->imag != NULL) {
        return CABS(u->data[k], u->imag[k]);
    }
    return FABS(u->data[k]);
}

/*
 * Build the companion matrix of the polynomial whose coefficients are in u
 * (u->data[0] first).
 *
 * Coefficients whose magnitude is below tolerance (SP_EPSILON when
 * tolerance <= 0.0) are treated as zero and stripped from both ends; the
 * remaining `span` coefficients define the matrix:
 *   - span <= 1 : a 1 x 1 zero matrix;
 *   - otherwise : a (span-1) x (span-1) matrix whose first row is
 *                 -u[lead + j] / u[lead] (complex division when u is
 *                 complex) and whose sub-diagonal is 1.
 *
 *   onum_leading_zero  : (may be NULL) receives the number of stripped
 *                        leading coefficients.
 *   onum_trailing_zero : (may be NULL) receives the number of stripped
 *                        trailing coefficients.
 *
 * When every coefficient is below tolerance (or u is empty) both counts are
 * set to u->length and NODATA is returned.  u is dereferenced without a
 * NODATA check.  The caller owns the returned matrix.
 */
spDMatrix xdmcompan(spDVector u, double tolerance,
                  long *onum_leading_zero /* can be NULL */, long *onum_trailing_zero /* can be NULL */)
{
    long k;
    long lead, span;
    double lead_re, lead_im;
    double den;
    spDMatrix C;

    if (tolerance <= 0.0) {
        tolerance = SP_EPSILON;
    }

    /* First significant coefficient. */
    lead = -1;
    for (k = 0; k < u->length; k++) {
        if (xdmcompan_amp(u, k) >= tolerance) {
            lead = k;
            break;
        }
    }

    /* Last significant coefficient, searched from the end. */
    span = -1;
    if (lead >= 0) {
        for (k = u->length - 1; k >= lead; k--) {
            if (xdmcompan_amp(u, k) >= tolerance) {
                span = k + 1 - lead;
                break;
            }
        }
    }
    spDebug(80, "xdmcompan", "offset = %ld, length = %ld\n", lead, span);

    if (lead < 0 || span < 0) {
        if (onum_leading_zero != NULL) {
            *onum_leading_zero = u->length;
        }
        if (onum_trailing_zero != NULL) {
            *onum_trailing_zero = u->length;
        }
        return NODATA;
    }

    if (span <= 1) {
        C = xdmzeros(1, 1);
    } else if (u->imag != NULL) {
        C = xdmrizeros(span - 1, span - 1);

        /* Divide by the leading coefficient: multiply by its conjugate
           and divide by its squared magnitude. */
        lead_re = u->data[lead];
        lead_im = u->imag[lead];
        den = lead_re * lead_re + lead_im * lead_im;

        for (k = 1; k < span; k++) {
            C->data[0][k - 1] = -(u->data[lead + k] * lead_re + u->imag[lead + k] * lead_im) / den;
            C->imag[0][k - 1] = -(u->imag[lead + k] * lead_re - u->data[lead + k] * lead_im) / den;
        }
    } else {
        C = xdmzeros(span - 1, span - 1);
        den = u->data[lead];

        for (k = 1; k < span; k++) {
            C->data[0][k - 1] = -u->data[lead + k] / den;
        }
    }

    if (span > 1) {
        /* Ones on the sub-diagonal. */
        for (k = 1; k < C->row; k++) {
            C->data[k][k - 1] = 1.0;
        }
    }

    if (onum_leading_zero != NULL) {
        *onum_leading_zero = lead;
    }
    if (onum_trailing_zero != NULL) {
        *onum_trailing_zero = u->length - (lead + span);
    }

    return C;
}

/*
 * Roots of a polynomial given its companion matrix: the vector returned by
 * xdmeigdsqr(C, 1000, 0.0, NULL), grown by the positive parts of
 * num_leading_zero and num_trailing_zero through xdvrealloc().
 *
 * NOTE(review): the contents of the elements added by xdvrealloc() are not
 * visible from here (presumably zero -- confirm), and the leading-zero
 * count enlarges the result as well as the trailing-zero count.
 *
 * Returns NODATA when xdmeigdsqr() returns NODATA.  The caller owns the
 * returned vector.
 */
spDVector xdvrootscompan(spDMatrix C /* must be companion matrix */, long num_leading_zero, long num_trailing_zero)
{
    spDVector roots;
    long padded;

    roots = xdmeigdsqr(C, 1000, 0.0, NULL);
    if (roots == NODATA) {
        return NODATA;
    }

    padded = roots->length;
    padded += (num_leading_zero > 0) ? num_leading_zero : 0;
    padded += (num_trailing_zero > 0) ? num_trailing_zero : 0;

    if (padded > roots->length) {
        roots = xdvrealloc(roots, padded);
    }

    return roots;
}

/*
 * Roots of the polynomial with coefficients u: builds the companion matrix
 * with xdmcompan(), balances it in place with dmbalance(), and hands it to
 * xdvrootscompan() together with the stripped leading/trailing zero counts.
 *
 *   tolerance : passed to xdmcompan() as the magnitude below which a
 *               coefficient counts as zero.
 *
 * Returns NODATA when xdmcompan() returns NODATA; otherwise returns whatever
 * xdvrootscompan() produces (owned by the caller).  The temporary companion
 * matrix is freed here.
 */
spDVector xdvroots(spDVector u, double tolerance)
{
    long lead_zeros, trail_zeros;
    spDMatrix companion;
    spDVector roots;

    companion = xdmcompan(u, tolerance, &lead_zeros, &trail_zeros);
    if (companion == NODATA) {
        return NODATA;
    }

    dmbalance(companion, NODATA);

    roots = xdvrootscompan(companion, lead_zeros, trail_zeros);
    xdmfree(companion);

    return roots;
}
