/**
 * Reads the correlation/covariance matrix from the file named by
 * {@code m_pathToMatrix} and stores the result in {@code m_matrix}.
 *
 * @throws IOException if the file does not exist, cannot be opened, or if
 *           constructing the matrix from it fails (the original exception is
 *           attached as the cause)
 */
protected void loadMatrix() throws IOException {
  File matrixFile = new File(m_pathToMatrix);
  if (!matrixFile.exists()) {
    String message = "The matrix file '" + m_pathToMatrix
      + "' does not seem to exist on the file system!";
    throw new IOException(message);
  }

  BufferedReader reader = new BufferedReader(new FileReader(m_pathToMatrix));
  // Tracks whether the regular close() below completed, so that the finally
  // clause only closes the reader when the normal path did not get that far.
  boolean closed = false;
  try {
    try {
      m_matrix = new Matrix(reader);
    } catch (Exception ex) {
      // Surface every parsing problem to the caller as an IOException
      throw new IOException(ex);
    }
    reader.close();
    closed = true;
  } finally {
    if (!closed) {
      reader.close();
    }
  }
}
/** * Classifies a given instance. * * @param inst the instance to be classified * @return the classification * @throws Exception if instance could not be classified successfully */ @Override public double classifyInstance(Instance inst) throws Exception { // Filter instance inst = filterInstance(inst); // Build K vector Matrix k = new Matrix(m_NumTrain, 1); for (int i = 0; i < m_NumTrain; i++) { k.set(i, 0, m_kernel.eval(-1, i, inst)); } double result = k.transpose().times(m_t).get(0, 0) + m_avg_target; result = (result - m_Blin) / m_Alin; return result; }
/**
 * Computes the standard deviation for the given instance without mapping
 * the target back into the original space.
 *
 * @param inst the (already filtered) instance
 * @param k column vector of kernel values for the instance, one row per
 *          entry of {@code m_L}
 * @return the square root of {@code kappa - s} when {@code kappa > s},
 *         otherwise {@code m_delta}
 * @throws Exception if the kernel evaluation fails
 */
protected double computeStdDev(Instance inst, Matrix k) throws Exception {

  double kappa = m_kernel.eval(-1, -1, inst) + m_deltaSquared;

  int size = m_L.length;
  double quadraticForm = 0;
  for (int row = 0; row < size; row++) {
    double rowAccumulator = 0;
    for (int col = 0; col < size; col++) {
      double kernelValue = k.get(col, 0);
      // Only entries whose first index is >= the second are read from m_L;
      // the remaining positions are obtained by swapping the indices.
      double entry;
      if (row > col) {
        entry = m_L[row][col];
      } else {
        entry = m_L[col][row];
      }
      rowAccumulator -= kernelValue * entry;
    }
    quadraticForm += rowAccumulator * k.get(row, 0);
  }

  // Fall back to m_delta when the difference would not be positive
  if (kappa > quadraticForm) {
    return Math.sqrt(kappa - quadraticForm);
  }
  return m_delta;
}
/** * Returns natural logarithm of density estimate for given value based on * given instance. * * @param instance the instance to make the prediction for. * @param value the value to make the prediction for. * @return the natural logarithm of the density estimate * @exception Exception if the density cannot be computed */ @Override public double logDensity(Instance inst, double value) throws Exception { inst = filterInstance(inst); // Build K vector (and Kappa) Matrix k = new Matrix(m_NumTrain, 1); for (int i = 0; i < m_NumTrain; i++) { k.set(i, 0, m_kernel.eval(-1, i, inst)); } double estimate = k.transpose().times(m_t).get(0, 0) + m_avg_target; double sigma = computeStdDev(inst, k); // transform to GP space value = value * m_Alin + m_Blin; // center around estimate value = value - estimate; double z = -Math.log(sigma * Math.sqrt(2 * Math.PI)) - value * value / (2.0 * sigma * sigma); return z + Math.log(m_Alin); }
/** * Classifies a given instance. * * @param inst * the instance to be classified * @return the classification * @throws Exception * if instance could not be classified successfully */ public double classifyInstance(Instance inst) throws Exception { // Filter instance inst = filterInstance(inst); // Build K vector Matrix k = new Matrix(m_NumTrain, 1); for (int i = 0; i < m_NumTrain; i++) { k.set(i, 0, m_kernel.eval(-1, i, inst)); } double result = k.transpose().times(m_t).get(0, 0) + m_avg_target; result = (result - m_Blin) / m_Alin; return result; }
/**
 * Computes the standard deviation for the given instance, leaving the
 * result in the transformed target space.
 *
 * @param inst the (already filtered) instance
 * @param k column vector of kernel values for the instance, one row per
 *          entry of {@code m_L}
 * @return the square root of {@code kappa - s} when {@code kappa > s},
 *         otherwise {@code m_delta}
 * @throws Exception if the kernel evaluation fails
 */
protected double computeStdDev(Instance inst, Matrix k) throws Exception {

  double kappa = m_kernel.eval(-1, -1, inst) + m_delta * m_delta;

  int dimension = m_L.length;
  double s = 0;
  for (int outer = 0; outer < dimension; outer++) {
    double partial = 0;
    for (int inner = 0; inner < dimension; inner++) {
      double kInner = k.get(inner, 0);
      // m_L is addressed with the larger index first; other positions are
      // read from the mirrored entry.
      double lEntry;
      if (outer > inner) {
        lEntry = m_L[outer][inner];
      } else {
        lEntry = m_L[inner][outer];
      }
      partial -= kInner * lEntry;
    }
    s += partial * k.get(outer, 0);
  }

  // Use m_delta unless kappa strictly exceeds s
  if (kappa > s) {
    return Math.sqrt(kappa - s);
  }
  return m_delta;
}
/** * Returns natural logarithm of density estimate for given value based on given instance. * * @param instance the instance to make the prediction for. * @param value the value to make the prediction for. * @return the natural logarithm of the density estimate * @exception Exception if the density cannot be computed */ public double logDensity(Instance inst, double value) throws Exception { inst = filterInstance(inst); // Build K vector (and Kappa) Matrix k = new Matrix(m_NumTrain, 1); for (int i = 0; i < m_NumTrain; i++) { k.set(i, 0, m_kernel.eval(-1, i, inst)); } double estimate = k.transpose().times(m_t).get(0, 0) + m_avg_target; double sigma = computeStdDev(inst, k); // transform to GP space value = value * m_Alin + m_Blin; // center around estimate value = value - estimate; double z = -Math.log(sigma * Math.sqrt(2 * Math.PI)) - value * value /(2.0*sigma*sigma); return z + Math.log(m_Alin); }
/** Creates a default PrecomputedKernelMatrixKernell */ public Kernel getKernel() { PrecomputedKernelMatrixKernel pc = new PrecomputedKernelMatrixKernel(); // load kernel matrix try { pc.setKernelMatrix( new Matrix( new InputStreamReader(ClassLoader.getSystemResourceAsStream( "weka/classifiers/data/test.matrix")))); } catch (Exception e) { e.printStackTrace(); } return pc; }
/**
 * Fills the given matrix with the outer product of the vector with itself:
 * entry (m, nn) is set to {@code vectorX[m] * vectorX[nn]}.
 *
 * @param current
 *          The matrix that receives the products; it must be able to hold
 *          {@code vectorX.length} rows and columns.
 * @param vectorX
 *          The vector.
 */
public void productVector(Matrix current, double[] vectorX) {
  int size = vectorX.length;
  for (int row = 0; row < size; row++) {
    double rowFactor = vectorX[row];
    for (int col = 0; col < size; col++) {
      double product = rowFactor * vectorX[col];
      current.set(row, col, product);
    }
  }
}
public static Image getHeatMapForMatrix(Matrix matrix, List<String> rowAttNames) { double[][] m = matrix.getArray(); // generate the heat map // need to reverse the order of the rows double[][] mm = new double[m.length][]; for (int i = 0; i < m.length; i++) { mm[m.length - 1 - i] = m[i]; } String[] xLabels = new String[rowAttNames.size()]; String[] yLabels = new String[rowAttNames.size()]; for (int i = 0; i < rowAttNames.size(); i++) { xLabels[i] = rowAttNames.get(i); yLabels[rowAttNames.size() - 1 - i] = rowAttNames.get(i); } HeatChart map = new HeatChart(mm, true); map.setTitle("Correlation matrix heat map"); map.setCellSize(new java.awt.Dimension(30, 30)); map.setHighValueColour(java.awt.Color.RED); map.setLowValueColour(java.awt.Color.BLUE); map.setXValues(xLabels); map.setYValues(yLabels); return map.getChartImage(); }
/** * Generates a heat map from a matrix of correlations * * @param matrix a Matrix (expected to hold correlation values between -1 and * 1) * @param rowAttNames a list of labels for the columns/rows * @return an Image holding the heat map */ public static Image getHeatMapForMatrix(Matrix matrix, List<String> rowAttNames) { double[][] m = matrix.getArray(); // generate the heat map // need to reverse the order of the rows double[][] mm = new double[m.length][]; for (int i = 0; i < m.length; i++) { mm[m.length - 1 - i] = m[i]; } String[] xLabels = new String[rowAttNames.size()]; String[] yLabels = new String[rowAttNames.size()]; for (int i = 0; i < rowAttNames.size(); i++) { xLabels[i] = rowAttNames.get(i); yLabels[rowAttNames.size() - 1 - i] = rowAttNames.get(i); } HeatChart map = new HeatChart(mm, true); map.setTitle("Correlation matrix heat map"); map.setCellSize(new java.awt.Dimension(30, 30)); map.setHighValueColour(java.awt.Color.RED); map.setLowValueColour(java.awt.Color.BLUE); map.setXValues(xLabels); map.setYValues(yLabels); return map.getChartImage(); }
/** * Returns the standard errors of slope and intercept for a simple linear * regression model: y = a + bx. The first element is the standard error of * slope, the second element is standard error of intercept. * * @param data (the data set) * @param chosen (chosen x-attribute) * @param slope (slope determined by simple linear regression model) * @param intercept (intercept determined by simple linear regression model) * @param df (number of instances - 2) * * @return array of standard errors of slope and intercept * @throws Exception if there is a missing class value in data */ public static double[] calculateStdErrorOfCoef(Instances data, Attribute chosen, double slope, double intercept, int df) throws Exception { // calculate sum of squared residuals, mean squared error double ssr = calculateSSR(data, chosen, slope, intercept); double mse = ssr / df; /* * put data into 2-D array with 2 columns first column is value of chosen * attribute second column is constant (1's) */ double[][] array = new double[data.numInstances()][2]; for (int i = 0; i < data.numInstances(); i++) { array[i][0] = data.instance(i).value(chosen); array[i][1] = 1.0; } /* * linear algebra calculation: covariance matrix = mse * (XtX)^-1 diagonal * of covariance matrix is square of standard error of coefficients */ Matrix X = new Matrix(array); Matrix Xt = X.transpose(); Matrix XtX = Xt.times(X); Matrix inverse = XtX.inverse(); Matrix cov = inverse.times(mse); double[] result = new double[2]; for (int i = 0; i < 2; i++) { result[i] = Math.sqrt(cov.get(i, i)); } return result; }
/** * Returns an array of the standard errors of the coefficients in a multiple * linear regression. The last element in the array is the standard error of * the constant coefficient. The standard error array is used to calculate the * t-statistics. * * @param data (the data set * @param selected (flags indicating variables used in the regression) * @param ssr (sum of squared residuals) * @param n (number of instances) * @param k (number of coefficients; includes constant) * * @return array of standard errors of coefficients * @throws Exception if there is a missing class value in data */ public static double[] calculateStdErrorOfCoef(Instances data, boolean[] selected, double ssr, int n, int k) throws Exception { // Construct a matrix to hold X variables double[][] array = new double[n][k]; // put data into 2-D array format int column = 0; for (int j = 0; j < data.numAttributes(); j++) { if ((data.classIndex() != j) && (selected[j])) { for (int i = 0; i < n; i++) { array[i][column] = data.instance(i).value(j); } column++; } } // last column in array is constant (1's) for (int i = 0; i < n; i++) { array[i][k - 1] = 1.0; } /* * linear algebra calculation: covariance matrix = mse * (XtX)^-1 diagonal * of covariance matrix is square of standard error of coefficients */ Matrix X = new Matrix(array); Matrix Xt = X.transpose(); Matrix XtX = Xt.times(X); Matrix inverse = XtX.inverse(); double mse = ssr / (n - k); Matrix cov = inverse.times(mse); double[] result = new double[k]; for (int i = 0; i < k; i++) { result[i] = Math.sqrt(cov.get(i, i)); } return result; }
/** * Gives standard deviation of the prediction at the given instance. * * @param inst the instance to get the standard deviation for * @return the standard deviation * @throws Exception if computation fails */ public double getStandardDeviation(Instance inst) throws Exception { inst = filterInstance(inst); // Build K vector (and Kappa) Matrix k = new Matrix(m_NumTrain, 1); for (int i = 0; i < m_NumTrain; i++) { k.set(i, 0, m_kernel.eval(-1, i, inst)); } return computeStdDev(inst, k) / m_Alin; }
/**
 * Calculates the weighted means of the values and conditioning values
 * ({@code m_ValueMean}, {@code m_CondMean}) and their 2x2 weighted
 * covariance matrix ({@code m_Covariance}). Sums of weighted squared
 * deviations are divided by {@code m_SumOfWeights - 1}.
 */
private void calculateCovariance() {

  int count = m_Values.size();

  // First pass: weighted sums for the two means
  double weightedValueSum = 0;
  double weightedCondSum = 0;
  for (int idx = 0; idx < count; idx++) {
    double value = m_Values.elementAt(idx).doubleValue();
    double weight = m_Weights.elementAt(idx).doubleValue();
    double cond = m_CondValues.elementAt(idx).doubleValue();
    weightedValueSum += value * weight;
    weightedCondSum += cond * weight;
  }
  m_ValueMean = weightedValueSum / m_SumOfWeights;
  m_CondMean = weightedCondSum / m_SumOfWeights;

  // Second pass: weighted sums of squared deviations and cross products
  double varValue = 0;
  double covValueCond = 0;
  double varCond = 0;
  for (int idx = 0; idx < count; idx++) {
    double valueDev = m_Values.elementAt(idx).doubleValue() - m_ValueMean;
    double condDev = m_CondValues.elementAt(idx).doubleValue() - m_CondMean;
    double weight = m_Weights.elementAt(idx).doubleValue();
    varValue += valueDev * valueDev * weight;
    covValueCond += valueDev * condDev * weight;
    varCond += condDev * condDev * weight;
  }

  double denominator = m_SumOfWeights - 1.0;
  varValue /= denominator;
  covValueCond /= denominator;
  varCond /= denominator;

  // The matrix is symmetric: both off-diagonal entries get the same value
  m_Covariance = new Matrix(2, 2);
  m_Covariance.set(0, 0, varValue);
  m_Covariance.set(0, 1, covValueCond);
  m_Covariance.set(1, 0, covValueCond);
  m_Covariance.set(1, 1, varCond);
}
/**
 * Returns the value of the normal kernel at the point
 * {@code (x, m_ConstDelta)}, using {@code m_CovarianceInverse} for the
 * quadratic form and {@code sqrt(TWO_PI) * m_Determinant} as the divisor.
 *
 * @param x the argument to the kernel function
 * @return the value for a normal kernel
 */
private double normalKernel(double x) {

  // 1x2 row vector holding the argument and the constant delta
  Matrix point = new Matrix(1, 2);
  point.set(0, 0, x);
  point.set(0, 1, m_ConstDelta);

  // point * inverse covariance * point^T, a 1x1 matrix
  Matrix quadraticForm =
    point.times(m_CovarianceInverse).times(point.transpose());

  double exponent = -quadraticForm.get(0, 0) / 2;
  double divisor = Math.sqrt(TWO_PI) * m_Determinant;
  return Math.exp(exponent) / divisor;
}
/** * * @see weka.estimators.MultivariateEstimator#estimate(double[][], double[]) */ @Override public void estimate(double[][] observations, double[] weights) { double[] means; double[][] cov; if (weights != null) { double sum = 0; for (double weight : weights) { if (Double.isNaN(weight) || Double.isInfinite(weight)) { throw new IllegalArgumentException( "Invalid numbers in the weight vector"); } sum += weight; } if (Math.abs(sum - 1.0) > 1e-10) { throw new IllegalArgumentException("Weights do not sum to one"); } means = weightedMean(observations, weights, 0); cov = weightedCovariance(observations, weights, means); } else { // Compute mean vector means = mean(observations); cov = covariance(observations, means); } CholeskyDecomposition chol = new CholeskyDecomposition(new Matrix(cov)); // Become the newly fitted distribution. recalculate(means, cov, chol); }
/**
 * Returns twice the sum of the natural logarithms of the diagonal entries
 * of the given matrix.
 * NOTE(review): presumably L is a Cholesky factor, so that the result is the
 * log-determinant of L * L^T - confirm against the callers.
 *
 * @param L the matrix whose diagonal is used
 * @return two times the sum of log(L[i][i]) over the first
 *         {@code L.getRowDimension()} diagonal entries
 */
private double getLogDeterminant(Matrix L) {
  int size = L.getRowDimension();
  double[][] entries = L.getArray();

  double logDiagonalSum = 0;
  for (int diag = 0; diag < size; diag++) {
    logDiagonalSum += Math.log(entries[diag][diag]);
  }

  return logDiagonalSum * 2;
}
/** * Performs the expectation maximization (EM) algorithm to find the maximum * likelihood estimate (or posterior mode if ridge prior is being used) * for the multivariate normal parameters of a dataset with missing values. * @param data preprocessed dataset with missing values * @param t_obs the complete data sufficient statistics for the observed values * @return theta the maximum likelihood estimate for the parameters of the multivariate normal distribution * @throws Exception if processing goes wrong */ private Matrix EM(Instances data, Matrix t_obs) throws Exception { int p = m_numAttributes; // number of columns Matrix theta = new Matrix(p+1, p+1); // parameter matrix // if numIterations is -1, change to largest int int numIterations = m_numIterations; if (numIterations < 0) { numIterations = Integer.MAX_VALUE; } // starting theta value (means and variances of each column, correlations left at zero) // values are standardized so means are 0 and variances are 1 theta.set(0, 0, -1); for (int i = 1; i < data.numAttributes(); i++) { theta.set(0, i, 0); // mu_i theta.set(i, 0, 0); theta.set(i, i, 1); // sigma_ii } double likelihood = logLikelihood(data, theta); double deltaLikelihood = Double.MAX_VALUE; for (int i = 0; i < numIterations && deltaLikelihood > m_LogLikelihoodThreshold; i++) { theta = doEMIteration(data, theta, t_obs); double newLikelihood = logLikelihood(data, theta); deltaLikelihood = newLikelihood - likelihood; likelihood = newLikelihood; } return theta; }
/** * Performs the normal sweep operation. * @param g a matrix * @param k the pivot position * @return h the matrix after being swept on position k * @throws Exception if processing goes wrong */ private static Matrix swp(Matrix g, int k) throws Exception { try { return doSweep(g, k, 1); // call actual sweep function with proper parameters } catch (Exception e) { throw e; } }
/** * Performs the reverse sweep operation. * @param g a matrix * @param k the pivot position * @return h the matrix after being swept on position k * @throws Exception if processing goes wrong */ private static Matrix rsw(Matrix g, int k) throws Exception { try { return doSweep(g, k, -1); // call actual sweep function with proper parameters } catch (Exception e) { throw e; } }
/**
 * Returns the X and Y matrix again as an Instances object, based on the
 * given header (must have a class attribute set). Each row becomes one
 * DenseInstance with weight 1.0: the class position receives the value from
 * the first column of y, all other positions are filled, in order, from the
 * columns of x.
 *
 * @param header the format of the instance object
 * @param x the X matrix (data)
 * @param y the Y matrix (class)
 * @return the assembled data
 */
protected Instances toInstances(Instances header, Matrix x, Matrix y) {
  Instances result = new Instances(header, 0);

  int rows = x.getRowDimension();
  int cols = x.getColumnDimension();
  int clsIdx = header.classIndex();

  for (int row = 0; row < rows; row++) {
    double[] values = new double[cols + 1];
    // Next column of x to be consumed; lags one behind the attribute index
    // once the class position has been passed.
    int xColumn = 0;
    for (int att = 0; att < values.length; att++) {
      if (att == clsIdx) {
        values[att] = y.get(row, 0);
      } else {
        values[att] = x.get(row, xColumn);
        xColumn++;
      }
    }
    result.add(new DenseInstance(1.0, values));
  }

  return result;
}
/**
 * Determines the dominant eigenvector for the given matrix and returns it.
 * The eigenvector is the column of the eigenvector matrix at the index that
 * {@code Utils.maxIndex} reports for the real eigenvalues.
 *
 * @param m the matrix to determine the dominant eigenvector for
 * @return the dominant eigenvector
 */
protected Matrix getDominantEigenVector(Matrix m) {
  EigenvalueDecomposition decomposition = m.eig();
  double[] realEigenvalues = decomposition.getRealEigenvalues();
  int dominantIndex = Utils.maxIndex(realEigenvalues);
  return columnAsVector(decomposition.getV(), dominantIndex);
}
/** * normalizes the given vector (inplace) * * @param v the vector to normalize */ protected void normalizeVector(Matrix v) { double sum; int i; // determine length sum = 0; for (i = 0; i < v.getRowDimension(); i++) sum += v.get(i, 0) * v.get(i, 0); sum = StrictMath.sqrt(sum); // normalize content for (i = 0; i < v.getRowDimension(); i++) v.set(i, 0, v.get(i, 0) / sum); }
/** * Gives standard deviation of the prediction at the given instance. * * @param inst * the instance to get the standard deviation for * @return the standard deviation * @throws Exception * if computation fails */ public double getStandardDeviation(Instance inst) throws Exception { inst = filterInstance(inst); // Build K vector (and Kappa) Matrix k = new Matrix(m_NumTrain, 1); for (int i = 0; i < m_NumTrain; i++) { k.set(i, 0, m_kernel.eval(-1, i, inst)); } return computeStdDev(inst, k) / m_Alin; }
/**
 * Calculates the weighted means of the values and conditioning values
 * ({@code m_ValueMean}, {@code m_CondMean}) and their 2x2 weighted
 * covariance matrix ({@code m_Covariance}). Sums of weighted squared
 * deviations are divided by {@code m_SumOfWeights - 1}.
 */
private void calculateCovariance() {

  int count = m_Values.size();

  // First pass: weighted sums for the two means
  double weightedValueSum = 0;
  double weightedCondSum = 0;
  for (int idx = 0; idx < count; idx++) {
    double value = ((Double) m_Values.elementAt(idx)).doubleValue();
    double weight = ((Double) m_Weights.elementAt(idx)).doubleValue();
    double cond = ((Double) m_CondValues.elementAt(idx)).doubleValue();
    weightedValueSum += value * weight;
    weightedCondSum += cond * weight;
  }
  m_ValueMean = weightedValueSum / m_SumOfWeights;
  m_CondMean = weightedCondSum / m_SumOfWeights;

  // Second pass: weighted sums of squared deviations and cross products
  double varValue = 0;
  double covValueCond = 0;
  double varCond = 0;
  for (int idx = 0; idx < count; idx++) {
    double valueDev =
      ((Double) m_Values.elementAt(idx)).doubleValue() - m_ValueMean;
    double condDev =
      ((Double) m_CondValues.elementAt(idx)).doubleValue() - m_CondMean;
    double weight = ((Double) m_Weights.elementAt(idx)).doubleValue();
    varValue += valueDev * valueDev * weight;
    covValueCond += valueDev * condDev * weight;
    varCond += condDev * condDev * weight;
  }

  double denominator = m_SumOfWeights - 1.0;
  varValue /= denominator;
  covValueCond /= denominator;
  varCond /= denominator;

  // The matrix is symmetric: both off-diagonal entries get the same value
  m_Covariance = new Matrix(2, 2);
  m_Covariance.set(0, 0, varValue);
  m_Covariance.set(0, 1, covValueCond);
  m_Covariance.set(1, 0, covValueCond);
  m_Covariance.set(1, 1, varCond);
}
/** Creates a default PrecomputedKernelMatrixKernell */ @Override public Kernel getKernel() { PrecomputedKernelMatrixKernel pc = new PrecomputedKernelMatrixKernel(); // load kernel matrix try { pc.setKernelMatrix(new Matrix(new InputStreamReader(ClassLoader .getSystemResourceAsStream("weka/classifiers/data/test.matrix")))); } catch (Exception e) { e.printStackTrace(); } return pc; }
/**
 * Create a new empty <code>Partition</code> instance. Every entry of
 * <code>Pt_x</code> starts at -1, <code>Pt</code> has one slot per cluster,
 * <code>Py_t</code> has one row per attribute and one column per cluster,
 * and <code>counter</code> starts at 0.
 */
public Partition() {
  // -1 is the initial value for every instance's entry
  Pt_x = new int[m_numInstances];
  java.util.Arrays.fill(Pt_x, -1);

  Pt = new double[m_numCluster];
  Py_t = new Matrix(m_numAttributes, m_numCluster);
  counter = 0;
}
/** * Put an instance into a new cluster and update. * @param instIdx instance to be updated * @param newt index of the new cluster this instance has been assigned to * @param T the current working partition * @param Px an array of prior probabilities of the instances */ private void updateAssignment(int instIdx, int newt, Partition T, double Px, Matrix Py_x) { T.Pt_x[instIdx] = newt; // update probability of attributes in the cluster double mass = Px + T.Pt[newt]; double pi1 = Px / mass; double pi2 = T.Pt[newt] / mass; for (int i = 0; i < m_numAttributes; i++) { T.Py_t.set(i, newt, pi1 * Py_x.get(i, instIdx) + pi2 * T.Py_t.get(i, newt)); } T.Pt[newt] = mass; }