001 /**
002 * Copyright (c) 2010, SIB. All rights reserved.
003 *
004 * SIB (Swiss Institute of Bioinformatics) - http://www.isb-sib.ch Host -
005 * https://sourceforge.net/projects/javaprotlib/
006 *
007 * Redistribution and use in source and binary forms, with or without
008 * modification, are permitted provided that the following conditions are met:
009 * Redistributions of source code must retain the above copyright notice, this
010 * list of conditions and the following disclaimer. Redistributions in binary
011 * form must reproduce the above copyright notice, this list of conditions and
012 * the following disclaimer in the documentation and/or other materials provided
013 * with the distribution. Neither the name of the SIB/GENEBIO nor the names of
014 * its contributors may be used to endorse or promote products derived from this
015 * software without specific prior written permission.
016 *
017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
020 * ARE DISCLAIMED. IN NO EVENT SHALL SIB/GENEBIO BE LIABLE FOR ANY DIRECT,
021 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
024 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027 */
028 package org.expasy.jpl.commons.collection.stat;
029
030
031 import java.util.Arrays;
032 import org.apache.commons.collections.primitives.ArrayDoubleList;
033 import org.apache.commons.collections.primitives.DoubleCollection;
034 import org.apache.commons.math.MathException;
035 import org.apache.commons.math.distribution.ContinuousDistribution;
036 import org.apache.commons.math.distribution.DiscreteDistribution;
037 import org.apache.commons.math.distribution.Distribution;
038 import org.apache.commons.math.stat.StatUtils;
039 import org.apache.commons.math.util.MathUtils;
040 import org.expasy.jpl.commons.base.builder.BuilderException;
041 import org.expasy.jpl.commons.base.builder.InstanceBuilder;
042 import org.expasy.jpl.commons.collection.IntegerSequence;
043 import org.expasy.jpl.commons.collection.Interval;
044 import org.expasy.jpl.commons.collection.PrimitiveArrayUtils;
045
046
047 /**
048 * In statistics, a histogram is a graphical representation, showing a visual
049 * impression of the distribution of data.
050 *
051 * A histogram consists of tabular frequencies, shown as adjacent rectangles,
052 * erected over discrete intervals (bins), with an area equal to the frequency
053 * of the observations in the interval. The height of a rectangle is also equal
054 * to the frequency density of the interval, i.e., the frequency divided by the
055 * width of the interval. The total area of the histogram is equal to the number
056 * of data.
057 *
058 * A histogram may also be normalized displaying relative frequencies. It then
059 * shows the proportion of cases that fall into each of several categories, with
060 * the total area equaling 1. The categories are usually specified as
061 * consecutive, non-overlapping intervals of a variable. The categories
062 * (intervals) must be adjacent, and often are chosen to be of the same size.
063 *
064 * @author nikitin
065 *
066 * @version 1.0
067 *
068 */
069 public final class HistogramDataSet {
070
071 /** default bin width */
072 public static double DEFAULT_BIN_WIDTH = 1;
073
074 /** default window width */
075 public static int DEFAULT_SMOOTHING_WINDOW_WIDTH = 20;
076
077 /** minimum and maximum */
078 private Interval intervalOfDefinition;
079
080 /** observation values and frequencies/weights (optional) */
081 private double[] values;
082 private double[] weights;
083
084 private double[] bins;
085 private double normalizedValue;
086 private boolean isNormalizedSum;
087 private boolean isNormalizedData;
088
089 /** the interval range */
090 private double binWidth;
091
092 /** number of intervals (bins) */
093 private int size;
094
095 /**
096 * The standard error of a method of measurement is the standard deviation
097 * of the sampling distribution associated with the estimation method.
098 *
099 * Here, the measure error in a MS/MS experiment is usually assumed to
100 * follow a normal distribution, and the propagation of uncertainty is
101 * computed using this assumption.
102 *
103 * http://en.wikipedia.org/wiki/Standard_error_%28statistics%29
104 */
105 private double stdError;
106
107 /**
108 * The relative error of the measure (stdError is an absolute error)
109 */
110 private double ppm;
111
112 private IntegerSequence originalValueIndices;
113
114 /**
115 * Build a binned peak list with {@code binNumber} bins, the bins precision
116 * is deduced.
117 *
118 * @author nikitin
119 */
120 public static class Builder implements InstanceBuilder<HistogramDataSet> {
121
122 private double[] aValues;
123 private double[] aWeights;
124 private double[] bins;
125
126 private IntegerSequence dataIndices;
127
128 // delete weights and data (keep bins only)
129 private boolean isDeleteData;
130
131 // the values to bin.
132 private DoubleCollection values;
133
134 // the frequencies associated to values (1 by default).
135 private DoubleCollection weights;
136
137 // the random distribution
138 private Distribution dist;
139
140 // the number of random values to generate
141 private int trialsNumber;
142
143 // Optional parameters - init values
144 // the mz to bin from (included).
145 private Interval interval;
146
147 // width of each bin.
148 private double binWidth = 0;
149
150 // the bumber of bins.
151 private int binNumber = 0;
152
153 // the absolute standard error
154 private double stdError;
155
156 // the relative standard error
157 private double ppm;
158
159 private boolean isSortValues = false;
160
161 private double normalizedValue = 10000;
162
163 private boolean isNormalizedSum;
164
165 private boolean isNormalizedData;
166
167 private BuilderException exception = null;
168
169 public Builder() {
170 this.values = new ArrayDoubleList();
171 this.weights = new ArrayDoubleList();
172 }
173
174 public Builder(double[] bins, Interval interval) {
175 this.bins = bins.clone();
176 this.interval = interval;
177 }
178
179 public final Builder sortValues() {
180 this.isSortValues = true;
181
182 return this;
183 }
184
185 /**
186 * Add values from discrete distribution.
187 *
188 * @param dist the discrete distribution.
189 * @param trialsNumber the number of trials.
190 *
191 * @return this builder.
192 */
193 public final Builder addValuesFromDist(DiscreteDistribution dist,
194 int trials) {
195 this.dist = dist;
196 this.trialsNumber = trials;
197
198 return this;
199 }
200
201 /**
202 * Add values from continuous distribution.
203 *
204 * @param dist the distribution.
205 * @param n the number of values from the distribution to generate.
206 * @return
207 */
208 public final Builder addValuesFromDist(ContinuousDistribution dist,
209 int n) {
210 this.dist = dist;
211 this.trialsNumber = n;
212
213 return this;
214 }
215
216 /**
217 * Add value.
218 *
219 * @param value the value to add.
220 * @return this builder.
221 */
222 public final Builder addValue(final double value) {
223 values.add(value);
224 return this;
225 }
226
227 /**
228 * Add values.
229 *
230 * @param values the values to add.
231 * @return this builder.
232 */
233 public final Builder addValues(final double[] vals) {
234
235 for (double value : vals) {
236 values.add(value);
237 }
238 return this;
239 }
240
241 /**
242 * Add weight value.
243 *
244 * @param value the weight value to add.
245 * @return this builder.
246 */
247 public final Builder addWeight(final double value) {
248
249 weights.add(value);
250
251 return this;
252 }
253
254 /**
255 * Add weights.
256 *
257 * @param values the weights to add.
258 * @return this builder.
259 */
260 public final Builder addWeights(final double[] values) {
261
262 for (double value : values) {
263 weights.add(value);
264 }
265 return this;
266 }
267
268 /**
269 * Set the interval to make bins.
270 *
271 * @param interval the interval limit.
272 *
273 * @return this builder.
274 */
275 public final Builder interval(final Interval interval) {
276
277 this.interval = interval;
278
279 return this;
280 }
281
282 /**
283 * Set the standard error value.
284 *
285 * @param error the tolerance of the mass spectrometer.
286 *
287 * @return this builder.
288 */
289 public final Builder stdError(final double error) {
290 if (error < 0) {
291 exception = new BuilderException(error + ": bad stderr value");
292 } else {
293 this.stdError = error;
294 }
295 return this;
296 }
297
298 /**
299 * Set the ppm value.
300 *
301 * @param ppm the ppm of the mass spectrometer.
302 *
303 * @return this builder.
304 */
305 public final Builder ppm(final double ppm) {
306 if (ppm < 0) {
307 exception = new BuilderException(ppm + ": bad ppm value");
308 } else {
309 this.ppm = ppm;
310 }
311 return this;
312 }
313
314 /**
315 * Set the normalized sum value.
316 *
317 * @param normalizedSum the normalized sum.
318 *
319 * @return this builder.
320 */
321 public final Builder normalizedSum(final double normalizedSum) {
322 if (normalizedSum < 0) {
323 exception =
324 new BuilderException(normalizedSum + ": bad scale value");
325 } else {
326 this.normalizedValue = normalizedSum;
327 this.isNormalizedSum = true;
328 this.isNormalizedData = true;
329 }
330
331 return this;
332 }
333
334 /**
335 * Update the maximum weight value and normalize others.
336 *
337 * @param value the maximum weight value.
338 *
339 * @return this builder.
340 */
341 public final Builder setMaxWeight(final double value) {
342 if (normalizedValue < 0) {
343 exception =
344 new BuilderException(normalizedValue + ": bad scale value");
345 } else {
346 this.normalizedValue = value;
347 this.isNormalizedSum = false;
348 this.isNormalizedData = true;
349 }
350
351 return this;
352 }
353
354 /**
355 * Set the width of each bin.
356 *
357 * <b>warning:</b> cannot set bin number if defined.
358 *
359 * @param binWidth the bin width.
360 *
361 * @return this builder.
362 *
363 * @see #binNumber
364 */
365 public final Builder binWidth(final double binWidth) {
366 if (binWidth <= 0) {
367 exception =
368 new BuilderException(binWidth
369 + ": bin width cannot be negative or null.");
370 } else {
371 this.binWidth = binWidth;
372 }
373 return this;
374 }
375
376 /**
377 * Set the number of bins.
378 *
379 * <b>warning:</b> cannot set bin width if defined.
380 *
381 * @param binNumber the number of bins.
382 *
383 * @return this builder.
384 *
385 * @see #binWidth
386 */
387 public final Builder binNumber(final int binNumber) {
388 if (binNumber <= 0) {
389 exception =
390 new BuilderException(binNumber
391 + " : bin number cannot be negative or null.");
392 } else {
393 this.binNumber = binNumber;
394 }
395 return this;
396 }
397
398 /**
399 * Delete data.
400 */
401 public final Builder freeData() {
402 this.isDeleteData = true;
403
404 return this;
405 }
406
407 private static double computeBinWidth(final Interval interval,
408 final int size) {
409 return interval.getRange() / size;
410 }
411
412 private static int computeSize(Interval interval, final double binWidth) {
413
414 final double value = interval.getRange() / binWidth;
415
416 if (interval.getRange() % binWidth == 0) {
417 if (interval.isUpperBoundIncluded()) {
418 return (int) value + 1;
419 }
420 return (int) value;
421 } else {
422 return (int) value + 1;
423 }
424 }
425
426 private void checkWidth() {
427 if (binNumber > 0) {
428 if (binWidth == 0) {
429 binWidth = computeBinWidth(interval, binNumber);
430 } else if (interval != null
431 && interval.getRange() != Double.POSITIVE_INFINITY) {
432 throw new BuilderException(
433 "Exclusively set a value for bin"
434 + " width or bin number in interval " + interval);
435 }
436 } else {
437 if (binWidth == 0) {
438 binWidth = DEFAULT_BIN_WIDTH;
439 }
440 binNumber = computeSize(interval, binWidth);
441 }
442 }
443
444 private void generateValuesFromDiscreteDist() {
445 DiscreteDistribution disc = (DiscreteDistribution) dist;
446
447 // upper bound can be positive infinite
448 int count = 0;
449 for (int i = (int) Math.ceil(interval.getLowerBound()); count < trialsNumber
450 && i < interval.getUpperBound(); i++) {
451
452 values.add(i);
453 weights.add(disc.probability(i));
454
455 count++;
456 }
457 }
458
459 private void generateValuesFromContinuousDist() throws MathException {
460 ContinuousDistribution cont = (ContinuousDistribution) dist;
461
462 // here n is the number of values to compute probability from
463 double increment = interval.getRange() / trialsNumber;
464 double x = interval.getLowerBound();
465
466 while (x < interval.getUpperBound()) {
467
468 double nextX = x + increment;
469
470 values.add(x);
471 weights.add(cont.cumulativeProbability(x, nextX));
472
473 x = nextX;
474 }
475 }
476
477 private void binValues() throws OutOfUpperBoundException {
478
479 bins = new double[binNumber];
480
481 for (int i = 0; i < aValues.length; i++) {
482 final int index = getBinIndex(aValues[i], interval, binWidth);
483
484 // System.out.println("search " + aValues[i] + ", index = "
485 // + index + ", interval=" + interval + ", bin width="
486 // + binWidth);
487
488 double weight = 1;
489
490 if (aWeights != null && aWeights.length > 0) {
491 weight = aWeights[i];
492 }
493
494 if (stdError == 0 && ppm == 0) {
495 bins[index] += weight;
496 } else {
497 // TODO: test the ratio between error and bin width and
498 // compute the
499 // distributions if >= threshold
500
501 // distribute weight over neighborhood bins
502 LaplaceGaussRandomVar lgVar = null;
503
504 if (ppm != 0) {
505 lgVar = LaplaceGaussRandomVar.withPPM(aValues[i], ppm);
506 } else {
507 lgVar =
508 LaplaceGaussRandomVar.withStdDev(aValues[i],
509 stdError);
510 }
511 lgVar.setFrequency(weight);
512
513 HistogramDataSet lgPeakDist =
514 ProbDistLGUtils.toHistogram(lgVar);
515
516 // System.out.println(lgPeakDist);
517
518 // Distribute peak intensity around neighborhood
519 for (int binIndex = 0; binIndex < lgPeakDist.size(); binIndex++) {
520 // System.out.println(Arrays.toString(lgPeakDist
521 // .getValuesAtBin(binIndex)));
522
523 // get the bin corresponding to the specific mz
524 double value = lgPeakDist.getValuesAtBin(binIndex)[0];
525
526 // found the value in the specific bin
527 int neighborBinIndex =
528 HistogramDataSet.getBinIndex(value, interval,
529 binWidth);
530
531 if (neighborBinIndex < bins.length) {
532 bins[neighborBinIndex] +=
533 lgPeakDist.getWeighAtBin(binIndex);
534 } else {
535 bins[bins.length - 1] +=
536 lgPeakDist.getWeighAtBin(binIndex);
537 }
538 }
539
540 // rounding
541 bins[index] = MathUtils.round(bins[index], 6);
542 }
543 }
544 }
545
546 private final void setIntervalOfDefinition(final double[] values) {
547 if (values == null) {
548 exception = new BuilderException("values are not defined.");
549 }
550
551 double min = StatUtils.min(values);
552 double max = StatUtils.max(values);
553
554 double[] mm = computeBounds(min, max, binWidth);
555
556 Interval.Builder builder = new Interval.Builder(mm[0], mm[1]);
557
558 if (max == mm[1]) {
559 builder.includeUpperBound();
560 }
561 interval = builder.build();
562 }
563
564 /**
565 * Compute the number of bins.
566 *
567 * @param from the lowest value.
568 * @param to the highest value.
569 * @param binWidth the width of bin.
570 * @return the number of bin.
571 */
572 public static int computeSize(double from, double to,
573 final double binWidth) {
574
575 final double value = (to - from) / binWidth;
576
577 if (value % binWidth == 0) {
578 return (int) value;
579 } else {
580 return (int) value + 1;
581 }
582 }
583
584 private static double[] computeBounds(double start, double end,
585 double binWidth) {
586
587 double mod = start % binWidth;
588 double[] minAndMax = new double[2];
589 int binNumber = computeSize(start, end, binWidth);
590
591 if (mod == 0) {
592 minAndMax[0] = start;
593 } else {
594 minAndMax[0] = start - mod;
595 }
596
597 double range = binWidth * binNumber;
598
599 if (end > minAndMax[0] + range) {
600 range++;
601 }
602 minAndMax[1] = minAndMax[0] + range;
603
604 return minAndMax;
605 }
606
607 /**
608 * Build an instance of {@code BinnedPeakListImpl}.
609 *
610 */
611 public HistogramDataSet build() throws BuilderException {
612
613 if (exception != null) {
614 throw new BuilderException("cannot bin values.", exception);
615 }
616
617 if (bins == null) {
618
619 // 1. generate values from dist
620 if (dist != null) {
621
622 if (values.size() > 0) {
623 throw new BuilderException(
624 "cannot build histogram from distribution and values");
625 }
626 if (weights.size() > 0) {
627 throw new BuilderException(
628 "cannot build histogram from distribution and weights");
629 }
630
631 if (dist instanceof DiscreteDistribution) {
632 if (interval == null) {
633 interval =
634 new Interval.Builder(0, trialsNumber).build();
635 }
636 checkWidth();
637 generateValuesFromDiscreteDist();
638 } else {
639 if (interval == null) {
640 interval =
641 new Interval.Builder(0, trialsNumber).build();
642 }
643 checkWidth();
644 try {
645 generateValuesFromContinuousDist();
646 } catch (MathException e) {
647 throw new BuilderException(
648 "cannot build histogram from continuous distribution",
649 e);
650 }
651 }
652 }
653 // 2. check data if values are not coming from discrete
654 // distribution
655 else if (values != null) {
656 if (values.size() == 0) {
657 throw new BuilderException("no values to bin.");
658 } else if (weights.size() > 0
659 && weights.size() != values.size()) {
660 throw new BuilderException("values# (" + values.size()
661 + ") != weights# (" + weights.size() + ")");
662 }
663 // REALLY UGLY
664 // else {
665 // try {
666 // MathUtils.checkOrder(values.toArray(), 1, true);
667 // } catch (IllegalArgumentException e) {
668 // throw new BuilderException(
669 // "values have to be sorted: " + e.getMessage());
670 // }
671 // }
672 } else {
673 throw new BuilderException("no histogram to build");
674 }
675
676 // convert list of Doubles to array of doubles
677 aValues = values.toArray();
678 aWeights = weights.toArray();
679
680 // sort values and weights
681 if (isSortValues) {
682 if (aWeights.length > 0) {
683 int[] indices =
684 PrimitiveArrayUtils.sortIndexesUp(aValues);
685 double[] tmp = new double[aWeights.length];
686
687 int i = 0;
688 for (int index : indices) {
689 tmp[i++] = aWeights[index];
690 }
691 aWeights = tmp;
692 }
693 Arrays.sort(aValues);
694 }
695
696 if (interval == null) {
697 setIntervalOfDefinition(aValues);
698
699 dataIndices = new IntegerSequence.Builder(interval).build();
700 } else {
701 dataIndices =
702 PrimitiveArrayUtils.getIndexRange(aValues, interval);
703
704 double[] tmpValues = new double[dataIndices.size()];
705 double[] tmpWeights = null;
706
707 if (weights != null && aWeights.length > 0) {
708 tmpWeights = new double[dataIndices.size()];
709 }
710
711 int i = 0;
712 for (int index = dataIndices.getFrom(); index < dataIndices
713 .getTo(); index++) {
714 tmpValues[i] = aValues[index];
715 if (aWeights != null && aWeights.length > 0) {
716 tmpWeights[i] = aWeights[index];
717 }
718 i++;
719 }
720 aValues = tmpValues;
721 aWeights = tmpWeights;
722 }
723
724 // 2. check and compute number of bins and bin's widths
725 if (dist == null) {
726 checkWidth();
727 }
728
729 if (isNormalizedData) {
730
731 if (isNormalizedSum) {
732 aWeights =
733 MathUtils.normalizeArray(aWeights, normalizedValue);
734 } else {
735 double max = StatUtils.max(aWeights);
736 double ratio = normalizedValue / max;
737
738 for (int i = 0; i < aWeights.length; i++) {
739 aWeights[i] *= ratio;
740 }
741 }
742 }
743
744 try {
745 binValues();
746 } catch (Exception e) {
747 throw new BuilderException(e);
748 }
749
750 if (isDeleteData) {
751 aValues = null;
752 aWeights = null;
753 // values = null;
754 // weights = null;
755 }
756 } else {
757 checkWidth();
758 }
759
760 return new HistogramDataSet(this);
761 }
762 }
763
764 /**
765 * Create a binned peak list from a builder.
766 *
767 * @param builder the builder that provides data.
768 *
769 */
770 public HistogramDataSet(final Builder builder) {
771 values = builder.aValues;
772 weights = builder.aWeights;
773 intervalOfDefinition = builder.interval;
774 binWidth = builder.binWidth;
775 size = builder.binNumber;
776 stdError = builder.stdError;
777 ppm = builder.ppm;
778 bins = builder.bins;
779 normalizedValue = builder.normalizedValue;
780 isNormalizedSum = builder.isNormalizedSum;
781 isNormalizedData = builder.isNormalizedData;
782 originalValueIndices = builder.dataIndices;
783 }
784
785 public static HistogramDataSet smoothHisto(HistogramDataSet histo) {
786 return smoothHisto(histo, DEFAULT_SMOOTHING_WINDOW_WIDTH);
787 }
788
789 /**
790 * A local smoothing by medians, of given window size is applied to the
791 * given histogram.
792 *
793 * @param histo the dataset to convert.
794 * @param windowWidth the window width.
795 *
796 * @return a baseline histogram dataset.
797 */
798 public static HistogramDataSet smoothHisto(HistogramDataSet histo,
799 int windowWidth) {
800
801 double[] histoBins = histo.getBins();
802 double[] baselineIntensities = new double[histoBins.length];
803
804 int windowHalfSize = windowWidth / 2;
805
806 for (int i = windowHalfSize; i < histoBins.length - windowHalfSize; i++) {
807
808 double d = histoBins[i];
809
810 if (d > 0) {
811 double[] subList =
812 PrimitiveArrayUtils.mapping(histoBins,
813 new IntegerSequence.Builder(i - windowHalfSize, i
814 + windowHalfSize).build().toInts());
815
816 Arrays.sort(subList);
817
818 // get the median value, the middle numerical value separating
819 // the sample in two halves
820 baselineIntensities[i] = StatUtils.percentile(subList, 50);
821 }
822 }
823
824 return new HistogramDataSet.Builder(baselineIntensities, histo
825 .getIntervalOfDefinition()).binWidth(histo.getBinWidth()).build();
826 }
827
828 /**
829 * @return the intervalOfDefinition
830 */
831 public Interval getIntervalOfDefinition() {
832 return intervalOfDefinition;
833 }
834
835 /**
836 * @return the binWidth
837 */
838 public double getBinWidth() {
839 return binWidth;
840 }
841
842 /**
843 * @return the stdError
844 */
845 public double getStdError() {
846 return stdError;
847 }
848
849 /**
850 * @return the ppm
851 */
852 public double getPpm() {
853 return ppm;
854 }
855
856 public boolean hasBin(double value) {
857 try {
858 int i = getBinIndex(value);
859 return i >= 0 && i < size;
860 } catch (Exception e) {
861 return false;
862 }
863
864 }
865
866 /**
867 * Get the index of bin containing the given {@code value}.
868 *
869 * @param value the value to look for bin index from.
870 * @param interval the boundaries.
871 * @param binWidth the bin width.
872 *
873 * @return a positive integer if found else -1.
874 */
875 public static int getBinIndex(final double value, Interval interval,
876 double binWidth) throws OutOfUpperBoundException {
877
878 int index = (int) ((value - interval.getLowerBound()) / binWidth);
879 // TODO: test more indexMax
880 int indexMax = Builder.computeSize(interval, binWidth);
881
882 if (index < 0) {
883 return -1;
884 } else if (index > indexMax) {
885 throw new OutOfUpperBoundException(index, indexMax);
886 }
887
888 return index;
889 }
890
891 /**
892 * Get the index of bin containing the given {@code value}.
893 *
894 * @param value the value to look for bin index from.
895 * @return a positive integer if found else -1.
896 * @throws OutOfUpperBoundException
897 * @throws OutOfLowerBoundException
898 */
899 public int getBinIndex(final double value) throws OutOfUpperBoundException {
900 return getBinIndex(value, intervalOfDefinition, binWidth);
901 }
902
903 /**
904 * Get the interval of the ith bin.
905 *
906 * @param i the bin index.
907 * @return an instance of cached interval.
908 */
909 private Interval getCachedIntervalBinAt(final int i) {
910 final double lowerBound =
911 intervalOfDefinition.getLowerBound() + i * binWidth;
912
913 return new Interval.Builder(lowerBound, lowerBound + binWidth).cache()
914 .build();
915 }
916
917 private double[] getCachedDoubleIntervalBinAt(final int i) {
918 final double lowerBound =
919 intervalOfDefinition.getLowerBound() + i * binWidth;
920
921 return new double[] {lowerBound, lowerBound + binWidth};
922 }
923
924 public Interval getIntervalBinAt(final int i) {
925 return getCachedIntervalBinAt(i).clone();
926 }
927
928 /**
929 * Get bounds indices
930 *
931 * @param i
932 * @return
933 */
934 public int[] getBoundIndicesAtBin(int i) {
935 double[] interval = getCachedDoubleIntervalBinAt(i);
936
937 return PrimitiveArrayUtils.getIndexRange(values, interval[0],
938 interval[1]);
939 }
940
941 public IntegerSequence getValuesIndicesAtInterval(Interval interval) {
942 if (values == null) {
943 return IntegerSequence.emptyInstance();
944 }
945 return PrimitiveArrayUtils.getIndexRange(values, interval);
946 }
947
948 public double[] getValues() {
949 if (values == null) {
950 return null;
951 }
952 return values.clone();
953 }
954
955 public IntegerSequence getLoadedValuesIndices() {
956 return originalValueIndices;
957 }
958
959 public double getMinValue() {
960 if (values == null) {
961 return toCategory(0).getCenter();
962 }
963
964 return values[0];
965 }
966
967 public double getMaxValue() {
968 if (values == null) {
969 return toCategory(size - 1).getCenter();
970 }
971 return values[values.length - 1];
972 }
973
974 public double[] getValuesAtBin(int index) {
975 int[] indices = getBoundIndicesAtBin(index);
976
977 if (indices != null && indices.length > 0) {
978 return PrimitiveArrayUtils.mapping(values, indices[0], indices[1]);
979 }
980 return new double[] { };
981 }
982
983 public double[] getValuesAtInterval(Interval interval) {
984 int[] indices = getValuesIndicesAtInterval(interval).toInts();
985 return PrimitiveArrayUtils.mapping(values, indices);
986 }
987
988 public double getWeighAtBin(int i) {
989 if (bins != null) {
990 return bins[i];
991 }
992
993 return getWeighAtInterval(getCachedIntervalBinAt(i));
994 }
995
996 public double getWeighAtInterval(Interval interval) {
997 int[] indices = getValuesIndicesAtInterval(interval).toInts();
998
999 double weight = 0;
1000
1001 for (int i : indices) {
1002
1003 if (weights == null || weights.length == 0) {
1004 weight++;
1005 } else {
1006 weight += weights[i];
1007 }
1008 }
1009 return weight;
1010 }
1011
1012 /**
1013 * @return the weights
1014 */
1015 public double[] getWeights() {
1016 if (weights == null) {
1017 return null;
1018 }
1019 return weights.clone();
1020 }
1021
1022 /**
1023 * @return the bins
1024 */
1025 public double[] getBins() {
1026 return bins.clone();
1027 }
1028
1029 /**
1030 * @return the normalized value
1031 */
1032 public double getNormalizedValue() {
1033 return normalizedValue;
1034 }
1035
1036 public boolean isNormalizedSum() {
1037 return isNormalizedSum;
1038 }
1039
1040 public boolean isDataNormalized() {
1041 return isNormalizedData;
1042 }
1043
1044 public int size() {
1045 return size;
1046 }
1047
1048 public double binWidth() {
1049 return binWidth;
1050 }
1051
1052 public StatisticalCategory toCategory(int i) {
1053 Interval interval = getIntervalBinAt(i);
1054
1055 StatisticalCategory cat = new StatisticalCategory(this, interval);
1056
1057 cat.updateFrequency(getWeighAtInterval(interval));
1058
1059 return cat;
1060 }
1061
1062 public String toString() {
1063 final StringBuffer sb = new StringBuffer();
1064
1065 if (values != null && values.length > 0) {
1066 sb.append("obs#=" + values.length + " data");
1067 if (isNormalizedData) {
1068 sb.append("[normalized weights]");
1069 }
1070 sb.append(", ");
1071 }
1072 sb.append("interval=" + intervalOfDefinition + ", ");
1073 sb.append("bin width=" + binWidth + ", ");
1074 sb.append("bin#=" + size);
1075
1076 if (isNormalizedData) {
1077 if (isNormalizedSum) {
1078 sb.append("[normalized bin sum : " + normalizedValue + "]\n");
1079 } else {
1080 sb
1081 .append("[max weight value updated (+array normalization) to : "
1082 + normalizedValue + "]\n");
1083 }
1084 }
1085 return sb.toString();
1086 }
1087
1088 public static class OutOfUpperBoundException extends Exception {
1089
1090 private static final long serialVersionUID = 6254862093474313168L;
1091
1092 OutOfUpperBoundException(int index, int max) {
1093 super(index + " out of upper bound " + max);
1094 }
1095 }
1096 }