001    /**
002     * Copyright (c) 2010, SIB. All rights reserved.
003     * 
004     * SIB (Swiss Institute of Bioinformatics) - http://www.isb-sib.ch Host -
005     * https://sourceforge.net/projects/javaprotlib/
006     * 
007     * Redistribution and use in source and binary forms, with or without
008     * modification, are permitted provided that the following conditions are met:
009     * Redistributions of source code must retain the above copyright notice, this
010     * list of conditions and the following disclaimer. Redistributions in binary
011     * form must reproduce the above copyright notice, this list of conditions and
012     * the following disclaimer in the documentation and/or other materials provided
013     * with the distribution. Neither the name of the SIB/GENEBIO nor the names of
014     * its contributors may be used to endorse or promote products derived from this
015     * software without specific prior written permission.
016     * 
017     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
020     * ARE DISCLAIMED. IN NO EVENT SHALL SIB/GENEBIO BE LIABLE FOR ANY DIRECT,
021     * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022     * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023     * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
024     * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026     * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027     */
028    package org.expasy.jpl.commons.collection.stat;
029    
030    
031    import java.util.Arrays;
032    import org.apache.commons.collections.primitives.ArrayDoubleList;
033    import org.apache.commons.collections.primitives.DoubleCollection;
034    import org.apache.commons.math.MathException;
035    import org.apache.commons.math.distribution.ContinuousDistribution;
036    import org.apache.commons.math.distribution.DiscreteDistribution;
037    import org.apache.commons.math.distribution.Distribution;
038    import org.apache.commons.math.stat.StatUtils;
039    import org.apache.commons.math.util.MathUtils;
040    import org.expasy.jpl.commons.base.builder.BuilderException;
041    import org.expasy.jpl.commons.base.builder.InstanceBuilder;
042    import org.expasy.jpl.commons.collection.IntegerSequence;
043    import org.expasy.jpl.commons.collection.Interval;
044    import org.expasy.jpl.commons.collection.PrimitiveArrayUtils;
045    
046    
047    /**
048     * In statistics, a histogram is a graphical representation, showing a visual
049     * impression of the distribution of data.
050     * 
051     * A histogram consists of tabular frequencies, shown as adjacent rectangles,
052     * erected over discrete intervals (bins), with an area equal to the frequency
053     * of the observations in the interval. The height of a rectangle is also equal
054     * to the frequency density of the interval, i.e., the frequency divided by the
055     * width of the interval. The total area of the histogram is equal to the number
056     * of data.
057     * 
058     * A histogram may also be normalized displaying relative frequencies. It then
059     * shows the proportion of cases that fall into each of several categories, with
060     * the total area equaling 1. The categories are usually specified as
061     * consecutive, non-overlapping intervals of a variable. The categories
062     * (intervals) must be adjacent, and often are chosen to be of the same size.
063     * 
064     * @author nikitin
065     * 
066     * @version 1.0
067     * 
068     */
069    public final class HistogramDataSet {
070            
071            /** default bin width */
072            public static double DEFAULT_BIN_WIDTH = 1;
073            
074            /** default window width */
075            public static int DEFAULT_SMOOTHING_WINDOW_WIDTH = 20;
076            
077            /** minimum and maximum */
078            private Interval intervalOfDefinition;
079            
080            /** observation values and frequencies/weights (optional) */
081            private double[] values;
082            private double[] weights;
083            
084            private double[] bins;
085            private double normalizedValue;
086            private boolean isNormalizedSum;
087            private boolean isNormalizedData;
088            
089            /** the interval range */
090            private double binWidth;
091            
092            /** number of intervals (bins) */
093            private int size;
094            
095            /**
096             * The standard error of a method of measurement is the standard deviation
097             * of the sampling distribution associated with the estimation method.
098             * 
099             * Here, the measure error in a MS/MS experiment is usually assumed to
100             * follow a normal distribution, and the propagation of uncertainty is
101             * computed using this assumption.
102             * 
103             * http://en.wikipedia.org/wiki/Standard_error_%28statistics%29
104             */
105            private double stdError;
106            
107            /**
108             * The relative error of the measure (stdError is an absolute error)
109             */
110            private double ppm;
111            
112            private IntegerSequence originalValueIndices;
113            
114            /**
115             * Build a binned peak list with {@code binNumber} bins, the bins precision
116             * is deduced.
117             * 
118             * @author nikitin
119             */
120            public static class Builder implements InstanceBuilder<HistogramDataSet> {
121                    
122                    private double[] aValues;
123                    private double[] aWeights;
124                    private double[] bins;
125                    
126                    private IntegerSequence dataIndices;
127                    
128                    // delete weights and data (keep bins only)
129                    private boolean isDeleteData;
130                    
131                    // the values to bin.
132                    private DoubleCollection values;
133                    
134                    // the frequencies associated to values (1 by default).
135                    private DoubleCollection weights;
136                    
137                    // the random distribution
138                    private Distribution dist;
139                    
140                    // the number of random values to generate
141                    private int trialsNumber;
142                    
143                    // Optional parameters - init values
144                    // the mz to bin from (included).
145                    private Interval interval;
146                    
147                    // width of each bin.
148                    private double binWidth = 0;
149                    
150                    // the bumber of bins.
151                    private int binNumber = 0;
152                    
153                    // the absolute standard error
154                    private double stdError;
155                    
156                    // the relative standard error
157                    private double ppm;
158                    
159                    private boolean isSortValues = false;
160                    
161                    private double normalizedValue = 10000;
162                    
163                    private boolean isNormalizedSum;
164                    
165                    private boolean isNormalizedData;
166                    
167                    private BuilderException exception = null;
168                    
169                    public Builder() {
170                            this.values = new ArrayDoubleList();
171                            this.weights = new ArrayDoubleList();
172                    }
173                    
174                    public Builder(double[] bins, Interval interval) {
175                            this.bins = bins.clone();
176                            this.interval = interval;
177                    }
178                    
179                    public final Builder sortValues() {
180                            this.isSortValues = true;
181                            
182                            return this;
183                    }
184                    
185                    /**
186                     * Add values from discrete distribution.
187                     * 
188                     * @param dist the discrete distribution.
189                     * @param trialsNumber the number of trials.
190                     * 
191                     * @return this builder.
192                     */
193                    public final Builder addValuesFromDist(DiscreteDistribution dist,
194                        int trials) {
195                            this.dist = dist;
196                            this.trialsNumber = trials;
197                            
198                            return this;
199                    }
200                    
201                    /**
202                     * Add values from continuous distribution.
203                     * 
204                     * @param dist the distribution.
205                     * @param n the number of values from the distribution to generate.
206                     * @return
207                     */
208                    public final Builder addValuesFromDist(ContinuousDistribution dist,
209                        int n) {
210                            this.dist = dist;
211                            this.trialsNumber = n;
212                            
213                            return this;
214                    }
215                    
216                    /**
217                     * Add value.
218                     * 
219                     * @param value the value to add.
220                     * @return this builder.
221                     */
222                    public final Builder addValue(final double value) {
223                            values.add(value);
224                            return this;
225                    }
226                    
227                    /**
228                     * Add values.
229                     * 
230                     * @param values the values to add.
231                     * @return this builder.
232                     */
233                    public final Builder addValues(final double[] vals) {
234                            
235                            for (double value : vals) {
236                                    values.add(value);
237                            }
238                            return this;
239                    }
240                    
241                    /**
242                     * Add weight value.
243                     * 
244                     * @param value the weight value to add.
245                     * @return this builder.
246                     */
247                    public final Builder addWeight(final double value) {
248                            
249                            weights.add(value);
250                            
251                            return this;
252                    }
253                    
254                    /**
255                     * Add weights.
256                     * 
257                     * @param values the weights to add.
258                     * @return this builder.
259                     */
260                    public final Builder addWeights(final double[] values) {
261                            
262                            for (double value : values) {
263                                    weights.add(value);
264                            }
265                            return this;
266                    }
267                    
268                    /**
269                     * Set the interval to make bins.
270                     * 
271                     * @param interval the interval limit.
272                     * 
273                     * @return this builder.
274                     */
275                    public final Builder interval(final Interval interval) {
276                            
277                            this.interval = interval;
278                            
279                            return this;
280                    }
281                    
282                    /**
283                     * Set the standard error value.
284                     * 
285                     * @param error the tolerance of the mass spectrometer.
286                     * 
287                     * @return this builder.
288                     */
289                    public final Builder stdError(final double error) {
290                            if (error < 0) {
291                                    exception = new BuilderException(error + ": bad stderr value");
292                            } else {
293                                    this.stdError = error;
294                            }
295                            return this;
296                    }
297                    
298                    /**
299                     * Set the ppm value.
300                     * 
301                     * @param ppm the ppm of the mass spectrometer.
302                     * 
303                     * @return this builder.
304                     */
305                    public final Builder ppm(final double ppm) {
306                            if (ppm < 0) {
307                                    exception = new BuilderException(ppm + ": bad ppm value");
308                            } else {
309                                    this.ppm = ppm;
310                            }
311                            return this;
312                    }
313                    
314                    /**
315                     * Set the normalized sum value.
316                     * 
317                     * @param normalizedSum the normalized sum.
318                     * 
319                     * @return this builder.
320                     */
321                    public final Builder normalizedSum(final double normalizedSum) {
322                            if (normalizedSum < 0) {
323                                    exception =
324                                        new BuilderException(normalizedSum + ": bad scale value");
325                            } else {
326                                    this.normalizedValue = normalizedSum;
327                                    this.isNormalizedSum = true;
328                                    this.isNormalizedData = true;
329                            }
330                            
331                            return this;
332                    }
333                    
334                    /**
335                     * Update the maximum weight value and normalize others.
336                     * 
337                     * @param value the maximum weight value.
338                     * 
339                     * @return this builder.
340                     */
341                    public final Builder setMaxWeight(final double value) {
342                            if (normalizedValue < 0) {
343                                    exception =
344                                        new BuilderException(normalizedValue + ": bad scale value");
345                            } else {
346                                    this.normalizedValue = value;
347                                    this.isNormalizedSum = false;
348                                    this.isNormalizedData = true;
349                            }
350                            
351                            return this;
352                    }
353                    
354                    /**
355                     * Set the width of each bin.
356                     * 
357                     * <b>warning:</b> cannot set bin number if defined.
358                     * 
359                     * @param binWidth the bin width.
360                     * 
361                     * @return this builder.
362                     * 
363                     * @see #binNumber
364                     */
365                    public final Builder binWidth(final double binWidth) {
366                            if (binWidth <= 0) {
367                                    exception =
368                                        new BuilderException(binWidth
369                                            + ": bin width cannot be negative or null.");
370                            } else {
371                                    this.binWidth = binWidth;
372                            }
373                            return this;
374                    }
375                    
376                    /**
377                     * Set the number of bins.
378                     * 
379                     * <b>warning:</b> cannot set bin width if defined.
380                     * 
381                     * @param binNumber the number of bins.
382                     * 
383                     * @return this builder.
384                     * 
385                     * @see #binWidth
386                     */
387                    public final Builder binNumber(final int binNumber) {
388                            if (binNumber <= 0) {
389                                    exception =
390                                        new BuilderException(binNumber
391                                            + " : bin number cannot be negative or null.");
392                            } else {
393                                    this.binNumber = binNumber;
394                            }
395                            return this;
396                    }
397                    
398                    /**
399                     * Delete data.
400                     */
401                    public final Builder freeData() {
402                            this.isDeleteData = true;
403                            
404                            return this;
405                    }
406                    
407                    private static double computeBinWidth(final Interval interval,
408                        final int size) {
409                            return interval.getRange() / size;
410                    }
411                    
412                    private static int computeSize(Interval interval, final double binWidth) {
413                            
414                            final double value = interval.getRange() / binWidth;
415                            
416                            if (interval.getRange() % binWidth == 0) {
417                                    if (interval.isUpperBoundIncluded()) {
418                                            return (int) value + 1;
419                                    }
420                                    return (int) value;
421                            } else {
422                                    return (int) value + 1;
423                            }
424                    }
425                    
426                    private void checkWidth() {
427                            if (binNumber > 0) {
428                                    if (binWidth == 0) {
429                                            binWidth = computeBinWidth(interval, binNumber);
430                                    } else if (interval != null
431                                        && interval.getRange() != Double.POSITIVE_INFINITY) {
432                                            throw new BuilderException(
433                                                "Exclusively set a value for bin"
434                                                    + " width or bin number in interval " + interval);
435                                    }
436                            } else {
437                                    if (binWidth == 0) {
438                                            binWidth = DEFAULT_BIN_WIDTH;
439                                    }
440                                    binNumber = computeSize(interval, binWidth);
441                            }
442                    }
443                    
444                    private void generateValuesFromDiscreteDist() {
445                            DiscreteDistribution disc = (DiscreteDistribution) dist;
446                            
447                            // upper bound can be positive infinite
448                            int count = 0;
449                            for (int i = (int) Math.ceil(interval.getLowerBound()); count < trialsNumber
450                                && i < interval.getUpperBound(); i++) {
451                                    
452                                    values.add(i);
453                                    weights.add(disc.probability(i));
454                                    
455                                    count++;
456                            }
457                    }
458                    
459                    private void generateValuesFromContinuousDist() throws MathException {
460                            ContinuousDistribution cont = (ContinuousDistribution) dist;
461                            
462                            // here n is the number of values to compute probability from
463                            double increment = interval.getRange() / trialsNumber;
464                            double x = interval.getLowerBound();
465                            
466                            while (x < interval.getUpperBound()) {
467                                    
468                                    double nextX = x + increment;
469                                    
470                                    values.add(x);
471                                    weights.add(cont.cumulativeProbability(x, nextX));
472                                    
473                                    x = nextX;
474                            }
475                    }
476                    
477                    private void binValues() throws OutOfUpperBoundException {
478                            
479                            bins = new double[binNumber];
480                            
481                            for (int i = 0; i < aValues.length; i++) {
482                                    final int index = getBinIndex(aValues[i], interval, binWidth);
483                                    
484                                    // System.out.println("search " + aValues[i] + ", index = "
485                                    // + index + ", interval=" + interval + ", bin width="
486                                    // + binWidth);
487                                    
488                                    double weight = 1;
489                                    
490                                    if (aWeights != null && aWeights.length > 0) {
491                                            weight = aWeights[i];
492                                    }
493                                    
494                                    if (stdError == 0 && ppm == 0) {
495                                            bins[index] += weight;
496                                    } else {
497                                            // TODO: test the ratio between error and bin width and
498                                            // compute the
499                                            // distributions if >= threshold
500                                            
501                                            // distribute weight over neighborhood bins
502                                            LaplaceGaussRandomVar lgVar = null;
503                                            
504                                            if (ppm != 0) {
505                                                    lgVar = LaplaceGaussRandomVar.withPPM(aValues[i], ppm);
506                                            } else {
507                                                    lgVar =
508                                                        LaplaceGaussRandomVar.withStdDev(aValues[i],
509                                                            stdError);
510                                            }
511                                            lgVar.setFrequency(weight);
512                                            
513                                            HistogramDataSet lgPeakDist =
514                                                ProbDistLGUtils.toHistogram(lgVar);
515                                            
516                                            // System.out.println(lgPeakDist);
517                                            
518                                            // Distribute peak intensity around neighborhood
519                                            for (int binIndex = 0; binIndex < lgPeakDist.size(); binIndex++) {
520                                                    // System.out.println(Arrays.toString(lgPeakDist
521                                                    // .getValuesAtBin(binIndex)));
522                                                    
523                                                    // get the bin corresponding to the specific mz
524                                                    double value = lgPeakDist.getValuesAtBin(binIndex)[0];
525                                                    
526                                                    // found the value in the specific bin
527                                                    int neighborBinIndex =
528                                                        HistogramDataSet.getBinIndex(value, interval,
529                                                            binWidth);
530                                                    
531                                                    if (neighborBinIndex < bins.length) {
532                                                            bins[neighborBinIndex] +=
533                                                                lgPeakDist.getWeighAtBin(binIndex);
534                                                    } else {
535                                                            bins[bins.length - 1] +=
536                                                                lgPeakDist.getWeighAtBin(binIndex);
537                                                    }
538                                            }
539                                            
540                                            // rounding
541                                            bins[index] = MathUtils.round(bins[index], 6);
542                                    }
543                            }
544                    }
545                    
546                    private final void setIntervalOfDefinition(final double[] values) {
547                            if (values == null) {
548                                    exception = new BuilderException("values are not defined.");
549                            }
550                            
551                            double min = StatUtils.min(values);
552                            double max = StatUtils.max(values);
553                            
554                            double[] mm = computeBounds(min, max, binWidth);
555                            
556                            Interval.Builder builder = new Interval.Builder(mm[0], mm[1]);
557                            
558                            if (max == mm[1]) {
559                                    builder.includeUpperBound();
560                            }
561                            interval = builder.build();
562                    }
563                    
564                    /**
565                     * Compute the number of bins.
566                     * 
567                     * @param from the lowest value.
568                     * @param to the highest value.
569                     * @param binWidth the width of bin.
570                     * @return the number of bin.
571                     */
572                    public static int computeSize(double from, double to,
573                        final double binWidth) {
574                            
575                            final double value = (to - from) / binWidth;
576                            
577                            if (value % binWidth == 0) {
578                                    return (int) value;
579                            } else {
580                                    return (int) value + 1;
581                            }
582                    }
583                    
584                    private static double[] computeBounds(double start, double end,
585                        double binWidth) {
586                            
587                            double mod = start % binWidth;
588                            double[] minAndMax = new double[2];
589                            int binNumber = computeSize(start, end, binWidth);
590                            
591                            if (mod == 0) {
592                                    minAndMax[0] = start;
593                            } else {
594                                    minAndMax[0] = start - mod;
595                            }
596                            
597                            double range = binWidth * binNumber;
598                            
599                            if (end > minAndMax[0] + range) {
600                                    range++;
601                            }
602                            minAndMax[1] = minAndMax[0] + range;
603                            
604                            return minAndMax;
605                    }
606                    
607                    /**
608                     * Build an instance of {@code BinnedPeakListImpl}.
609                     * 
610                     */
611                    public HistogramDataSet build() throws BuilderException {
612                            
613                            if (exception != null) {
614                                    throw new BuilderException("cannot bin values.", exception);
615                            }
616                            
617                            if (bins == null) {
618                                    
619                                    // 1. generate values from dist
620                                    if (dist != null) {
621                                            
622                                            if (values.size() > 0) {
623                                                    throw new BuilderException(
624                                                        "cannot build histogram from distribution and values");
625                                            }
626                                            if (weights.size() > 0) {
627                                                    throw new BuilderException(
628                                                        "cannot build histogram from distribution and weights");
629                                            }
630                                            
631                                            if (dist instanceof DiscreteDistribution) {
632                                                    if (interval == null) {
633                                                            interval =
634                                                                new Interval.Builder(0, trialsNumber).build();
635                                                    }
636                                                    checkWidth();
637                                                    generateValuesFromDiscreteDist();
638                                            } else {
639                                                    if (interval == null) {
640                                                            interval =
641                                                                new Interval.Builder(0, trialsNumber).build();
642                                                    }
643                                                    checkWidth();
644                                                    try {
645                                                            generateValuesFromContinuousDist();
646                                                    } catch (MathException e) {
647                                                            throw new BuilderException(
648                                                                "cannot build histogram from continuous distribution",
649                                                                e);
650                                                    }
651                                            }
652                                    }
653                                    // 2. check data if values are not coming from discrete
654                                    // distribution
655                                    else if (values != null) {
656                                            if (values.size() == 0) {
657                                                    throw new BuilderException("no values to bin.");
658                                            } else if (weights.size() > 0
659                                                && weights.size() != values.size()) {
660                                                    throw new BuilderException("values# (" + values.size()
661                                                        + ") != weights# (" + weights.size() + ")");
662                                            }
663                                            // REALLY UGLY
664                                            // else {
665                                            // try {
666                                            // MathUtils.checkOrder(values.toArray(), 1, true);
667                                            // } catch (IllegalArgumentException e) {
668                                            // throw new BuilderException(
669                                            // "values have to be sorted: " + e.getMessage());
670                                            // }
671                                            // }
672                                    } else {
673                                            throw new BuilderException("no histogram to build");
674                                    }
675                                    
676                                    // convert list of Doubles to array of doubles
677                                    aValues = values.toArray();
678                                    aWeights = weights.toArray();
679                                    
680                                    // sort values and weights
681                                    if (isSortValues) {
682                                            if (aWeights.length > 0) {
683                                                    int[] indices =
684                                                        PrimitiveArrayUtils.sortIndexesUp(aValues);
685                                                    double[] tmp = new double[aWeights.length];
686                                                    
687                                                    int i = 0;
688                                                    for (int index : indices) {
689                                                            tmp[i++] = aWeights[index];
690                                                    }
691                                                    aWeights = tmp;
692                                            }
693                                            Arrays.sort(aValues);
694                                    }
695                                    
696                                    if (interval == null) {
697                                            setIntervalOfDefinition(aValues);
698                                            
699                                            dataIndices = new IntegerSequence.Builder(interval).build();
700                                    } else {
701                                            dataIndices =
702                                                PrimitiveArrayUtils.getIndexRange(aValues, interval);
703                                            
704                                            double[] tmpValues = new double[dataIndices.size()];
705                                            double[] tmpWeights = null;
706                                            
707                                            if (weights != null && aWeights.length > 0) {
708                                                    tmpWeights = new double[dataIndices.size()];
709                                            }
710                                            
711                                            int i = 0;
712                                            for (int index = dataIndices.getFrom(); index < dataIndices
713                                                .getTo(); index++) {
714                                                    tmpValues[i] = aValues[index];
715                                                    if (aWeights != null && aWeights.length > 0) {
716                                                            tmpWeights[i] = aWeights[index];
717                                                    }
718                                                    i++;
719                                            }
720                                            aValues = tmpValues;
721                                            aWeights = tmpWeights;
722                                    }
723                                    
724                                    // 2. check and compute number of bins and bin's widths
725                                    if (dist == null) {
726                                            checkWidth();
727                                    }
728                                    
729                                    if (isNormalizedData) {
730                                            
731                                            if (isNormalizedSum) {
732                                                    aWeights =
733                                                        MathUtils.normalizeArray(aWeights, normalizedValue);
734                                            } else {
735                                                    double max = StatUtils.max(aWeights);
736                                                    double ratio = normalizedValue / max;
737                                                    
738                                                    for (int i = 0; i < aWeights.length; i++) {
739                                                            aWeights[i] *= ratio;
740                                                    }
741                                            }
742                                    }
743                                    
744                                    try {
745                                            binValues();
746                                    } catch (Exception e) {
747                                            throw new BuilderException(e);
748                                    }
749                                    
750                                    if (isDeleteData) {
751                                            aValues = null;
752                                            aWeights = null;
753                                            // values = null;
754                                            // weights = null;
755                                    }
756                            } else {
757                                    checkWidth();
758                            }
759                            
760                            return new HistogramDataSet(this);
761                    }
762            }
763            
764            /**
765             * Create a binned peak list from a builder.
766             * 
767             * @param builder the builder that provides data.
768             * 
769             */
770            public HistogramDataSet(final Builder builder) {
771                    values = builder.aValues;
772                    weights = builder.aWeights;
773                    intervalOfDefinition = builder.interval;
774                    binWidth = builder.binWidth;
775                    size = builder.binNumber;
776                    stdError = builder.stdError;
777                    ppm = builder.ppm;
778                    bins = builder.bins;
779                    normalizedValue = builder.normalizedValue;
780                    isNormalizedSum = builder.isNormalizedSum;
781                    isNormalizedData = builder.isNormalizedData;
782                    originalValueIndices = builder.dataIndices;
783            }
784            
785            public static HistogramDataSet smoothHisto(HistogramDataSet histo) {
786                    return smoothHisto(histo, DEFAULT_SMOOTHING_WINDOW_WIDTH);
787            }
788            
789            /**
790             * A local smoothing by medians, of given window size is applied to the
791             * given histogram.
792             * 
793             * @param histo the dataset to convert.
794             * @param windowWidth the window width.
795             * 
796             * @return a baseline histogram dataset.
797             */
798            public static HistogramDataSet smoothHisto(HistogramDataSet histo,
799                int windowWidth) {
800                    
801                    double[] histoBins = histo.getBins();
802                    double[] baselineIntensities = new double[histoBins.length];
803                    
804                    int windowHalfSize = windowWidth / 2;
805                    
806                    for (int i = windowHalfSize; i < histoBins.length - windowHalfSize; i++) {
807                            
808                            double d = histoBins[i];
809                            
810                            if (d > 0) {
811                                    double[] subList =
812                                        PrimitiveArrayUtils.mapping(histoBins,
813                                            new IntegerSequence.Builder(i - windowHalfSize, i
814                                                + windowHalfSize).build().toInts());
815                                    
816                                    Arrays.sort(subList);
817                                    
818                                    // get the median value, the middle numerical value separating
819                                    // the sample in two halves
820                                    baselineIntensities[i] = StatUtils.percentile(subList, 50);
821                            }
822                    }
823                    
824                    return new HistogramDataSet.Builder(baselineIntensities, histo
825                        .getIntervalOfDefinition()).binWidth(histo.getBinWidth()).build();
826            }
827            
828            /**
829             * @return the intervalOfDefinition
830             */
831            public Interval getIntervalOfDefinition() {
832                    return intervalOfDefinition;
833            }
834            
835            /**
836             * @return the binWidth
837             */
838            public double getBinWidth() {
839                    return binWidth;
840            }
841            
842            /**
843             * @return the stdError
844             */
845            public double getStdError() {
846                    return stdError;
847            }
848            
849            /**
850             * @return the ppm
851             */
852            public double getPpm() {
853                    return ppm;
854            }
855            
856            public boolean hasBin(double value) {
857                    try {
858                            int i = getBinIndex(value);
859                            return i >= 0 && i < size;
860                    } catch (Exception e) {
861                            return false;
862                    }
863                    
864            }
865            
866            /**
867             * Get the index of bin containing the given {@code value}.
868             * 
869             * @param value the value to look for bin index from.
870             * @param interval the boundaries.
871             * @param binWidth the bin width.
872             * 
873             * @return a positive integer if found else -1.
874             */
875            public static int getBinIndex(final double value, Interval interval,
876                double binWidth) throws OutOfUpperBoundException {
877                    
878                    int index = (int) ((value - interval.getLowerBound()) / binWidth);
879                    // TODO: test more indexMax
880                    int indexMax = Builder.computeSize(interval, binWidth);
881                    
882                    if (index < 0) {
883                            return -1;
884                    } else if (index > indexMax) {
885                            throw new OutOfUpperBoundException(index, indexMax);
886                    }
887                    
888                    return index;
889            }
890            
891            /**
892             * Get the index of bin containing the given {@code value}.
893             * 
894             * @param value the value to look for bin index from.
895             * @return a positive integer if found else -1.
896             * @throws OutOfUpperBoundException
897             * @throws OutOfLowerBoundException
898             */
899            public int getBinIndex(final double value) throws OutOfUpperBoundException {
900                    return getBinIndex(value, intervalOfDefinition, binWidth);
901            }
902            
903            /**
904             * Get the interval of the ith bin.
905             * 
906             * @param i the bin index.
907             * @return an instance of cached interval.
908             */
909            private Interval getCachedIntervalBinAt(final int i) {
910                    final double lowerBound =
911                        intervalOfDefinition.getLowerBound() + i * binWidth;
912                    
913                    return new Interval.Builder(lowerBound, lowerBound + binWidth).cache()
914                        .build();
915            }
916            
917            private double[] getCachedDoubleIntervalBinAt(final int i) {
918                    final double lowerBound =
919                        intervalOfDefinition.getLowerBound() + i * binWidth;
920                    
921                    return new double[] {lowerBound, lowerBound + binWidth};
922            }
923            
924            public Interval getIntervalBinAt(final int i) {
925                    return getCachedIntervalBinAt(i).clone();
926            }
927            
928            /**
929             * Get bounds indices
930             * 
931             * @param i
932             * @return
933             */
934            public int[] getBoundIndicesAtBin(int i) {
935                    double[] interval = getCachedDoubleIntervalBinAt(i);
936                    
937                    return PrimitiveArrayUtils.getIndexRange(values, interval[0],
938                        interval[1]);
939            }
940            
941            public IntegerSequence getValuesIndicesAtInterval(Interval interval) {
942                    if (values == null) {
943                            return IntegerSequence.emptyInstance();
944                    }
945                    return PrimitiveArrayUtils.getIndexRange(values, interval);
946            }
947            
948            public double[] getValues() {
949                    if (values == null) {
950                            return null;
951                    }
952                    return values.clone();
953            }
954            
955            public IntegerSequence getLoadedValuesIndices() {
956                    return originalValueIndices;
957            }
958            
959            public double getMinValue() {
960                    if (values == null) {
961                            return toCategory(0).getCenter();
962                    }
963                    
964                    return values[0];
965            }
966            
967            public double getMaxValue() {
968                    if (values == null) {
969                            return toCategory(size - 1).getCenter();
970                    }
971                    return values[values.length - 1];
972            }
973            
974            public double[] getValuesAtBin(int index) {
975                    int[] indices = getBoundIndicesAtBin(index);
976                    
977                    if (indices != null && indices.length > 0) {
978                            return PrimitiveArrayUtils.mapping(values, indices[0], indices[1]);
979                    }
980                    return new double[] { };
981            }
982            
983            public double[] getValuesAtInterval(Interval interval) {
984                    int[] indices = getValuesIndicesAtInterval(interval).toInts();
985                    return PrimitiveArrayUtils.mapping(values, indices);
986            }
987            
988            public double getWeighAtBin(int i) {
989                    if (bins != null) {
990                            return bins[i];
991                    }
992                    
993                    return getWeighAtInterval(getCachedIntervalBinAt(i));
994            }
995            
996            public double getWeighAtInterval(Interval interval) {
997                    int[] indices = getValuesIndicesAtInterval(interval).toInts();
998                    
999                    double weight = 0;
1000                    
1001                    for (int i : indices) {
1002                            
1003                            if (weights == null || weights.length == 0) {
1004                                    weight++;
1005                            } else {
1006                                    weight += weights[i];
1007                            }
1008                    }
1009                    return weight;
1010            }
1011            
1012            /**
1013             * @return the weights
1014             */
1015            public double[] getWeights() {
1016                    if (weights == null) {
1017                            return null;
1018                    }
1019                    return weights.clone();
1020            }
1021            
1022            /**
1023             * @return the bins
1024             */
1025            public double[] getBins() {
1026                    return bins.clone();
1027            }
1028            
1029            /**
1030             * @return the normalized value
1031             */
1032            public double getNormalizedValue() {
1033                    return normalizedValue;
1034            }
1035            
1036            public boolean isNormalizedSum() {
1037                    return isNormalizedSum;
1038            }
1039            
1040            public boolean isDataNormalized() {
1041                    return isNormalizedData;
1042            }
1043            
1044            public int size() {
1045                    return size;
1046            }
1047            
1048            public double binWidth() {
1049                    return binWidth;
1050            }
1051            
1052            public StatisticalCategory toCategory(int i) {
1053                    Interval interval = getIntervalBinAt(i);
1054                    
1055                    StatisticalCategory cat = new StatisticalCategory(this, interval);
1056                    
1057                    cat.updateFrequency(getWeighAtInterval(interval));
1058                    
1059                    return cat;
1060            }
1061            
1062            public String toString() {
1063                    final StringBuffer sb = new StringBuffer();
1064                    
1065                    if (values != null && values.length > 0) {
1066                            sb.append("obs#=" + values.length + " data");
1067                            if (isNormalizedData) {
1068                                    sb.append("[normalized weights]");
1069                            }
1070                            sb.append(", ");
1071                    }
1072                    sb.append("interval=" + intervalOfDefinition + ", ");
1073                    sb.append("bin width=" + binWidth + ", ");
1074                    sb.append("bin#=" + size);
1075                    
1076                    if (isNormalizedData) {
1077                            if (isNormalizedSum) {
1078                                    sb.append("[normalized bin sum : " + normalizedValue + "]\n");
1079                            } else {
1080                                    sb
1081                                        .append("[max weight value updated (+array normalization) to : "
1082                                            + normalizedValue + "]\n");
1083                            }
1084                    }
1085                    return sb.toString();
1086            }
1087            
1088            public static class OutOfUpperBoundException extends Exception {
1089                    
1090                    private static final long serialVersionUID = 6254862093474313168L;
1091                    
1092                    OutOfUpperBoundException(int index, int max) {
1093                            super(index + " out of upper bound " + max);
1094                    }
1095            }
1096    }