001    /**
002     * Copyright (c) 2010, SIB. All rights reserved.
003     * 
004     * SIB (Swiss Institute of Bioinformatics) - http://www.isb-sib.ch Host -
005     * https://sourceforge.net/projects/javaprotlib/
006     * 
007     * Redistribution and use in source and binary forms, with or without
008     * modification, are permitted provided that the following conditions are met:
009     * Redistributions of source code must retain the above copyright notice, this
010     * list of conditions and the following disclaimer. Redistributions in binary
011     * form must reproduce the above copyright notice, this list of conditions and
012     * the following disclaimer in the documentation and/or other materials provided
013     * with the distribution. Neither the name of the SIB/GENEBIO nor the names of
014     * its contributors may be used to endorse or promote products derived from this
015     * software without specific prior written permission.
016     * 
017     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
020     * ARE DISCLAIMED. IN NO EVENT SHALL SIB/GENEBIO BE LIABLE FOR ANY DIRECT,
021     * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022     * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023     * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
024     * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026     * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027     */
028    package org.expasy.jpl.commons.collection.symbol.seq;
029    
030    
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.expasy.jpl.commons.collection.symbol.Symbol;
038    
039    
040    /**
041     * This object calculates the number of symbols in {@code SymbolSequence}[s].
042     * 
043     * @author nikitin
044     * 
045     * @param T the type element
046     * 
047     * @version 1.0
048     * 
049     */
050    public final class SequenceSymbolCounter<T> {
051            
052            private int from = 0;
053            private Map<Symbol<T>, List<Integer>> indices;
054            
055            private SequenceSymbolCounter() {
056                    indices = new HashMap<Symbol<T>, List<Integer>>();
057            }
058            
059            public static <T> SequenceSymbolCounter<T> newInstance() {
060                    return new SequenceSymbolCounter<T>();
061            }
062            
063            private void reinit() {
064                    from = 0;
065                    indices.clear();
066            }
067            
068            /**
069             * Compute all the occurrences of a sequence of symbols.
070             * 
071             */
072            public void process(final SymbolSequence<T> sequence) {
073                    reinit();
074                    processSequence(sequence);
075            }
076            
077            /**
078             * Compute all the occurrences of symbol for a list of symbol sequences
079             * 
080             */
081            public void process(final List<SymbolSequence<T>> sequences) {
082                    reinit();
083                    for (final SymbolSequence<T> sequence : sequences) {
084                            processSequence(sequence);
085                    }
086                    
087            }
088            
089            public void process(final SymbolSequence<T>... sequences) {
090                    reinit();
091                    for (final SymbolSequence<T> sequence : sequences) {
092                            processSequence(sequence);
093                    }
094                    
095            }
096            
097            private void processSequence(final SymbolSequence<T> sequence) {
098                    
099                    for (int i = 0; i < sequence.length(); i++) {
100                            Symbol<T> symbol = sequence.getSymbolAt(i);
101                            
102                            if (!indices.containsKey(symbol)) {
103                                    indices.put(symbol, new ArrayList<Integer>());
104                            }
105                            indices.get(symbol).add(from + i);
106                    }
107                    // if many sequences
108                    from += sequence.length();
109            }
110            
111            /**
112             * Return the list of indices of specified symbol
113             * 
114             * @param symbol the symbol to look for positions on.
115             * @return a list of indices for aaByte.
116             * 
117             */
118            public final List<Integer> getIndices(final Symbol<T> symbol) {
119                    return indices.get(symbol);
120            }
121            
122            public final List<Integer> getIndices(final Set<Symbol<T>> symbols) {
123                    final Set<Integer> localPositions = new TreeSet<Integer>();
124                    
125                    for (final Symbol<T> symbol : symbols) {
126                            
127                            if (indices.containsKey(symbol)) {
128                                    localPositions.addAll(indices.get(symbol));
129                            }
130                            
131                    }
132                    
133                    return new ArrayList<Integer>(localPositions);
134            }
135            
136            public int getSymbolNumber() {
137                    return getSymbols().size();
138            }
139            
140            public final int getSymbolNumber(final Set<Symbol<T>> symbols) {
141                    int occ = 0;
142                    for (Symbol<T> symbol : symbols) {
143                            occ += getSymbolNumber(symbol);
144                    }
145                    return occ;
146            }
147            
148            /**
149             * Return the number of occurrences for the specified symbol.
150             * 
151             * @param symbol the symbol to look for occurrences on.
152             * @return a number of occurrence for the given symbol.
153             * 
154             */
155            public final int getSymbolNumber(final Symbol<T> symbol) {
156                    if (hasSymbol(symbol)) {
157                            return getIndices(symbol).size();
158                    }
159                    return 0;
160            }
161            
162            public Set<Symbol<T>> getSymbols() {
163                    return indices.keySet();
164            }
165            
166            public final boolean hasSymbol(final Symbol<T> symbol) {
167                    return indices.containsKey(symbol);
168            }
169            
170            @Override
171            public String toString() {
172                    final StringBuilder sb = new StringBuilder();
173                    
174                    sb.append("Symbol Sequence Counter :\n");
175                    
176                    for (final Symbol<T> symbol : indices.keySet()) {
177                            sb.append("#");
178                            sb.append(symbol);
179                            sb.append(" = ");
180                            sb.append(indices.get(symbol) + "\n");
181                    }
182                    return sb.toString();
183            }
184    }