001 /**
002 * Copyright (c) 2010, SIB. All rights reserved.
003 *
004 * SIB (Swiss Institute of Bioinformatics) - http://www.isb-sib.ch Host -
005 * https://sourceforge.net/projects/javaprotlib/
006 *
007 * Redistribution and use in source and binary forms, with or without
008 * modification, are permitted provided that the following conditions are met:
009 * Redistributions of source code must retain the above copyright notice, this
010 * list of conditions and the following disclaimer. Redistributions in binary
011 * form must reproduce the above copyright notice, this list of conditions and
012 * the following disclaimer in the documentation and/or other materials provided
013 * with the distribution. Neither the name of the SIB/GENEBIO nor the names of
014 * its contributors may be used to endorse or promote products derived from this
015 * software without specific prior written permission.
016 *
017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
020 * ARE DISCLAIMED. IN NO EVENT SHALL SIB/GENEBIO BE LIABLE FOR ANY DIRECT,
021 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
024 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027 */
028 package org.expasy.jpl.commons.collection.symbol.seq;
029
030
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.expasy.jpl.commons.collection.symbol.Symbol;
038
039
040 /**
041 * This object calculates the number of symbols in {@code SymbolSequence}[s].
042 *
043 * @author nikitin
044 *
045 * @param T the type element
046 *
047 * @version 1.0
048 *
049 */
050 public final class SequenceSymbolCounter<T> {
051
052 private int from = 0;
053 private Map<Symbol<T>, List<Integer>> indices;
054
055 private SequenceSymbolCounter() {
056 indices = new HashMap<Symbol<T>, List<Integer>>();
057 }
058
059 public static <T> SequenceSymbolCounter<T> newInstance() {
060 return new SequenceSymbolCounter<T>();
061 }
062
063 private void reinit() {
064 from = 0;
065 indices.clear();
066 }
067
068 /**
069 * Compute all the occurrences of a sequence of symbols.
070 *
071 */
072 public void process(final SymbolSequence<T> sequence) {
073 reinit();
074 processSequence(sequence);
075 }
076
077 /**
078 * Compute all the occurrences of symbol for a list of symbol sequences
079 *
080 */
081 public void process(final List<SymbolSequence<T>> sequences) {
082 reinit();
083 for (final SymbolSequence<T> sequence : sequences) {
084 processSequence(sequence);
085 }
086
087 }
088
089 public void process(final SymbolSequence<T>... sequences) {
090 reinit();
091 for (final SymbolSequence<T> sequence : sequences) {
092 processSequence(sequence);
093 }
094
095 }
096
097 private void processSequence(final SymbolSequence<T> sequence) {
098
099 for (int i = 0; i < sequence.length(); i++) {
100 Symbol<T> symbol = sequence.getSymbolAt(i);
101
102 if (!indices.containsKey(symbol)) {
103 indices.put(symbol, new ArrayList<Integer>());
104 }
105 indices.get(symbol).add(from + i);
106 }
107 // if many sequences
108 from += sequence.length();
109 }
110
111 /**
112 * Return the list of indices of specified symbol
113 *
114 * @param symbol the symbol to look for positions on.
115 * @return a list of indices for aaByte.
116 *
117 */
118 public final List<Integer> getIndices(final Symbol<T> symbol) {
119 return indices.get(symbol);
120 }
121
122 public final List<Integer> getIndices(final Set<Symbol<T>> symbols) {
123 final Set<Integer> localPositions = new TreeSet<Integer>();
124
125 for (final Symbol<T> symbol : symbols) {
126
127 if (indices.containsKey(symbol)) {
128 localPositions.addAll(indices.get(symbol));
129 }
130
131 }
132
133 return new ArrayList<Integer>(localPositions);
134 }
135
136 public int getSymbolNumber() {
137 return getSymbols().size();
138 }
139
140 public final int getSymbolNumber(final Set<Symbol<T>> symbols) {
141 int occ = 0;
142 for (Symbol<T> symbol : symbols) {
143 occ += getSymbolNumber(symbol);
144 }
145 return occ;
146 }
147
148 /**
149 * Return the number of occurrences for the specified symbol.
150 *
151 * @param symbol the symbol to look for occurrences on.
152 * @return a number of occurrence for the given symbol.
153 *
154 */
155 public final int getSymbolNumber(final Symbol<T> symbol) {
156 if (hasSymbol(symbol)) {
157 return getIndices(symbol).size();
158 }
159 return 0;
160 }
161
162 public Set<Symbol<T>> getSymbols() {
163 return indices.keySet();
164 }
165
166 public final boolean hasSymbol(final Symbol<T> symbol) {
167 return indices.containsKey(symbol);
168 }
169
170 @Override
171 public String toString() {
172 final StringBuilder sb = new StringBuilder();
173
174 sb.append("Symbol Sequence Counter :\n");
175
176 for (final Symbol<T> symbol : indices.keySet()) {
177 sb.append("#");
178 sb.append(symbol);
179 sb.append(" = ");
180 sb.append(indices.get(symbol) + "\n");
181 }
182 return sb.toString();
183 }
184 }