I wonder why the time complexity has to be O(N∗M∗Len) (where ‘N’ is the number of characters in the given string, ‘M’ is the total number of words, and ‘Len’ is the length of a word).
It seems to me that the term ‘M’ can be removed by using a trie to check if any of the M words appear in a given location in the string using only Len steps (as opposed to M*Len the way the solution is implemented).
Thanks
Guy
Correction: a trie is not really any better complexity-wise than checking if a word is in a dictionary.
But the time complexity can still be improved to O(N*Len). The proposed solution does redundant work by looking for a complete concatenation at each step instead of using a sliding window to re-use some of the previous word matches.
Here is my proposed solution:
def find_word_concatenation(str, words):
wlen = len(words[0])
# calculate word frequencies
wfreq = {}
for w in words:
wfreq.setdefault(w,0)
wfreq[w] += 1
result_indices = []
for phase in range(wlen):
matched_words = []
match_start = phase
for i in range(phase, len(str), wlen):
word = str[i:i+wlen]
if word in wfreq:
# word match found!
matched_words.append(word)
wfreq[word] -= 1
if wfreq[word] == 0 and len(matched_words) == len(words):
# complete match found!
result_indices.append(match_start)
while wfreq[word] < 0 or len(matched_words) == len(words):
# oops, too many occurences (or match found), shrink window
word2 = matched_words.pop(0)
wfreq[word2] += 1
match_start += wlen
else:
# no match, clear everything
for word2 in matched_words:
wfreq[word2] += 1
matched_words = []
match_start = i + wlen
return result_indices
matched_words is not necessary but it makes the code a little easier to read.
Please let me know if I’m missing anything
Hey Guy,
I’m glad you mentioned this because I had the same idea. I believe you actually CAN improve the runtime with a Trie. I did so myself, bringing the suggested runtime from O(N * W * Len) to O(N + W * Len) (Notice the +) . For the solution to actually have better performance characteristics, you have to use a counting strategy similar to the one used in the Problem Challenge 2.
See my Javascript implementation (There are a couple moving pieces but let me know if you’d like me to explain further):
/**
* W = number of words
* w = word size
* Space: O(W * w)
* Time: O(N + (W * w))
*
*/
const createWordTrie = (words) => {
const trie = {value: null, children: new Map() };
let current = trie;
for (const word of words) {
for (const char of word) {
const nextNode = current.children.get(char) || {value: null, children: new Map() };
current.children.set(char, nextNode);
current = nextNode;
}
current.value = current.value ? current.value + 1 : 1;
current = trie;
}
return trie;
}
const find_word_concatenation = function(str, words) {
const indices = [];
const singleWordSize = words[0].length;
const numberOfWords = words.length;
const wordsSize = singleWordSize * words.length;
const wordsTrie = createWordTrie(words);
let windowStart = 0;
let matches = 0;
let frontTrieHead = wordsTrie;
let backTrieHead = wordsTrie;
for (let windowEnd = 0; windowEnd < str.length; windowEnd ++) {
const char = str[windowEnd];
if (frontTrieHead.children.has(char)) {
frontTrieHead = frontTrieHead.children.get(char);
if (frontTrieHead.value !== null) {
frontTrieHead.value--;
if (frontTrieHead.value === 0) {
matches++;
}
frontTrieHead = wordsTrie;
}
} else {
frontTrieHead = wordsTrie;
}
if (windowEnd >= wordsSize) {
const startChar = str[windowStart];
if (backTrieHead.children.has(startChar)) {
backTrieHead = backTrieHead.children.get(startChar);
if (backTrieHead.value !== null) {
if (backTrieHead.value === 0) {
matches--;
}
backTrieHead.value++;
backTrieHead = wordsTrie
}
} else {
backTrieHead = wordsTrie;
}
windowStart++;
}
if (matches === numberOfWords && backTrieHead === wordsTrie && frontTrieHead === wordsTrie) {
indices.push(windowStart);
}
}
return indices;
};