Code:
#NoEnv
SetBatchLines, -1

; Sample input: two lines of text, joined by a linefeed.
text := "of course I use the phrase of course, of course you might think this is excessive,`nbut I don't think this is excessive."

; Report the longest phrases first; scanning from long to short makes it
; easy to spot (and, if desired, skip) shorter phrases that are merely
; parts of a longer repeated phrase.
fourWordReport := DupPhrase(text, 4)
MsgBox, % fourWordReport
threeWordReport := DupPhrase(text, 3)
MsgBox, % threeWordReport
twoWordReport := DupPhrase(text, 2)
MsgBox, % twoWordReport
; DupPhrase - report phrases of a fixed word count that occur more than once.
;
; Parameters:
;   text          - the text to scan.
;   wordsInPhrase - number of space-separated words per phrase (default 2).
;                   Values below 1 yield a report containing only the header.
;   disregard     - a string; any phrase that occurs inside it (substring
;                   match via InStr) is skipped and never reported.
;
; Returns:
;   A report string: a "words in phrase: N" header followed by one line per
;   distinct phrase that was found more than once.
;
; Notes:
;   - Phrases are cut from a copy of the text with , ; : . removed, but the
;     occurrences are counted in the original text. A phrase that spans
;     removed punctuation therefore does not match and is not reported.
;   - Only the space character separates words; line breaks do not.
DupPhrase(text, wordsInPhrase=2, disregard=""){
    Report := "words in phrase: " wordsInPhrase "`n`n"

    ; A phrase needs at least one word; smaller values would never satisfy
    ; the loop's exit condition below.
    If ( wordsInPhrase < 1 )
        Return Report

    ; Strip punctuation; the appended space lets the final word close a phrase.
    trunc := RegExReplace(text, "s)[,;:\.]*") . " "

    Loop
    {
        ; UseErrorLevel stores the number of replacements (= number of
        ; spaces left) in ErrorLevel.
        StringReplace, dummy, trunc, %A_Space%, , UseErrorLevel
        If ( ErrorLevel < wordsInPhrase ) ; too few spaces left to form a phrase
            Break

        ; Zero-based offset of the Nth space = length of the leading phrase.
        StringGetPos, splicePos, trunc, %A_Space%, L%wordsInPhrase%
        phrase := SubStr( trunc, 1, splicePos )

        ; Drop the first word BEFORE any Continue, so every iteration makes
        ; progress; skipping a disregarded phrase without advancing would
        ; re-extract the same phrase forever.
        trunc := SubStr( trunc, InStr(trunc, A_Space) + 1 )

        If ( InStr(disregard, phrase) ) ; caller asked to ignore this phrase
            Continue

        If phrase Not in %old_Phrases% ; first time this phrase is seen
        {
            old_Phrases .= phrase ","
            ; Count occurrences of the phrase in the original text.
            StringReplace, dummy, text, %phrase%, , UseErrorLevel
            If ErrorLevel > 1 ; more than one occurrence found
                Report .= "phrase /" phrase "/ was found " ErrorLevel " times.`n"
        }
    }
    Return Report
}