This is the regex I am using, but it could be flawed:
Code: Select all
RegExMatch(%WordCount%, "(*UCP)(?is)(\b[[:alpha:]]{2,}\b)(?!.*\b\1\b)", 3)
Code: Select all
; --- Auto-execute section -------------------------------------------------
; Lets the user pick a text file, derives "<name> frequency.txt" in the same
; folder as the output file, then runs the Search subroutine on every line.

; Option 3 = the chosen file and its path must both exist.
FileSelectFile, SourceFile, 3,, Pick a text file to analyze.
if (StrLen(SourceFile) = 0)  ; Dialog was cancelled.
    return

; Split the chosen path into its folder and its name without extension.
SplitPath, SourceFile,, PickedDir,, PickedStem
DestFile := PickedDir . "\" . PickedStem . " frequency.txt"

; An earlier output file is either deleted (Yes) or appended to (No).
if FileExist(DestFile)
{
    MsgBox, 4,, Overwrite the existing file? Press No to append to it.`n`nFILE: %DestFile%
    IfMsgBox, Yes
    {
        FileDelete, %DestFile%
    }
}

; WordCount and SearchString are shared with the Search subroutine.
WordCount := 0
Loop, Read, %SourceFile%, %DestFile%
{
    ; Hand the current line to Search; DestFile is this loop's output file.
    SearchString := A_LoopReadLine
    Gosub, Search
}

MsgBox %WordCount% words were found and written to "%DestFile%".
return
Search:
; Scans SearchString for words and writes each match on its own line via
; FileAppend with no filename, i.e. to the output file of the file-reading
; loop this subroutine is called from. WordCount is incremented once per
; word written.
;
; Inputs : SearchString (text to scan), WordCount (running total).
; Outputs: WordCount (updated); SearchString is left unmodified.
;
; Pattern notes:
;   (*UCP)(?is)               Unicode character properties, case-insensitive, dot matches all.
;   (\b[[:alpha:]]{2,}\b)     A whole word of two or more letters (capture group 1).
;   (?!.*\b\1\b)              Reject the word if it occurs again later in SearchString,
;                             so only the last occurrence within this text is reported.
;
; Fixes over the previous version: the haystack is SearchString (not a
; dereference of WordCount), the match is captured into a real output
; variable, and the text is walked iteratively with RegExMatch's StartingPos
; instead of one recursive Gosub per word.
FoundPos := 1
Loop
{
    FoundPos := RegExMatch(SearchString, "(*UCP)(?is)(\b[[:alpha:]]{2,}\b)(?!.*\b\1\b)", FoundWord, FoundPos)
    ; 0 means no further match; blank means the pattern failed (see ErrorLevel).
    ; Both are false here, so the loop always terminates.
    if !FoundPos
        break
    FileAppend, %FoundWord%`n
    WordCount += 1
    ; Resume scanning immediately after the word just written.
    FoundPos += StrLen(FoundWord)
}
return