The Sift library is really just two functions designed for sifting or searching through data for items that match certain criteria.
The two functions are:
Sift_Regex
Sift_Ngram
Code: Select all
;{ Sift
; Fanatic Guru
; 2015 04 30
; Version 1.00
;
; LIBRARY to sift through a string or array and return items that match sift criteria.
;
; ===================================================================================================================================================
;
; Functions:
;
; ===================================================================================================================================================
; Sift_Regex(Haystack, Needle, Options, Delimiter)
;
; Parameters:
; 1) {Haystack} String or array of information to search, ByRef for efficiency but Haystack is not changed by function
;
; 2) {Needle} String providing search text or criteria, ByRef for efficiency but Needle is not changed by function
;
; 3) {Options}
; IN Needle anywhere IN Haystack item (Default = IN)
; LEFT Needle is to LEFT or beginning of Haystack item
; RIGHT Needle is to RIGHT or end of Haystack item
; EXACT Needle is an EXACT match to Haystack item
; REGEX Needle is a REGEX expression to check against Haystack item
; OC Needle is ORDERED CHARACTERS to be searched for even non-consecutively but in the given order in Haystack item
; OW Needle is ORDERED WORDS to be searched for even non-consecutively but in the given order in Haystack item
; UC Needle is UNORDERED CHARACTERS to be searched for even non-consecutively and in any order in Haystack item
; UW Needle is UNORDERED WORDS to be searched for even non-consecutively and in any order in Haystack item
;
; If an Option is all lower case then the search will be case insensitive
;
; 4) {Delimiter} Single character Delimiter of each item in a Haystack string (Default = `n)
;
; Returns:
; If Haystack is string then a string is returned of found Haystack items delimited by the Delimiter
; If Haystack is an array then an array is returned of found Haystack items
;
; Note:
; Sift_Regex searches are all RegExMatch searches with Needles crafted based on the options chosen
;
; ===================================================================================================================================================
; Sift_Ngram(Haystack, Needle, Delta, Haystack_Matrix, Ngram Size, Format)
;
; Parameters:
; 1) {Haystack} String or array of information to search, ByRef for efficiency but Haystack is not changed by function
;
; 2) {Needle} String providing search text or criteria, ByRef for efficiency but Needle is not changed by function
;
; 3) {Delta} (Default = .7) Fuzzy match coefficient, 1 is a perfect match, 0 is no match at all, only results at or above the Delta are returned
;
; 4) {Haystack_Matrix} (Default = false)
; An object containing the preprocessing of the Haystack for Ngrams content
; If a non-object is passed the Haystack is processed for Ngram content and the results are returned by ByRef
; If an object is passed then that is used as the processed Ngram content of Haystack
; If multiple calls to the function are made with no change to the Haystack then a previous processing of Haystack for Ngram content
; can be passed back to the function to avoid reprocessing the same Haystack again in order to increase efficiency.
;
; 5) {Ngram Size} (Default = 3) The length of Ngram used. Generally an Ngram made of 3 letters, called a Trigram, works well
;
; 6) {Format} (Default = S`n)
; S Return Object with results Sorted
; U Return Object with results Unsorted
; S%%% Return Sorted string delimited by characters after S
; U%%% Return Unsorted string delimited by characters after U
; Sorted results are by best match first
;
; Returns:
; A string or array depending on Format parameter.
; If string then it is delimited based on Format parameter.
; If array then an array of objects is returned where each element is of the structure: {Object}.Delta and {Object}.Data
; Example Code to access object returned:
; for key, element in Sift_Ngram(Data, QueryText, NgramLimit, Data_Ngram_Matrix, NgramSize)
; Display .= element.delta "`t" element.data "`n"
;
; Dependencies: Sift_Ngram_Get, Sift_Ngram_Compare, Sift_Ngram_Matrix, Sift_SortResults
; These are helper functions that are generally not called directly. Although Sift_Ngram_Matrix could be useful to call directly to preprocess a large static Haystack
;
; Note:
; The string "dog house" would produce these Trigrams: dog|og |g h| ho|hou|ous|use
; Sift_Ngram breaks the needle and each item of the Haystack up into Ngrams.
; Then all the Needle Ngrams are looked for in the Haystack items Ngrams resulting in a percentage of Needle Ngrams found
;
; ===================================================================================================================================================
;
; Sift_Regex
;   Returns the Haystack items that match Needle according to Options.
;
;   Haystack  String (items separated by Delimit) or array of items. Not modified.
;   Needle    Search text, or a regular expression when Options is REGEX. Not modified.
;   Options   IN | LEFT | RIGHT | EXACT | REGEX | OC | OW | UC | UW (Default = IN).
;             An all lower case option makes the search case insensitive.
;   Delimit   Delimiter of the items in a Haystack string (Default = `n).
;
;   Returns a delimited string when Haystack is a string, otherwise an array.
;   An unrecognised option leaves the pattern empty, which matches every item.
Sift_Regex(ByRef Haystack, ByRef Needle, Options := "IN", Delimit := "`n")
{
    Needle_Temp := ""
    if (Options = "IN")
        Needle_Temp := Sift_Regex_Quote(Needle)
    else if (Options = "LEFT")
        Needle_Temp := "^" Sift_Regex_Quote(Needle)
    else if (Options = "RIGHT")
        Needle_Temp := Sift_Regex_Quote(Needle) "$"
    else if (Options = "EXACT")
        Needle_Temp := "^" Sift_Regex_Quote(Needle) "$"
    else if (Options = "REGEX")
        Needle_Temp := Needle
    else if (Options = "OC")
        Needle_Temp := RegExReplace(Needle, "(.)", "\Q$1\E.*")
    else if (Options = "OW")
    {
        ; Each word is quoted on its own; words stay separated by "a space, then anything".
        Loop, Parse, Needle, %A_Space%
            Needle_Temp .= (A_Index > 1 ? "\Q \E.*" : "") Sift_Regex_Quote(A_LoopField)
    }
    else if (Options = "UW")
    {
        ; One look-ahead per word so the words may appear in any order.
        Loop, Parse, Needle, %A_Space%
            Needle_Temp .= "(?=.*" Sift_Regex_Quote(A_LoopField) ")"
    }
    else if (Options = "UC")
    {
        ; One look-ahead per character so the characters may appear in any order.
        Loop, Parse, Needle
            Needle_Temp .= "(?=.*" Sift_Regex_Quote(A_LoopField) ")"
    }

    ; An all lower case option requests a case insensitive search.
    if Options is lower
        Needle_Temp := "i)" Needle_Temp

    if IsObject(Haystack)
    {
        Sifted := []
        for key, Hay in Haystack
            if RegExMatch(Hay, Needle_Temp)
                Sifted.Push(Hay)
        return Sifted
    }

    ; String haystack: join the matches with Delimit, without a trailing delimiter.
    Sifted := "", Found := 0
    Loop, Parse, Haystack, %Delimit%
        if RegExMatch(A_LoopField, Needle_Temp)
            Sifted .= (Found++ ? Delimit : "") A_LoopField
    return Sifted
}

; Private helper: wraps Text in \Q...\E so it is matched literally.
; A "\E" inside Text would end the quoting early, so each one is closed,
; emitted as an escaped literal, and the quoting is reopened.
Sift_Regex_Quote(Text)
{
    return "\Q" RegExReplace(Text, "\\E", "\E\\E\Q") "\E"
}
; Sift_Ngram
;   Fuzzy search: scores every Haystack item by the share of the Needle's
;   n-grams it contains and keeps the items whose score is not below Delta.
;
;   Haystack         String (one item per line) or array of items. Not modified.
;   Needle           Search text. Not modified.
;   Delta            Minimum score to keep an item (Default = .7).
;   Haystack_Matrix  Pre-computed n-gram matrix of Haystack. When a non-object is
;                    passed the matrix is built here and handed back ByRef so the
;                    caller can reuse it.
;   n                N-gram length (Default = 3).
;   Format           "S"/"U" return an object (sorted/unsorted); "S" or "U" followed
;                    by more characters returns a string joined by those characters.
;
;   Returns a string or an object whose elements have .Delta and .Data.
Sift_Ngram(ByRef Haystack, ByRef Needle, Delta := .7, ByRef Haystack_Matrix := false, n := 3, Format := "S`n" )
{
    if !IsObject(Haystack_Matrix)
        Haystack_Matrix := Sift_Ngram_Matrix(Haystack, n)
    Needle_Ngram := Sift_Ngram_Get(Needle, n)

    Search_Results := {}
    if IsObject(Haystack)
    {
        for key, Hay_Ngram in Haystack_Matrix
        {
            Result := Sift_Ngram_Compare(Hay_Ngram, Needle_Ngram)
            if !(Result < Delta)
                Search_Results[key] := {Delta: Result, Data: Haystack[key]}
        }
    }
    else
    {
        Loop, Parse, Haystack, `n, `r
        {
            Result := Sift_Ngram_Compare(Haystack_Matrix[A_Index], Needle_Ngram)
            if !(Result < Delta)
                Search_Results[A_Index] := {Delta: Result, Data: A_LoopField}
        }
    }

    if (Format ~= "i)^S")
        Sift_SortResults(Search_Results)

    ; The "s" (DotAll) option lets the delimiter contain a CR+LF pair; without it
    ; the dot refuses to match that pair and the delimiter is cut short or the
    ; whole pattern fails.
    if RegExMatch(Format, "is)^(S|U)(.+)$", Match)
    {
        String_Results := ""
        for key, element in Search_Results
            String_Results .= element.data Match2
        return SubStr(String_Results, 1, -StrLen(Match2))
    }
    return Search_Results
}
; Sift_Ngram_Get
;   Breaks String into overlapping n-grams of length n and counts them.
;   Returns an object mapping each n-gram to its number of occurrences;
;   the object is empty when String is shorter than n.
Sift_Ngram_Get(ByRef String, n := 3)
{
    Grams := {}
    Loop, % (1 + StrLen(String) - n)
    {
        gram := SubStr(String, A_Index, n)
        Seen := Grams[gram]
        Grams[gram] := Seen ? Seen + 1 : 1
    }
    return Grams
}
; Sift_Ngram_Compare
;   Scores how much of the Needle's n-gram content is present in Hay.
;
;   Hay     N-gram count object of one Haystack item (as built by Sift_Ngram_Get).
;   Needle  N-gram count object of the search text.
;
;   Returns matched n-grams / total Needle n-grams, a value from 0 to 1.
;   Each Needle n-gram is credited at most as often as it occurs in the Needle.
;   A Needle without any n-grams scores 0.0 instead of dividing by zero.
Sift_Ngram_Compare(ByRef Hay, ByRef Needle)
{
    Match := 0, Needle_Total := 0
    for gram, Needle_Count in Needle
    {
        Needle_Total += Needle_Count
        ; An n-gram missing from Hay reads back blank; count it as zero.
        Hay_Count := Hay[gram]
        if (Hay_Count = "")
            Hay_Count := 0
        Match += (Hay_Count > Needle_Count) ? Needle_Count : Hay_Count
    }
    if !Needle_Total
        return 0.0
    return Match / Needle_Total
}
; Sift_Ngram_Matrix
;   Pre-processes Data into an object of n-gram count objects, one per item.
;
;   Data  String (one item per line) or array of items. Not modified.
;   n     N-gram length (Default = 3).
;
;   For an array the result uses the same keys as Data, so sparse or
;   associative arrays stay aligned with their items. For a string the result
;   is keyed by line number and carriage returns are dropped from line ends,
;   the same way Sift_Ngram splits the string.
Sift_Ngram_Matrix(ByRef Data, n := 3)
{
    Matrix := {}
    if IsObject(Data)
    {
        for key, string in Data
            Matrix[key] := Sift_Ngram_Get(string, n)
    }
    else
    {
        Loop, Parse, Data, `n, `r
            Matrix[A_Index] := Sift_Ngram_Get(A_LoopField, n)
    }
    return Matrix
}
; Sift_SortResults
;   Re-orders Data (ByRef) in place so the best match comes first.
;
;   Data  Object whose elements each have .Delta and .Data. It is replaced by a
;         1-based array of the same elements, highest Delta first. Elements with
;         an equal Delta end up in reverse order of their original keys.
Sift_SortResults(ByRef Data)
{
    ; String keys enumerate in alphabetical order, so keying by Delta followed by
    ; the zero padded original key yields the elements in ascending order.
    Ascending := {}
    for key, element in Data
        Ascending[element.Delta SubStr("0000000000" key, -9)] := element

    Ordered := []
    for sort_key, element in Ascending
        Ordered.Push(element)

    ; Fill the result back to front instead of inserting at position 1 each
    ; time, which would shift every stored element on every insertion.
    Total := Ordered.Length()
    Data := []
    Loop, % Total
        Data.Push(Ordered[Total - A_Index + 1])
    return
}
Sift_Ngram is a little more complicated because it is a fuzzier type of search: it breaks Haystacks and Needles up into little chunks and then compares how many of those chunks they have in common.
Here is a simple Sift_Ngram example code:
Code: Select all
#Include Sift.ahk

; Haystack: five short sentences, one item per line.
Sentences =
(
Big Apple trees are great.
Pear trees are not great.
Tree beetles kill trees.
A song Bird is in the tree.
An Apple is in the tree.
)

; Defaults: Delta .7, trigrams, sorted results joined by a newline.
Exact_Spelling := Sift_Ngram(Sentences, "The Apple tree")
MsgBox % Exact_Spelling

; Same query with a misspelled word.
Misspelled := Sift_Ngram(Sentences, "The Aple tree")
MsgBox % Misspelled

; Lower Delta (.3) and a custom separator between the sorted results.
Loose_Match := Sift_Ngram(Sentences, "The Apple tree", .3, , , "S`n=========`n")
MsgBox % Loose_Match

Esc::ExitApp
The next example is not about the Gui; the Gui is just there to help you see how the functions work. Most of the example code only exists to get the Gui to collect the user input.
All the meat of the example occurs in the Query subroutine in just a few calls of the functions.
Code: Select all
; Demo Gui for the Sift library: auto-execute section.
; Loads the sample data, sets the initial search settings, builds the window
; and shows it. The labels that follow the Esc hotkey react to the controls.
#Include Sift.ahk
; Sample haystack: one question per line.
Data =
(
Where can I find the official build, or older releases?
Why do some lines in my script never execute?
Why does my script run fine in XP but not in Vista or Windows 7 or 8?
I can't edit my script via tray icon because it won't start due to an error. Can I find my script somewhere else?
How can I find and fix errors in my code?
Can I run AHK from a USB drive?
Why is the Run command unable to launch my game or program?
How can the output of a command line operation be retrieved?
How can a script close, pause, or suspend other script(s)?
How can a repeating action be stopped without exiting the script?
How can performance be improved for games or at other times when the CPU is under heavy load?
How can context sensitive help for AutoHotkey commands be used in any editor?
How to detect when a web page is finished loading?
How can dates and times be compared or manipulated?
How can I send the current Date and/or Time?
How can I send text to a window which isn't active or isn't visible?
Why don't Hotstrings, Send, and MouseClick work in certain games?
How can Winamp be controlled even when it isn't active?
How can MsgBox's button names be changed?
How can I change the default editor, which is accessible via context menu or tray icon?
How can I save the contents of my GUI associated variables?
Can I draw something with AHK?
How can I start an action when a window appears, closes or becomes [in]active?
My antivirus program flagged AHK as malware. Does it really contain a virus?
How do I put my hotkeys and hotstrings into effect automatically every time I start my PC?
I'm having trouble getting my mouse buttons working as hotkeys. Any advice?
How can tab and space be defined as hotkeys?
How can keys or mouse buttons be remapped so that they become different keys?
How do I detect the double press of a key or button?
How can a hotkey or hotstring be made exclusive to certain program(s)? In other words, I want a certain key to act as it normally does except when a specific window is active.
How can a prefix key be made to perform its native function rather than doing nothing?
How can the built-in Windows shortcut keys, such as Win+U (Utility Manager) and Win+R (Run), be changed or disabled?
Can I use wildcards or regular expressions in Hotstrings?
How can I use a hotkey that is not in my keyboard layout?
My keypad has a special 000 key. Is it possible to turn it into a hotkey?
)
; Initial state. These values mirror the pre-selected controls below:
; IN radio checked with case sensitivity off, Ngram size 3 (Choose2),
; result limit .50 (Choose3) and result count 0 (Choose1).
Display := Data
Options := "in"
NgramSize := 3
NgramLimit := .50
DisplayLimit := 0
; Query box and the "Search Options" heading.
Gui, Font, s10 bold
Gui, Add, Text, x78 y11 w120 h20, Query?
Gui, Font, s10 bold underline
Gui, Add, Text, x8 y50 w120 h20, Search Options
Gui, Font, norm
; Every edit of the query runs the Query label.
Gui, Add, Edit, x130 y10 w600 h20 vQueryText gQuery
; One radio button per search mode. Only the first one carries the output
; variable (vRadio); the Radio label maps the values 1 to 10 to an option
; name in this same order.
Gui, Add, Radio, x5 yp+60 w120 h15 +Center vRadio gRadio Checked, IN
Gui, Add, Radio, x5 w120 h20 +Center gRadio, LEFT
Gui, Add, Radio, x5 w120 h20 +Center gRadio, RIGHT
Gui, Add, Radio, x5 w120 h20 +Center gRadio, EXACT
Gui, Add, Radio, x5 w120 h20 +Center gRadio, REGEX
Gui, Add, Radio, x5 w120 h40 +Center gRadio, ORDERED`nCHARACTERS
Gui, Add, Radio, x5 w120 h40 +Center gRadio, UNORDERED`nCHARACTERS
Gui, Add, Radio, x5 w120 h40 +Center gRadio, ORDERED`nWORDS
Gui, Add, Radio, x5 w120 h40 +Center gRadio, UNORDERED`nWORDS
Gui, Add, Radio, x5 w120 h40 +Center gRadio, Ngram
; Ngram settings: n-gram size, minimum score (passed to Sift_Ngram as Delta)
; and the maximum number of result lines to show (0 = no limit).
Gui, Add, ComboBox, yp40 w40 vNgramSize gNgramSize Choose2, 2|3|4|5
Gui, Add, Text, x48 yp+3, Size
Gui, Add, ComboBox, x5 w40 vNgramLimit gNgramLimit Choose3, 1.00|.70|.50|.30|.10|0
Gui, Add, Text, x48 yp+3, Result Limit
Gui, Add, ComboBox, x5 w40 vDisplayLimit gDisplayLimit Choose1, 0|1|2|3|4|5
Gui, Add, Text, x48 yp+3, Result #
; When checked, the Query label prefixes each Ngram result with its score.
Gui, Add, Checkbox, x5 w120 h40 +Center vNgramResult gNgramResult, SHOW NGRAM RESULT
Gui, Add, Text, x5 w120 h2 0x7 ; Line
Gui, Add, Checkbox, x5 w120 h40 +Center vCase gCheckboxCase, CASE SENSITIVE
Gui, Add, Text, x5 yp-190 w120 h2 0x7 ; Line
; Read-only result area, initially showing the whole data set.
; NOTE(review): +0x300000 presumably adds vertical and horizontal scroll bars - confirm.
Gui, Add, Edit, w600 h570 x130 y40 vGui_Display ReadOnly +0x300000 -wrap, %Display%
Gui, Show, w740 h620
; Esc quits the demo.
Esc::ExitApp
; Query
;   Reads the Gui controls, runs the selected search over Data and shows the
;   result in the Gui_Display edit control.
;   Ngram mode falls back to a case insensitive IN search while the query is
;   shorter than the n-gram size.
Query:
Gui, Submit, NoHide
if (Options = "NGRAM")
{
    if (StrLen(QueryText) < NgramSize)
        Display := Sift_Regex(Data, QueryText, "in")
    else
    {
        Display := ""
        if NgramResult
        {
            ; Show every match together with its score, best match first.
            for key, element in Sift_Ngram(Data, QueryText, NgramLimit, Data_Ngram_Matrix, NgramSize, "S")
                Display .= element.delta "`t" element.data "`n"
            Display := SubStr(Display, 1, -1)
        }
        else
            Display := Sift_Ngram(Data, QueryText, NgramLimit, Data_Ngram_Matrix, NgramSize)
        if DisplayLimit
        {
            ; Cut after the first DisplayLimit lines. When there are no more lines
            ; than the limit, InStr finds no such newline and returns 0; the text
            ; must then be left as it is instead of being cut down to nothing.
            Limit_Pos := InStr(Display, "`n", false, 1, DisplayLimit)
            if Limit_Pos
                Display := SubStr(Display, 1, Limit_Pos - 1)
        }
    }
}
else
    Display := Sift_Regex(Data, QueryText, Options)
GuiControl,, Gui_Display, %Display%
return
; CheckboxCase
;   Applies the CASE SENSITIVE checkbox to the current option name and re-runs
;   the query. Sift_Regex treats an all lower case option as case insensitive,
;   so the option is upper-cased when the box is checked and lower-cased when
;   it is not. In Ngram mode the checkbox is always cleared.
CheckboxCase:
Gui, Submit, NoHide
if (Options = "NGRAM")
{
    GuiControl,, Case, 0
    Case := 0
}
Options := Case ? Format("{:U}", Options) : Format("{:L}", Options)
gosub Query
return
; Radio
;   Translates the selected radio button (1 to 10, in the order the buttons were
;   added to the Gui) into the matching option name, then lets CheckboxCase
;   apply the case setting and run the query.
Radio:
Gui, Submit, NoHide
Radio_Options := ["IN", "LEFT", "RIGHT", "EXACT", "REGEX", "OC", "UC", "OW", "UW", "NGRAM"]
if (Radio >= 1 && Radio <= 10)
    Options := Radio_Options[Radio]
; Switching to Ngram mode clears the CASE SENSITIVE checkbox.
if (Radio = 10 && Case)
{
    Case := 0
    GuiControl,, Case, 0
}
gosub CheckboxCase
return
; Handlers of the Ngram controls. Each one re-reads the Gui and runs the query again.
; A new n-gram size additionally discards the cached n-gram matrix, because it was
; built for the previous size; execution then falls through to the shared code below.
NgramSize:
Data_Ngram_Matrix := ""
NgramLimit:
DisplayLimit:
NgramResult:
Gui, Submit, NoHide
gosub Query
return