parse CSV (specify delimiters and qualifier)
Posted: 11 Jul 2019, 19:28
- I wrote an attempt at parsing a line of CSV text, where you can specify multiple delimiters, multiple omit characters and one qualifier.
- I've only done a few tests on it, and it's not something I'm likely to use very much.
- The principle it uses is: use RegExMatch to find the next delimiter; if there is an even number of qualifiers (e.g. quotes) between the first character of the current field and that delimiter, then you have reached the end of a field; otherwise you are still within a field.
- People are welcome to make suggestions or modify/adapt the script.
- Has anything similar been written? Thanks.
- I've only done a few tests on it, and it's not something I'm likely to use very much.
- The principle it uses is: use RegExMatch to find the next delimiter; if there is an even number of qualifiers (e.g. quotes) between the first character of the current field and that delimiter, then you have reached the end of a field; otherwise you are still within a field.
- People are welcome to make suggestions or modify/adapt the script.
- Has anything similar been written? Thanks.
Code: Select all
q:: ;parse CSV
;demo hotkey: parses one sample line twice and shows each result as
;'index field' lines in a message box

;alternative sample input (disabled):
;vText = """a""bc""","def","ghi"

;legacy (non-expression) assignment: every quote character below is literal
vText = ""","",a,"",b,c,"","""," def ",;;," ghi "

;pass 1: default arguments
vOutput := ""
for vKey, vValue in StrSplitCSV(vText)
{
    vOutput := vOutput . vKey " " vValue "`r`n"
}
MsgBox % vOutput

;pass 2: ',' and ';' both act as delimiters, and ' ' is passed as the omit characters
vOutput := ""
for vKey, vValue in StrSplitCSV(vText, ",;", " ")
{
    vOutput := vOutput . vKey " " vValue "`r`n"
}
MsgBox % vOutput
return
;==================================================
;StrSplitCSV: split one line of CSV-style text into an array of fields.
;
;parameters:
;  vText      - the line of text to parse
;  vDelim     - delimiter characters: either a string, in which every character
;               is a delimiter, or an array whose values are concatenated into
;               such a string (default: comma)
;  vOmitChars - passed to Trim for every field, after the surrounding qualifiers
;               have been removed and doubled qualifiers collapsed
;  vQual      - qualifier, exactly one character; the sentinel "_DQ_" stands
;               for a double quote
;
;returns: an array of field strings (an empty array when vText is empty)
;
;throws Exception when:
;  - vQual is not exactly one character long
;  - vQual also appears in vDelim
;  - a field starts with the qualifier but does not end with it (or vice versa)
;  - the text ends while an odd number of qualifiers is pending
;
;method: find the next delimiter; if the text between the start of the current
;field and that delimiter holds an even number of qualifiers, the delimiter
;terminates the field, otherwise it lies inside a qualified section and the
;search continues from the following character.
;
;when there are no delimiter characters at all (empty string or empty array),
;no search is performed and the whole text is handled as a single field
;(building the needle "[]" would be an invalid RegEx character class).
;==================================================
StrSplitCSV(vText, vDelim:=",", vOmitChars:="", vQual:="_DQ_") ;note: AHK v1: """", AHK v2: "`""
{
    local
    ;1 on AHK v1 (SubStr(1, 0) returns the last character), 0 on AHK v2 (returns an empty string)
    static vIsV1 := !!SubStr(1, 0)

    if (vQual = "_DQ_")
        vQual := Chr(34)
    if !(StrLen(vQual) = 1)
        throw Exception("invalid qualifier", -1)
    if (vText = "")
        return []

    ;an array of delimiters is flattened into one string of delimiter characters
    if IsObject(vDelim)
    {
        oDelim := vDelim
        vDelim := ""
        for _, vValue in oDelim
            vDelim .= vValue
    }
    if InStr(vDelim, vQual, 1)
        throw Exception("qualifier matches a delimiter", -1)

    ;with no delimiter characters the needle would be "[]", which does not
    ;compile; in that case the search is skipped below
    vHasDelim := !!StrLen(vDelim)

    ;4 characters that need escaping in a RegEx character class: ^-]\
    vNeedle := "[" RegExReplace(vDelim, "[\^\-\]\\]", "\$0") "]"

    vPos := 1    ;where the next delimiter search starts
    vPos1 := 1   ;first character of the field currently being assembled
    vDoEnd := 0  ;set once no further delimiter exists
    oArray := []
    Loop
    {
        vPos := vHasDelim ? RegExMatch(vText, vNeedle,, vPos) : 0
        if vPos
            vPos2 := vPos-1
        else
            vPos2 := StrLen(vText), vDoEnd := 1

        ;candidate field: from the field start up to just before the delimiter
        ;(or up to the end of the text)
        vTemp := SubStr(vText, vPos1, vPos2-vPos1+1)
        StrReplace(vTemp, vQual,, vCount) ;only the replacement count is wanted

        ;an even number of qualifiers means the delimiter is outside any
        ;qualified section; at the end of the text the remainder is always processed
        if !(vCount & 1) || vDoEnd
        {
            vIsQuote1 := (SubStr(vTemp, 1, 1) = vQual)
            ;last character: position 0 on AHK v1, position -1 on AHK v2
            vIsQuote2 := (SubStr(vTemp, vIsV1-1) = vQual)
            if !(vIsQuote1 = vIsQuote2)
                throw Exception("qualifier start/end mismatch:`r`n" vTemp, -1)
            if vIsQuote1
                vTemp := SubStr(vTemp, 2, -1) ;drop the surrounding qualifiers
            ;a doubled qualifier stands for one literal qualifier
            oArray.Push(Trim(StrReplace(vTemp, vQual vQual, vQual), vOmitChars))
            vPos1 := vPos + 1
        }
        if vDoEnd
        {
            if (vCount & 1)
                throw Exception("qualifier count mismatch:`r`n" vTemp, -1)
            break
        }
        vPos++ ;continue searching after the delimiter just found
    }
    return oArray
}