So where is mistake in this code?
Code: Select all
gosub tst
;msgbox,% xmldata
doc := ComObjCreate("MSXML2.DOMDocument.6.0")
doc.async := false
doc.loadXML(xmldata)
;al(doc)
;DocNode := doc.selectSingleNode("//html/body/div[0]").getAttribute("title") ;not working
;DocNode := doc.selectSingleNode("//html/body/div[0]/div/p/span/span/strong") ;not working
DocNode := doc.selectSingleNode("//html/body/div[0]/div/p/span/span").getAttribute("bbox") ;not working
DocText := DocNode.text
MsgBox,% DocText
return
f4::reload
tst:
xmldata =
(join`r`n
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title></title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name='ocr-system' content='tesseract 4.00.00alpha' />
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word'/>
</head>
<body>
<div class='ocr_page' id='page_1' title='image "D:\VideoSubFinder_2.10_x32_64\down\RGBImages\x\2\ClearedText\0_07_47_760__0_07_49_039.png"; bbox 0 0 1280 170; ppageno 0'>
<div class='ocr_carea' id='block_1_1' title="bbox 467 87 812 128">
<p class='ocr_par' id='par_1_1' lang='pol' title="bbox 467 87 812 128">
<span class='ocr_line' id='line_1_1' title="bbox 467 87 812 128; baseline 0 -9; x_size 41; x_descenders 9; x_ascenders 10"><span class='ocrx_word' id='word_1_1' title='bbox 467 87 571 119; x_wconf 94'><strong>Jesteś</strong></span> <span class='ocrx_word' id='word_1_2' title='bbox 582 87 653 128; x_wconf 95'><strong>zbyt</strong></span> <span class='ocrx_word' id='word_1_3' title='bbox 663 87 812 119; x_wconf 95'><strong>ambitna!</strong></span>
</span>
</p>
</div>
</div>
</body>
</html>
)
return