Extended Speech to Text SAPI 5.4

Get help with using AutoHotkey (v2 or newer) and its commands and hotkeys
RogerWilcoNL
Posts: 17
Joined: 13 Nov 2019, 10:41

Extended Speech to Text SAPI 5.4

Post by RogerWilcoNL » 13 Nov 2019, 11:19

I am using SAPI 5.4 to build a Speech recognizer. Got the start from this forum. (great).
Now I want to do something more elaborate by introducing corrections to the recognised text.
For this purpose I need access to the extended interfaces (the "2" versions) of ISpRecognizer, ISpRecoContext and/or ISpRecoResult.
After searching the whole internet :facepalm: , I found some clues, but I don't completely comprehend how
ComObjQuery and ComObject work. Furthermore, there seems to be a difference in the notation of the GUIDs.
I think I have tried every combination, but I mostly get zero as the result of ComObjQuery.
Can anyone help ?
Below is a simplified piece of my code:

Code: Select all

; SR - script entry point.
; Starts a fresh log, builds the speech recognizer and hands control to its
; idle loop. LogFile and iSR are globals; SR_Recognition declares iSR global.
LogFile := "SR.log"
FileDelete LogFile                  ; discard the log left by a previous run
Lg( LogFile ": " A_Now )            ; first log line: file name and start timestamp
iSR := New SR()
iSR.Do()                            ; loops forever, so the lines below are normally not reached
iSR := ""                           ; drop the reference to the recognizer
Return
 
; SR wraps a SAPI in-process speech recognizer: it selects the first audio
; input, creates a recognition context, loads dictation plus a command grammar
; and routes the context's events to global functions prefixed "SR_".
;
; Besides the automation (IDispatch) objects it also tries to obtain the
; extended native interfaces:
;   this.Reco2     - ISpRecognizer2   (IUnknown wrapper, VT_UNKNOWN)
;   this.cContext2 - ISpRecoContext2  (IUnknown wrapper, VT_UNKNOWN)
; Each of these is "" when the interface could not be obtained.
class SR
{
	; Builds the recognizer. Any error raised during set-up is logged through
	; Lg()/jxon_Dump() and swallowed, so the instance may be only partially
	; initialised when construction fails.
	__New()
	{
		try {
			this.cListener := ComObjCreate("SAPI.SpInprocRecognizer")
			cAudioInputs := this.cListener.GetAudioInputs()
			if !cAudioInputs.Count {
				MsgBox( "No microphone detected", "DM" )
				throw Exception( "No microphone detected" )
			}

			; ISpRecognizer2 is an interface, not a creatable class, so it cannot be
			; passed to ComObjCreate as a ProgID (that throws and aborts the whole
			; set-up). Ask the existing recognizer object for the interface instead.
			; NOTE(review): IID taken from sapi.h - confirm against the SDK header.
			IID_ISpRecognizer2 := "{8FC6D974-C81E-4098-93C5-0147F61ED4D3}"
			this.Reco2 := this._Query( this.cListener, IID_ISpRecognizer2, 13 )

			this.cListener.AudioInput := cAudioInputs.Item(0)       ;set audio device to first input
			this.cContext := this.cListener.CreateRecoContext()
			this._LogComType( A_ThisFunc, this.cContext )

			; ISpRecoContext2 is not IDispatch-based: wrap it as VT_UNKNOWN (13), never
			; as VT_DISPATCH (9). Its methods have to be called through the vtable
			; (DllCall), not with obj.Method() syntax.
			IID_ISpRecoContext2 := "{BEAD311C-52FF-437f-9464-6B21054CA73D}"
			this.cContext2 := this._Query( this.cContext, IID_ISpRecoContext2, 13 )
			this._LogComType( A_ThisFunc, this.cContext2 )

			this.cGrammar := this.cContext.CreateGrammar(0)         ;obtain phrase manager (ISpeechRecoGrammar object)
			this.cGrammar.State := 0                                ;SGSDisabled while the grammar is being loaded
			this.cGrammar.DictationLoad( "", 0 )                    ;all topics, SLOStatic
			this.cGrammar.DictationSetState( 1 )                    ;enable dictation mode (SGDSActive)
			this.cGrammar.CmdLoadFromFile( "GameGrammar.xml", 1 )   ;SLOStatic=0 / SLODynamic=1
			ComObjConnect(this.cContext, "SR_")                     ;connect the recognition context events
			this.cGrammar.State := 1                                ;SGSEnabled
		}
		catch e
			Lg( jxon_Dump(e,A_Space) )
	}

	; Disconnects the event sink and releases every COM reference held by the
	; instance. Safe to run on a partially initialised instance.
	__Delete()
	{
		; ComObjConnect without a prefix disconnects the events; "try" covers the
		; case where the context was never created.
		try ComObjConnect(this.cContext)
		this.cGrammar := ""
		this.cContext2 := ""
		this.cContext := ""
		this.Reco2 := ""
		this.cListener := ""
	}

	; Idles forever in 50 ms sleeps so the script stays alive while recognition
	; events are delivered. Never returns.
	Do()
	{
		While true {
			Sleep 50
		}
	}

	; Logs the COM type information of a recognition result and of the context
	; that raised the event, then queries the result for ISpeechRecoResult2.
	;   StreamNumber, StreamPosition, RecognitionType - event arguments (not used here)
	;   cResult  - recognition result object passed by the event
	;   cContext - recognition context object passed by the event
	OnEvaluate(StreamNumber,StreamPosition, RecognitionType, ByRef cResult, ByRef cContext) ; don't know the result yet
	{
		this._LogComType( A_ThisFunc, cResult )
		this._LogComType( A_ThisFunc, cContext )

		; A GUID string must have the 8-4-4-4-12 layout; a malformed string can never
		; match an interface. ISpeechRecoResult2 is an automation interface, so a
		; VT_DISPATCH (9) wrapper is appropriate here.
		; NOTE(review): IID taken from sapi.h - confirm against the SDK header.
		IID_ISpeechRecoResult2 := "{8E0A246D-D3C8-45DE-8657-04290C458C3C}"
		res2 := this._Query( cResult, IID_ISpeechRecoResult2, 9 )
		this._LogComType( A_ThisFunc, res2 )
	}

	; Private helper: queries obj for interface iid and wraps the returned pointer
	; in a ComObject of variant type vt (9 = VT_DISPATCH, 13 = VT_UNKNOWN).
	; Flag 1 makes the wrapper take over the reference handed out by ComObjQuery.
	; Returns "" when the interface is not available.
	_Query(obj, iid, vt)
	{
		ptr := 0
		try ptr := ComObjQuery(obj, iid)
		if !ptr
			return ""
		return ComObject(vt, ptr, 1)
	}

	; Private helper: writes variant type, type name and IID of obj to the log,
	; each line prefixed with "prefix: ". Name and IID may be blank for wrappers
	; that are not VT_DISPATCH.
	_LogComType(prefix, obj)
	{
		if !IsObject(obj) {
			Lg( prefix . ": interface not available" )
			return
		}
		Lg( prefix . ": " . ComObjType(obj) )
		Lg( prefix . ": " . ComObjType(obj, "Name") )
		Lg( prefix . ": " . ComObjType(obj, "IID") )
	}
}

; Event handler reached through the "SR_" prefix given to ComObjConnect:
; the speech recognition engine produced a recognition.
; Logs the stream number, stream position and recognition type, then forwards
; all arguments to the global recognizer instance iSR.
SR_Recognition(StreamNumber, StreamPosition, RecognitionType, ByRef cResult, ByRef cContext )
{
	Global iSR
	eventLine := A_ThisFunc ": StreamNumber(" StreamNumber ") StreamPosition(" StreamPosition ") RecognitionType(" RecognitionType ")"
	Lg( eventLine )
	iSR.OnEvaluate( StreamNumber, StreamPosition, RecognitionType, cResult, cContext )
}

Return to “Ask for Help (v2)”