From 8cdded7617d5a87ad19e46bc3367031b8f182829 Mon Sep 17 00:00:00 2001
From: Justin Georgi
Date: Thu, 25 Jul 2024 17:56:21 +0000
Subject: [PATCH] Add detection worker (#187)

Closes: #186

This PR moves most of the TensorFlow.js work into a SharedWorker so that
model loading and inference run off the main thread, keeping the UI
responsive during detection.

Reviewed-on: https://gitea.azgeorgis.net/ALVINN/ALVINN_f7/pulls/187
---
 src/assets/detect-worker.js  | 182 ++++++++++++++++++++++++++++++++++++
 src/pages/camera-mixin.js    |  46 +++++++++
 src/pages/detect.vue         |  72 +++++++++-----
 src/pages/detection-mixin.js | 167 --------------------------------
 4 files changed, 277 insertions(+), 190 deletions(-)
 create mode 100644 src/assets/detect-worker.js
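Note: every page talks to the worker over a MessagePort with a small
call/response protocol ({call: ...} in, {success: ...} or {error: ...} out).
A minimal sketch of the page side, assuming the same relative worker path
used in the diffs below (startDetection and render are placeholders, not
functions from this repo):

    // Attach to the shared detection worker; all pages share one instance.
    const worker = new SharedWorker('../assets/detect-worker.js', { type: 'module' })
    worker.port.onmessage = (e) => {
      if (e.data.error) console.error(e.data.message)           // worker-side failure
      else if (e.data.success === 'model') startDetection()     // model loaded and warmed up
      else if (e.data.success === 'detection') render(e.data.detections)
    }
    // Load (and optionally warm up) the model before the first detection request.
    worker.port.postMessage({ call: 'loadModel', weights: modelLocation, preload: true })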
diff --git a/src/assets/detect-worker.js b/src/assets/detect-worker.js
new file mode 100644
index 0000000..828d1d4
--- /dev/null
+++ b/src/assets/detect-worker.js
@@ -0,0 +1,182 @@
+import * as tf from '@tensorflow/tfjs'
+
+let model = null
+
+self.onconnect = (e) => {
+  const port = e.ports[0];
+
+  port.onmessage = function (e) {
+    switch (e.data.call) {
+      case 'loadModel':
+        loadModel('.' + e.data.weights, e.data.preload).then(() => {
+          port.postMessage({success: 'model'})
+        }).catch((err) => {
+          port.postMessage({error: true, message: err.message})
+        })
+        break
+      case 'localDetect':
+        localDetect(e.data.image).then((dets) => {
+          port.postMessage({success: 'detection', detections: dets})
+        }).catch((err) => {
+          port.postMessage({error: true, message: err.message})
+        })
+        e.data.image.close()
+        break
+      case 'videoFrame':
+        videoFrame(e.data.image).then((frameDet) => {
+          port.postMessage({success: 'frame', coords: frameDet.cds, modelWidth: frameDet.mW, modelHeight: frameDet.mH})
+        }).catch((err) => {
+          port.postMessage({error: true, message: err.message})
+        })
+        e.data.image.close()
+        break
+      default:
+        console.log('Worker message incoming:')
+        console.log(e)
+        port.postMessage({result1: 'First result', result2: 'Second result'})
+        break
+    }
+  }
+
+  port.start()
+}
+
+async function loadModel(weights, preload) {
+  if (model && model.modelURL == weights) {
+    return model
+  } else if (model) {
+    tf.dispose(model)
+  }
+  model = await tf.loadGraphModel(weights)
+  const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
+  /*****************
+   * If preloading then run the model
+   * once on dummy data to warm up the
+   * weights for a faster first response
+   *****************/
+  if (preload) {
+    const dummyT = tf.ones([1,modelWidth,modelHeight,3])
+    model.predict(dummyT)
+  }
+  return model
+}
+
+async function localDetect(imageData) {
+  console.time('pre-process')
+  const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
+  let gTense = null
+  const input = tf.tidy(() => {
+    gTense = tf.image.rgbToGrayscale(tf.image.resizeBilinear(tf.browser.fromPixels(imageData), [modelWidth, modelHeight])).div(255.0).expandDims(0)
+    return tf.concat([gTense,gTense,gTense],3)
+  })
+  tf.dispose(gTense)
+  console.timeEnd('pre-process')
+
+  console.time('run prediction')
+  const res = model.predict(input)
+  const tRes = tf.transpose(res,[0,2,1])
+  const rawRes = tRes.arraySync()[0]
+  console.timeEnd('run prediction')
+
+  console.time('post-process')
+  const outputSize = res.shape[1]
+  let rawBoxes = []
+  let rawScores = []
+
+  for (var i = 0; i < rawRes.length; i++) {
+    var getScores = rawRes[i].slice(4)
+    if (getScores.every( s => s < .05)) { continue }
+    var getBox = rawRes[i].slice(0,4)
+    var boxCalc = [
+      (getBox[0] - (getBox[2] / 2)) / modelWidth,
+      (getBox[1] - (getBox[3] / 2)) / modelHeight,
+      (getBox[0] + (getBox[2] / 2)) / modelWidth,
+      (getBox[1] + (getBox[3] / 2)) / modelHeight,
+    ]
+    rawBoxes.push(boxCalc)
+    rawScores.push(getScores)
+  }
+
+  if (rawBoxes.length > 0) {
+    const tBoxes = tf.tensor2d(rawBoxes)
+    let tScores = null
+    let resBoxes = null
+    let validBoxes = []
+    let structureScores = null
+    let boxes_data = []
+    let scores_data = []
+    let classes_data = []
+    for (var c = 0; c < outputSize - 4; c++) {
+      structureScores = rawScores.map(x => x[c])
+      tScores = tf.tensor1d(structureScores)
+      resBoxes = await tf.image.nonMaxSuppressionAsync(tBoxes,tScores,10,0.5,.05)
+      validBoxes = resBoxes.dataSync()
+      tf.dispose(resBoxes)
+      if (validBoxes) {
+        boxes_data.push(...rawBoxes.filter( (_, idx) => validBoxes.includes(idx)))
+        var outputScores = structureScores.filter( (_, idx) => validBoxes.includes(idx))
+        scores_data.push(...outputScores)
+        classes_data.push(...outputScores.fill(c))
+      }
+    }
+
+    validBoxes = []
+    tf.dispose(tBoxes)
+    tf.dispose(tScores)
+    tf.dispose(tRes)
+    const valid_detections_data = classes_data.length
+    var output = {
+      detections: []
+    }
+    for (var i = 0; i < valid_detections_data; i++) {
+      var [dLeft, dTop, dRight, dBottom] = boxes_data[i]
+      output.detections.push({
+        "top": dTop,
+        "left": dLeft,
+        "bottom": dBottom,
+        "right": dRight,
+        // the worker has no access to detectorLabels, so only the class
+        // index is returned; the main thread resolves it to a label name
+        "label": classes_data[i],
+        "confidence": scores_data[i] * 100
+      })
+    }
+  }
+  tf.dispose(res)
+  tf.dispose(input)
+  console.timeEnd('post-process')
+
+  return output || { detections: [] }
+}
+
+async function videoFrame (vidData) {
+  const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
+  console.time('frame-process')
+  let rawCoords = []
+  try {
+    const input = tf.tidy(() => {
+      return tf.image.resizeBilinear(tf.browser.fromPixels(vidData), [modelWidth, modelHeight]).div(255.0).expandDims(0)
+    })
+    const res = model.predict(input)
+    const rawRes = tf.transpose(res,[0,2,1]).arraySync()[0]
+
+    if (rawRes) {
+      for (var i = 0; i < rawRes.length; i++) {
+        let getScores = rawRes[i].slice(4)
+        if (getScores.some( s => s > .5)) {
+          let foundTarget = rawRes[i].slice(0,2)
+          foundTarget.push(Math.max(...getScores))
+          rawCoords.push(foundTarget)
+        }
+      }
+    }
+    tf.dispose(input)
+    tf.dispose(res)
+    tf.dispose(rawRes)
+  } catch (e) {
+    console.log(e)
+  }
+  console.timeEnd('frame-process')
+  return {cds: rawCoords, mW: modelWidth, mH: modelHeight}
+}
\ No newline at end of file
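For reference, the post-processing above assumes a YOLO-style output layout:
after the transpose, each candidate row is [cx, cy, w, h, score_0 ... score_n]
with the box centre and size in model pixels. localDetect converts each row to
normalized corner coordinates before running nonMaxSuppressionAsync once per
class column, so boxes only suppress each other within the same class. A
worked check of the corner conversion, using made-up numbers for a 640x640
model:

    // Hypothetical raw candidate: centre (320, 160), size 64x32, model 640x640.
    const [cx, cy, w, h] = [320, 160, 64, 32]
    const corners = [
      (cx - w / 2) / 640,  // left   = 0.45
      (cy - h / 2) / 640,  // top    = 0.225
      (cx + w / 2) / 640,  // right  = 0.55
      (cy + h / 2) / 640,  // bottom = 0.275
    ]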
diff --git a/src/pages/camera-mixin.js b/src/pages/camera-mixin.js
index 314f596..93e8e01 100644
--- a/src/pages/camera-mixin.js
+++ b/src/pages/camera-mixin.js
@@ -1,3 +1,5 @@
+import { f7 } from 'framework7-vue'
+
 export default {
   methods: {
     async openCamera(imContain) {
@@ -38,6 +40,50 @@
       const tempCtx = tempCVS.getContext('2d')
       tempCtx.drawImage(vidViewer, 0, 0)
       this.getImage(tempCVS.toDataURL())
+    },
+    async videoFrameDetect (vidData) {
+      const vidWorker = new SharedWorker('../assets/detect-worker.js',{type: 'module'})
+      vidWorker.port.onmessage = (eVid) => {
+        if (eVid.data.error) {
+          console.log(eVid.data.message)
+          f7.dialog.alert(`ALVINN AI model error: ${eVid.data.message}`)
+        } else if (this.videoAvailable) {
+          createImageBitmap(vidData).then(imVideoFrame => {
+            vidWorker.port.postMessage({call: 'videoFrame', image: imVideoFrame}, [imVideoFrame])
+          })
+          if (eVid.data.coords) {
+            imageCtx.clearRect(0,0,imCanvas.width,imCanvas.height)
+            for (var coord of eVid.data.coords) {
+              let pointX = (imCanvas.width - imgWidth) / 2 + (coord[0] / eVid.data.modelWidth) * imgWidth - 10
+              let pointY = (imCanvas.height - imgHeight) / 2 + (coord[1] / eVid.data.modelHeight) * imgHeight - 10
+              console.debug(`cx: ${pointX}, cy: ${pointY}`)
+              imageCtx.globalAlpha = coord[2]
+              imageCtx.drawImage(target, pointX, pointY, 20, 20)
+            }
+          }
+        }
+      }
+
+      vidWorker.port.postMessage({call: 'loadModel', weights: this.miniLocation, preload: true})
+      const imCanvas = this.$refs.image_cvs
+      const imageCtx = imCanvas.getContext("2d")
+      const target = this.$refs.target_image
+      var imgWidth
+      var imgHeight
+      f7.utils.nextFrame(() => {
+        imCanvas.width = imCanvas.clientWidth
+        imCanvas.height = imCanvas.clientHeight
+        imageCtx.clearRect(0,0,imCanvas.width,imCanvas.height)
+        const imgAspect = vidData.width / vidData.height
+        const rendAspect = imCanvas.width / imCanvas.height
+        if (imgAspect >= rendAspect) {
+          imgWidth = imCanvas.width
+          imgHeight = imCanvas.width / imgAspect
+        } else {
+          imgWidth = imCanvas.height * imgAspect
+          imgHeight = imCanvas.height
+        }
+      })
     }
   }
 }
\ No newline at end of file
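videoFrameDetect letterboxes the video into the canvas (imgWidth/imgHeight
are the rendered size, centred in the canvas), maps each model-space point
into that area, then subtracts half the 20px marker so the marker is centred
on the point. The same arithmetic as a standalone sketch (function and
argument names are illustrative, not from the codebase):

    // Map a model-space coordinate onto the letterboxed canvas.
    function toCanvas(coord, modelSize, renderedSize, canvasSize, markerSize = 20) {
      const letterbox = (canvasSize - renderedSize) / 2   // centring offset
      return letterbox + (coord / modelSize) * renderedSize - markerSize / 2
    }
    toCanvas(320, 640, 800, 800)   // => 390: halfway across, minus half the marker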
diff --git a/src/pages/detect.vue b/src/pages/detect.vue
index 8e83f6a..ac81e18 100644
--- a/src/pages/detect.vue
+++ b/src/pages/detect.vue
@@ -177,7 +177,8 @@
       videoDeviceAvailable: false,
       videoAvailable: false,
       cameraStream: null,
-      infoLinkPos: {}
+      infoLinkPos: {},
+      workerScript: null
     }
   },
   setup() {
@@ -204,7 +205,7 @@
     }
     this.modelLocation = `${modelRoot}/models/${this.detectorName}${this.otherSettings.mini ? '-mini' : ''}/model.json`
     this.miniLocation = `${modelRoot}/models/${this.detectorName}-mini/model.json`
-    fetch(`${this.isCordova ? 'https://localhost' : '.'}/models/${this.detectorName}/classes.json`)
+    fetch(`${modelRoot}/models/${this.detectorName}/classes.json`)
       .then((mod) => { return mod.json() })
       .then((classes) => {
         this.classesList = classes
@@ -214,18 +215,22 @@
     if (loadServerSettings) this.serverSettings = JSON.parse(loadServerSettings)
   },
   mounted () {
+    const mountWorker = new SharedWorker('../assets/detect-worker.js',{type: 'module'})
+    mountWorker.port.onmessage = (eMount) => {
+      const self = this
+      if (eMount.data.error) {
+        console.log(eMount.data.message)
+        f7.dialog.alert(`ALVINN AI model error: ${eMount.data.message}`)
+      }
+      self.modelLoading = false
+    }
+
     if (this.serverSettings && this.serverSettings.use) {
       this.getRemoteLabels()
       this.modelLoading = false
     } else {
       this.modelLoading = true
-      this.loadModel(this.modelLocation, true).then(() => {
-        this.modelLoading = false
-      }).catch((e) => {
-        console.log(e.message)
-        f7.dialog.alert(`ALVINN AI model error: ${e.message}`)
-        this.modelLoading = false
-      })
+      mountWorker.port.postMessage({call: 'loadModel', weights: this.modelLocation, preload: true})
     }
     window.onresize = (e) => { if (this.$refs.image_cvs) this.selectChip('redraw') }
   },
@@ -287,22 +292,43 @@
       return `--chip-media-gradient: conic-gradient(from ${270 - (confFactor * 360 / 2)}deg, hsl(${confFactor * 120}deg, 100%, 50%) ${confFactor}turn, hsl(${confFactor * 120}deg, 50%, 66%) ${confFactor}turn)`
     },
     async setData () {
-      if (this.reloadModel) {
-        await this.loadModel(this.modelLocation)
-        this.reloadModel = false
+      const detectWorker = new SharedWorker('../assets/detect-worker.js',{type: 'module'})
+      detectWorker.port.onmessage = (eDetect) => {
+        const self = this
+        if (eDetect.data.error) {
+          self.detecting = false
+          self.resultData = {}
+          f7.dialog.alert(`ALVINN structure finding error: ${eDetect.data.message}`)
+        } else if (eDetect.data.success == 'detection') {
+          self.detecting = false
+          self.resultData = eDetect.data.detections
+          if (self.resultData) {
+            self.resultData.detections.forEach(d => { d.label = self.detectorLabels[d.label].name })
+          }
+          self.uploadDirty = true
+        } else if (eDetect.data.success == 'model') {
+          this.reloadModel = false
+          loadSuccess(true)
+        }
       }
+
+      let loadSuccess = null
+      let loadFailure = null
+      let modelReloading = new Promise((res, rej) => {
+        loadSuccess = res
+        loadFailure = rej
+        if (this.reloadModel) {
+          detectWorker.port.postMessage({call: 'loadModel', weights: this.modelLocation})
+        } else {
+          loadSuccess(true)
+        }
+      })
+
       if (this.serverSettings && this.serverSettings.use) {
         this.remoteDetect()
       } else {
-        this.localDetect(this.imageView).then(dets => {
-          this.detecting = false
-          this.resultData = dets
-          this.uploadDirty = true
-        }).catch((e) => {
-          console.log(e.message)
-          this.detecting = false
-          this.resultData = {}
-          f7.dialog.alert(`ALVINN structure finding error: ${e.message}`)
+        Promise.all([modelReloading, createImageBitmap(this.imageView)]).then(res => {
+          detectWorker.port.postMessage({call: 'localDetect', image: res[1]}, [res[1]])
         })
       }
     },
@@ -449,9 +475,9 @@
        * setTimeout is not a good solution, but it's the only way
        * I can find to not cut off drawing of the canvas background
        ******/
-      setTimeout(() => {
+//      setTimeout(() => {
        this.setData()
-      }, 1)
+//      }, 1)
     }).catch((e) => {
       console.log(e.message)
       f7.dialog.alert(`Error loading image: ${e.message}`)
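setData() gates detection on a promise whose resolver escapes into the
worker's message handler: when a model reload is pending, the worker's
'model' reply releases the gate; otherwise it resolves immediately. Reduced
to its core (a sketch, with worker and image standing in for detectWorker
and this.imageView):

    let release
    const modelReady = new Promise(res => { release = res })  // resolved by the 'model' reply
    worker.port.onmessage = (e) => {
      if (e.data.success === 'model') release(true)
    }
    // Detection fires only once the model is ready and the bitmap is built.
    Promise.all([modelReady, createImageBitmap(image)]).then(([, bitmap]) => {
      worker.port.postMessage({ call: 'localDetect', image: bitmap }, [bitmap])
    })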
diff --git a/src/pages/detection-mixin.js b/src/pages/detection-mixin.js
index d0b7197..438742c 100644
--- a/src/pages/detection-mixin.js
+++ b/src/pages/detection-mixin.js
@@ -1,114 +1,7 @@
-import * as tf from '@tensorflow/tfjs'
 import { f7 } from 'framework7-vue'
 
-let model = null
-
 export default {
   methods: {
-    async loadModel(weights, preload) {
-      if (model && model.modelURL == weights) {
-        return model
-      } else if (model) {
-        tf.dispose(model)
-      }
-      model = await tf.loadGraphModel(weights)
-      const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
-      /*****************
-       * If preloading then run model
-       * once on fake data to preload
-       * weights for a faster response
-       *****************/
-      if (preload) {
-        const dummyT = tf.ones([1,modelWidth,modelHeight,3])
-        model.predict(dummyT)
-      }
-      return model
-    },
-    async localDetect(imageData) {
-      console.time('pre-process')
-      const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
-      let gTense = null
-      const input = tf.tidy(() => {
-        gTense = tf.image.rgbToGrayscale(tf.image.resizeBilinear(tf.browser.fromPixels(imageData), [modelWidth, modelHeight])).div(255.0).expandDims(0)
-        return tf.concat([gTense,gTense,gTense],3)
-      })
-      tf.dispose(gTense)
-      console.timeEnd('pre-process')
-
-      console.time('run prediction')
-      const res = model.predict(input)
-      const tRes = tf.transpose(res,[0,2,1])
-      const rawRes = tRes.arraySync()[0]
-      console.timeEnd('run prediction')
-
-      console.time('post-process')
-      const outputSize = res.shape[1]
-      let rawBoxes = []
-      let rawScores = []
-
-      for (var i = 0; i < rawRes.length; i++) {
-        var getScores = rawRes[i].slice(4)
-        if (getScores.every( s => s < .05)) { continue }
-        var getBox = rawRes[i].slice(0,4)
-        var boxCalc = [
-          (getBox[0] - (getBox[2] / 2)) / modelWidth,
-          (getBox[1] - (getBox[3] / 2)) / modelHeight,
-          (getBox[0] + (getBox[2] / 2)) / modelWidth,
-          (getBox[1] + (getBox[3] / 2)) / modelHeight,
-        ]
-        rawBoxes.push(boxCalc)
-        rawScores.push(getScores)
-      }
-
-      if (rawBoxes.length > 0) {
-        const tBoxes = tf.tensor2d(rawBoxes)
-        let tScores = null
-        let resBoxes = null
-        let validBoxes = []
-        let structureScores = null
-        let boxes_data = []
-        let scores_data = []
-        let classes_data = []
-        for (var c = 0; c < outputSize - 4; c++) {
-          structureScores = rawScores.map(x => x[c])
-          tScores = tf.tensor1d(structureScores)
-          resBoxes = await tf.image.nonMaxSuppressionAsync(tBoxes,tScores,10,0.5,.05)
-          validBoxes = resBoxes.dataSync()
-          tf.dispose(resBoxes)
-          if (validBoxes) {
-            boxes_data.push(...rawBoxes.filter( (_, idx) => validBoxes.includes(idx)))
-            var outputScores = structureScores.filter( (_, idx) => validBoxes.includes(idx))
-            scores_data.push(...outputScores)
-            classes_data.push(...outputScores.fill(c))
-          }
-        }
-
-        validBoxes = []
-        tf.dispose(tBoxes)
-        tf.dispose(tScores)
-        tf.dispose(tRes)
-        const valid_detections_data = classes_data.length
-        var output = {
-          detections: []
-        }
-        for (var i = 0; i < valid_detections_data; i++) {
-          var [dLeft, dTop, dRight, dBottom] = boxes_data[i]
-          output.detections.push({
-            "top": dTop,
-            "left": dLeft,
-            "bottom": dBottom,
-            "right": dRight,
-            "label": this.detectorLabels[classes_data[i]].name,
-            "confidence": scores_data[i] * 100
-          })
-        }
-      }
-      tf.dispose(res)
-      tf.dispose(input)
-      console.timeEnd('post-process')
-
-      return output || { detections: [] }
-    },
     getRemoteLabels() {
       var self = this
       var modelURL = `http://${this.serverSettings.address}:${this.serverSettings.port}/detectors`
@@ -172,65 +65,5 @@ export default {
       this.detecting = false
       f7.dialog.alert('No connection to remote ALVINN instance. Please check app settings.')
     },
-    async videoFrameDetect (vidData) {
-      await this.loadModel(this.miniLocation)
-      const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
-      const imCanvas = this.$refs.image_cvs
-      const imageCtx = imCanvas.getContext("2d")
-      const target = this.$refs.target_image
-      await tf.nextFrame();
-      imCanvas.width = imCanvas.clientWidth
-      imCanvas.height = imCanvas.clientHeight
-      imageCtx.clearRect(0,0,imCanvas.width,imCanvas.height)
-      var imgWidth
-      var imgHeight
-      const imgAspect = vidData.width / vidData.height
-      const rendAspect = imCanvas.width / imCanvas.height
-      if (imgAspect >= rendAspect) {
-        imgWidth = imCanvas.width
-        imgHeight = imCanvas.width / imgAspect
-      } else {
-        imgWidth = imCanvas.height * imgAspect
-        imgHeight = imCanvas.height
-      }
-      while (this.videoAvailable) {
-        console.time('frame-process')
-        try {
-          const input = tf.tidy(() => {
-            return tf.image.resizeBilinear(tf.browser.fromPixels(vidData), [modelWidth, modelHeight]).div(255.0).expandDims(0)
-          })
-          const res = model.predict(input)
-          const rawRes = tf.transpose(res,[0,2,1]).arraySync()[0]
-
-          let rawCoords = []
-          if (rawRes) {
-            for (var i = 0; i < rawRes.length; i++) {
-              let getScores = rawRes[i].slice(4)
-              if (getScores.some( s => s > .5)) {
-                let foundTarget = rawRes[i].slice(0,2)
-                foundTarget.push(Math.max(...getScores))
-                rawCoords.push(foundTarget)
-              }
-            }
-
-            imageCtx.clearRect(0,0,imCanvas.width,imCanvas.height)
-            for (var coord of rawCoords) {
-              console.log(`x: ${coord[0]}, y: ${coord[1]}`)
-              let pointX = (imCanvas.width - imgWidth) / 2 + (coord[0] / modelWidth) * imgWidth -5
-              let pointY = (imCanvas.height - imgHeight) / 2 + (coord[1] / modelHeight) * imgHeight -5
-              imageCtx.globalAlpha = coord[2]
-              imageCtx.drawImage(target, pointX, pointY, 20, 20)
-            }
-          }
-          tf.dispose(input)
-          tf.dispose(res)
-          tf.dispose(rawRes)
-        } catch (e) {
-          console.log(e)
-        }
-        console.timeEnd('frame-process')
-        await tf.nextFrame();
-      }
-    }
   }
 }
\ No newline at end of file
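One design note on the bitmap hand-off used throughout this patch:
ImageBitmap is transferable, so passing it in the postMessage transfer list
moves the pixels to the worker without copying, and the worker calls
image.close() once they have been read. A sketch (videoEl is a placeholder
for any CanvasImageSource):

    const bitmap = await createImageBitmap(videoEl)
    // The second argument transfers ownership; `bitmap` is detached on this side afterwards.
    port.postMessage({ call: 'videoFrame', image: bitmap }, [bitmap])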