Add full yolov8-compatible post-processing (#115)

This updates the local detection from the older yolov5-based models to yolov8-based models. The change requires significant additional post-processing of the raw yolov8 output, but it also means that the smaller nano yolo model is available, as well as all of the updated ALVINN thorax training data.

Signed-off-by: Justin Georgi <justin.georgi@gmail.com>
Reviewed-on: Georgi_Lab/ALVINN_f7#115
2024-03-04 15:03:20 -07:00
parent 7f1aa63f07
commit 069ad74e30
24 changed files with 252 additions and 42 deletions
--- a/src/pages/detect.vue
+++ b/src/pages/detect.vue
@@ -289,7 +289,7 @@
  import submitMixin from './submit-mixin'
  import detectMixin from './local-detect'

-  import thoraxClasses from '../models/thorax_tfwm/classes.json'
+  import thoraxClasses from '../models/thorax-0.1.0-n960/classes.json'

  export default {
    mixins: [submitMixin, detectMixin],
@@ -334,9 +334,9 @@
          this.detectorName = 'thorax'
          this.classesList = thoraxClasses
          /* VITE setting */
-          this.modelLocation = '../models/thorax_tfwm/model.json'
+          this.modelLocation = '../models/thorax-0.1.0-n960/model.json'
          /* PWA Build setting */
-          //this.modelLocation = './models/thorax_tfwm/model.json'
+          //this.modelLocation = './models/thorax-0.1.0-n960/model.json'
          this.modelLocationCordova = 'https://localhost/models/thorax_tfwm/model.json'
          break;
        case 'abdomen':
--- a/src/pages/local-detect.js
+++ b/src/pages/local-detect.js
@@ -8,41 +8,69 @@ export default {
      model = await tf.loadGraphModel(weights).then(graphModel => {
        return graphModel
      })
-    },
+  },
    async localDetect(imageData) {
      const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3);

      const input = tf.tidy(() => {
        return tf.image.resizeBilinear(tf.browser.fromPixels(imageData), [modelWidth, modelHeight]).div(255.0).expandDims(0)
      })
-      var results = model.executeAsync(input).then(res => {
-        const [boxes, scores, classes, valid_detections] = res;
-        const boxes_data = boxes.dataSync();
-        const scores_data = scores.dataSync();
-        const classes_data = classes.dataSync();
-        const valid_detections_data = valid_detections.dataSync()[0];

-        tf.dispose(res)
+      const res = model.predict(input)
+      const detectAttempts = res.shape[2]
+      const outputSize = res.shape[1]
+      const rawRes = tf.transpose(res,[0,2,1]).dataSync()
+      let rawBoxes = []
+      let rawScores = []

-        var output = {
-          detections: []
-        }
-        for (var i =0; i < valid_detections_data; i++) {
-          var [dLeft, dTop, dRight, dBottom] = boxes_data.slice(i * 4, (i + 1) * 4);
-          output.detections.push({
-            "top": dTop,
-            "left": dLeft,
-            "bottom": dBottom,
-            "right": dRight,
-            "label": this.detectorLabels[classes_data[i]].name,
-            "confidence": scores_data[i] * 100
-          })
+      for (var i = 0; i < detectAttempts; i++) {
+        var getBox = rawRes.slice((i * outputSize),(i * outputSize) + 4)
+        var boxCalc = [
+          (getBox[0] - (getBox[2] / 2)) / modelWidth,
+          (getBox[1] - (getBox[3] / 2)) / modelHeight,
+          (getBox[0] + (getBox[2] / 2)) / modelWidth,
+          (getBox[1] + (getBox[3] / 2)) / modelHeight,
+        ]
+        rawBoxes.push(boxCalc)
+        rawScores.push(rawRes.slice((i * outputSize) + 4,(i + 1) * outputSize))
+      }
+      const tBoxes = tf.tensor2d(rawBoxes)
+      let tScores = null
+      let boxes_data = []
+      let scores_data = []
+      let classes_data = []
+      for (var c = 0; c < outputSize - 4; c++) {
+        tScores = rawScores.map(x => x[c])
+        var validBoxes = await tf.image.nonMaxSuppressionAsync(tBoxes,tf.tensor1d(tScores),10,0.5,.05)
+        validBoxes = validBoxes.dataSync()
+        if (validBoxes) {
+          boxes_data.push(...rawBoxes.filter( (_, idx) => validBoxes.includes(idx)))
+          var outputScores = tScores.filter( (_, idx) => validBoxes.includes(idx))
+          scores_data.push(...outputScores)
+          classes_data.push(...outputScores.fill(c))
        }
+      }

-        return output
-      })
+      const valid_detections_data = classes_data.length
+      var output = {
+        detections: []
+      }
+      for (var i =0; i < valid_detections_data; i++) {
+        var [dLeft, dTop, dRight, dBottom] = boxes_data[i]
+        output.detections.push({
+          "top": dTop,
+          "left": dLeft,
+          "bottom": dBottom,
+          "right": dRight,
+          "label": this.detectorLabels[classes_data[i]].name,
+          "confidence": scores_data[i] * 100
+        })
+      }

-      return results
+      tf.dispose(res)
+      tf.dispose(tBoxes)
+
+      return output
    }
  }
 }