Add real-time detection to camera stream (#143)

Closes: #30

When the camera is being used to find an image to capture, the region mini model now runs in real time to give an estimate of where there are identifiable structures.

Reviewed-on: #143
This commit is contained in:
2024-03-24 08:51:08 -07:00
parent f09180875a
commit 79316bb83b
21 changed files with 417 additions and 21 deletions

14
src/assets/target.svg Normal file
View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg version="1.1" viewBox="0 0 10 10" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<defs>
<filter id="filter1847" x="-.075" y="-.075" width="1.15" height="1.15" color-interpolation-filters="sRGB">
<feGaussianBlur stdDeviation="0.3125"/>
</filter>
<radialGradient id="radialGradient1903" cx="5" cy="5" r="5.75" gradientUnits="userSpaceOnUse">
<stop stop-color="#ff0" offset="0"/>
<stop stop-color="#ff0" stop-opacity="0" offset="1"/>
</radialGradient>
</defs>
<path d="m0 5 3.833274-1.166726 1.166726-3.833274 1.166726 3.833274 3.833274 1.166726-3.833274 1.166726-1.166726 3.833274-1.166726-3.833274z" color="#000000" fill="url(#radialGradient1903)" fill-rule="evenodd" filter="url(#filter1847)" opacity=".63"/>
</svg>

After

Width:  |  Height:  |  Size: 870 B

View File

@@ -0,0 +1,11 @@
{
"version": "0.0.0-n1",
"region": "Coco",
"size": 640,
"epochs": 1000,
"name": "coco128 test",
"yolo-version": "8.1.20 docker",
"date": "2024-03-12",
"export": "coco128.yaml"
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,92 @@
description: Ultralytics best model trained on /usr/src/ultralytics/ultralytics/cfg/datasets/coco128.yaml
author: Ultralytics
license: AGPL-3.0 https://ultralytics.com/license
date: '2024-03-12T16:25:00.089873'
version: 8.1.20
stride: 32
task: detect
batch: 1
imgsz:
- 640
- 640
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,73 @@
[
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"stop sign",
"bench",
"bird",
"cat",
"dog",
"horse",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"cell phone",
"microwave",
"oven",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"toothbrush"
]

View File

@@ -0,0 +1,11 @@
{
"version": "0.0.0-n1",
"region": "Coco",
"size": 640,
"epochs": 1000,
"name": "coco128 test",
"yolo-version": "8.1.20 docker",
"date": "2024-03-12",
"export": "coco128.yaml"
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,92 @@
description: Ultralytics best model trained on /usr/src/ultralytics/ultralytics/cfg/datasets/coco128.yaml
author: Ultralytics
license: AGPL-3.0 https://ultralytics.com/license
date: '2024-03-12T16:25:00.089873'
version: 8.1.20
stride: 32
task: detect
batch: 1
imgsz:
- 640
- 640
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush

File diff suppressed because one or more lines are too long

View File

@@ -5,7 +5,6 @@ export default {
const devicesList = await navigator.mediaDevices.enumerateDevices() const devicesList = await navigator.mediaDevices.enumerateDevices()
this.videoDeviceAvailable = devicesList.some( d => d.kind == "videoinput") this.videoDeviceAvailable = devicesList.some( d => d.kind == "videoinput")
if (this.videoDeviceAvailable) { if (this.videoDeviceAvailable) {
navigator.mediaDevices.getUserMedia({video: true})
var vidConstraint = { var vidConstraint = {
video: { video: {
width: { width: {
@@ -37,9 +36,6 @@ export default {
const tempCtx = tempCVS.getContext('2d') const tempCtx = tempCVS.getContext('2d')
tempCtx.drawImage(vidViewer, 0, 0) tempCtx.drawImage(vidViewer, 0, 0)
this.getImage(tempCVS.toDataURL()) this.getImage(tempCVS.toDataURL())
},
async videoStream() {
//TODO
} }
} }
} }

View File

@@ -6,12 +6,12 @@
</f7-navbar> </f7-navbar>
<f7-block class="detect-grid"> <f7-block class="detect-grid">
<div class="image-container" ref="image_container"> <div class="image-container" ref="image_container">
<canvas id="im-draw" ref="image_cvs" @click="structureClick" :style="`display: ${imageLoaded ? 'block' : 'none'}; flex: 1 1 0%; max-width: 100%; max-height: 100%; min-width: 0; min-height: 0; background-size: contain; background-position: center; background-repeat: no-repeat`" />
<SvgIcon v-if="!imageView && !videoAvailable" :icon="f7route.params.region" fill-color="var(--avn-theme-color)" @click="selectImage" /> <SvgIcon v-if="!imageView && !videoAvailable" :icon="f7route.params.region" fill-color="var(--avn-theme-color)" @click="selectImage" />
<div class="vid-container" v-if="videoAvailable" style="width: 100%; height: 100%"> <div class="vid-container" :style="`display: ${videoAvailable ? 'block' : 'none'}; position: absolute; width: 100%; height: 100%;`">
<video id="vid-view" ref="vid_viewer" :srcObject="cameraStream" :autoPlay="true" style="width: 100%; height: 100%"></video> <video id="vid-view" ref="vid_viewer" :srcObject="cameraStream" :autoPlay="true" style="width: 100%; height: 100%"></video>
<f7-button @click="captureVidFrame()" style="position: absolute; bottom: 32px; left: 50%; transform: translateX(-50%);" fill large>Capture</f7-button> <f7-button @click="captureVidFrame()" style="position: absolute; bottom: 32px; left: 50%; transform: translateX(-50%); z-index: 3;" fill large>Capture</f7-button>
</div> </div>
<canvas id="im-draw" ref="image_cvs" @click="structureClick" :style="`display: ${(imageLoaded || videoAvailable) ? 'block' : 'none'}; flex: 1 1 0%; max-width: 100%; max-height: 100%; min-width: 0; min-height: 0; background-size: contain; background-position: center; background-repeat: no-repeat; z-index: 2;`" />
</div> </div>
<div class="chip-results" style="grid-area: result-view; flex: 0 0 auto; align-self: center;"> <div class="chip-results" style="grid-area: result-view; flex: 0 0 auto; align-self: center;">
<f7-chip v-for="result in showResults.filter( r => { return r.aboveThreshold && r.isSearched && !r.isDeleted })" <f7-chip v-for="result in showResults.filter( r => { return r.aboveThreshold && r.isSearched && !r.isDeleted })"
@@ -56,6 +56,7 @@
</f7-button> </f7-button>
</f7-segmented> </f7-segmented>
<input type="file" ref="image_chooser" @change="getImage()" accept="image/*" style="display: none;"/> <input type="file" ref="image_chooser" @change="getImage()" accept="image/*" style="display: none;"/>
<img src="../assets/target.svg" ref="target_image" style="display: none;" />
</f7-block> </f7-block>
<f7-panel :id="detectorName + '-settings'" right cover :backdrop="false" :container-el="`#${detectorName}-detect-page`"> <f7-panel :id="detectorName + '-settings'" right cover :backdrop="false" :container-el="`#${detectorName}-detect-page`">
@@ -147,7 +148,9 @@
uploadUid: null, uploadUid: null,
uploadDirty: false, uploadDirty: false,
modelLocation: '', modelLocation: '',
miniLocation: '',
modelLoading: true, modelLoading: true,
reloadModel: false,
videoDeviceAvailable: false, videoDeviceAvailable: false,
videoAvailable: false, videoAvailable: false,
cameraStream: null cameraStream: null
@@ -163,17 +166,23 @@
case 'thorax': case 'thorax':
this.activeRegion = 0 this.activeRegion = 0
this.detectorName = 'thorax' this.detectorName = 'thorax'
//this.classesList = thoraxClasses
/* VITE setting */ /* VITE setting */
this.modelLocation = `../models/thorax${this.otherSettings.mini ? '-mini' : ''}/model.json` this.modelLocation = `../models/thorax${this.otherSettings.mini ? '-mini' : ''}/model.json`
this.miniLocation = `../models/thorax-mini/model.json`
/* PWA Build setting */ /* PWA Build setting */
//this.modelLocation = `./models/thorax${this.otherSettings.mini ? '-mini' : ''}/model.json` //this.modelLocation = `./models/thorax${this.otherSettings.mini ? '-mini' : ''}/model.json`
this.modelLocationCordova = `https://localhost/models/thorax${this.otherSettings.mini ? '-mini' : ''}/model.json` this.modelLocationCordova = `https://localhost/models/thorax${this.otherSettings.mini ? '-mini' : ''}/model.json`
break; break;
case 'abdomen': case 'abdomen':
this.activeRegion = 1 this.activeRegion = 1
this.detectorName = 'combined' this.detectorName = 'abdomen'
break; /* VITE setting */
this.modelLocation = `../models/abdomen${this.otherSettings.mini ? '-mini' : ''}/model.json`
this.miniLocation = `../models/abdomen-mini/model.json`
/* PWA Build setting */
//this.modelLocation = `./models/abdomen${this.otherSettings.mini ? '-mini' : ''}/model.json`
this.modelLocationCordova = `https://localhost/models/abdomen${this.otherSettings.mini ? '-mini' : ''}/model.json`
break;
case 'limbs': case 'limbs':
this.activeRegion = 2 this.activeRegion = 2
this.detectorName = 'defaultNew' this.detectorName = 'defaultNew'
@@ -195,7 +204,7 @@
this.modelLoading = false this.modelLoading = false
} else { } else {
this.modelLoading = true this.modelLoading = true
this.loadModel(this.isCordova ? this.modelLocationCordova : this.modelLocation).then(() => { this.loadModel(this.isCordova ? this.modelLocationCordova : this.modelLocation, true).then(() => {
this.modelLoading = false this.modelLoading = false
}).catch((e) => { }).catch((e) => {
console.log(e.message) console.log(e.message)
@@ -253,7 +262,11 @@
chipGradient (confVal) { chipGradient (confVal) {
return `--chip-media-background: hsl(${confVal / 100 * 120}deg 100% 50%)` return `--chip-media-background: hsl(${confVal / 100 * 120}deg 100% 50%)`
}, },
setData () { async setData () {
if (this.reloadModel) {
await this.loadModel(this.isCordova ? this.modelLocationCordova : this.modelLocation)
this.reloadModel = false
}
if (this.serverSettings && this.serverSettings.use) { if (this.serverSettings && this.serverSettings.use) {
this.remoteDetect() this.remoteDetect()
} else { } else {
@@ -284,7 +297,20 @@
} }
if (mode == "camera") { if (mode == "camera") {
this.videoAvailable = await this.openCamera(this.$refs.image_container) this.videoAvailable = await this.openCamera(this.$refs.image_container)
if (this.videoAvailable) { return } if (this.videoAvailable) {
this.imageLoaded = false
this.imageView = null
this.$refs.image_cvs.style['background-image'] = 'none'
this.resultData = {}
var trackDetails = this.cameraStream.getVideoTracks()[0].getSettings()
var vidElement = this.$refs.vid_viewer
vidElement.width = trackDetails.width
vidElement.height = trackDetails.height
if (!this.otherSettings.disableVideo) {
this.videoFrameDetect(vidElement)
}
return
}
} }
if (mode == 'sample') { if (mode == 'sample') {
f7.dialog.create({ f7.dialog.create({
@@ -350,6 +376,7 @@
if (this.videoAvailable) { if (this.videoAvailable) {
this.closeCamera() this.closeCamera()
this.detecting = true this.detecting = true
this.reloadModel = true
resolve(searchImage) resolve(searchImage)
} else if (this.isCordova && this.imageLoadMode == "camera") { } else if (this.isCordova && this.imageLoadMode == "camera") {
this.detecting = true this.detecting = true
@@ -421,7 +448,7 @@
this.selectChip(findBox >= 0 ? this.resultData.detections[findBox].resultIndex : this.selectedChip) this.selectChip(findBox >= 0 ? this.resultData.detections[findBox].resultIndex : this.selectedChip)
}, },
box2cvs(boxInput) { box2cvs(boxInput) {
if (!boxInput) return [] if (!boxInput || boxInput.length == 0) return []
const boxList = boxInput.length ? boxInput : [boxInput] const boxList = boxInput.length ? boxInput : [boxInput]
const [imCanvas, imageCtx] = this.resetView() const [imCanvas, imageCtx] = this.resetView()
var imgWidth var imgWidth

View File

@@ -5,11 +5,23 @@ var model = null
export default { export default {
methods: { methods: {
async loadModel(weights) { async loadModel(weights, preload) {
if (model && model.modelURL == weights) {
return model
} else if (model) {
model.dispose()
}
model = await tf.loadGraphModel(weights) model = await tf.loadGraphModel(weights)
const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3) const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
const dummyT = tf.ones([1,modelWidth,modelHeight,3]) /*****************
model.predict(dummyT) //Run model once to preload weights for better response time * If preloading then run model
* once on fake data to preload
* weights for a faster response
*****************/
if (preload) {
const dummyT = tf.ones([1,modelWidth,modelHeight,3])
model.predict(dummyT)
}
return model return model
}, },
async localDetect(imageData) { async localDetect(imageData) {
@@ -150,7 +162,60 @@ export default {
remoteTimeout () { remoteTimeout () {
this.detecting = false this.detecting = false
f7.dialog.alert('No connection to remote ALVINN instance. Please check app settings.') f7.dialog.alert('No connection to remote ALVINN instance. Please check app settings.')
} },
async videoFrameDetect (vidData) {
await this.loadModel(this.miniLocation)
const [modelWidth, modelHeight] = model.inputs[0].shape.slice(1, 3)
const imCanvas = this.$refs.image_cvs
const imageCtx = imCanvas.getContext("2d")
const target = this.$refs.target_image
await tf.nextFrame();
imCanvas.width = imCanvas.clientWidth
imCanvas.height = imCanvas.clientHeight
imageCtx.clearRect(0,0,imCanvas.width,imCanvas.height)
var imgWidth
var imgHeight
const imgAspect = vidData.clientWidth / vidData.clientHeight
const rendAspect = imCanvas.width / imCanvas.height
if (imgAspect >= rendAspect) {
imgWidth = imCanvas.width
imgHeight = imCanvas.width / imgAspect
} else {
imgWidth = imCanvas.height * imgAspect
imgHeight = imCanvas.height
}
while (this.videoAvailable) {
console.time('frame-process')
try {
const input = tf.tidy(() => {
return tf.image.resizeBilinear(tf.browser.fromPixels(vidData), [modelWidth, modelHeight]).div(255.0).expandDims(0)
})
const res = model.predict(input)
const rawRes = tf.transpose(res,[0,2,1]).arraySync()[0]
let rawCoords = []
if (rawRes) {
for (var i = 0; i < rawRes.length; i++) {
var getScores = rawRes[i].slice(4)
if (getScores.some( s => s > .5)) {
rawCoords.push(rawRes[i].slice(0,2))
}
}
imageCtx.clearRect(0,0,imCanvas.width,imCanvas.height)
for (var coord of rawCoords) {
console.log(`x: ${coord[0]}, y: ${coord[1]}`)
let pointX = (imCanvas.width - imgWidth) / 2 + (coord[0] / modelWidth) * imgWidth -5
let pointY = (imCanvas.height - imgHeight) / 2 + (coord[1] / modelHeight) * imgHeight -5
imageCtx.drawImage(target, pointX, pointY, 20, 20)
}
}
} catch (e) {
console.log(e)
}
console.timeEnd('frame-process')
await tf.nextFrame();
}
}
} }
} }

View File

@@ -27,6 +27,10 @@
<span style="margin-left: 16px;">Enable demo mode</span> <span style="margin-left: 16px;">Enable demo mode</span>
<f7-toggle v-model:checked="otherSettings.demo" style="margin-right: 16px;" @change="setDirty()" /> <f7-toggle v-model:checked="otherSettings.demo" style="margin-right: 16px;" @change="setDirty()" />
</div> </div>
<div style="display:flex; justify-content:space-between; width: 100%; margin-bottom: 10px;">
<span style="margin-left: 16px;">Disable video estimates<f7-icon size="16" style="padding-left: 5px;" f7="question_diamond_fill" tooltip="faster: recommended for slower devices" /></span>
<f7-toggle v-model:checked="otherSettings.disableVideo" style="margin-right: 16px;" />
</div>
<div style="display:flex; justify-content:space-between; width: 100%"> <div style="display:flex; justify-content:space-between; width: 100%">
<span style="margin-left: 16px;">Use external server</span> <span style="margin-left: 16px;">Use external server</span>
<f7-toggle v-model:checked="serverSettings.use" style="margin-right: 16px;" @change="setDirty()" /> <f7-toggle v-model:checked="serverSettings.use" style="margin-right: 16px;" @change="setDirty()" />

View File

@@ -13,7 +13,8 @@
<f7-list style="width: 100%;"> <f7-list style="width: 100%;">
<f7-list-item :class="otherSettings.mini ? 'unused-model' : ''" title="Thorax" :after="thoraxDetails.version"></f7-list-item> <f7-list-item :class="otherSettings.mini ? 'unused-model' : ''" title="Thorax" :after="thoraxDetails.version"></f7-list-item>
<f7-list-item title="Thorax-m" :after="miniThoraxDetails.version"></f7-list-item> <f7-list-item title="Thorax-m" :after="miniThoraxDetails.version"></f7-list-item>
<f7-list-item title="Abdomen/Pelvis" :after="abdomenDetails.version"></f7-list-item> <f7-list-item :class="otherSettings.mini ? 'unused-model' : ''" title="Abdomen/Pelvis" :after="abdomenDetails.version"></f7-list-item>
<f7-list-item title="Abd/Pel-m" :after="miniAbdomenDetails.version"></f7-list-item>
<f7-list-item title="Limbs" :after="limbsDetails.version"></f7-list-item> <f7-list-item title="Limbs" :after="limbsDetails.version"></f7-list-item>
<f7-list-item title="Head/Neck" :after="headneckDetails.version"></f7-list-item> <f7-list-item title="Head/Neck" :after="headneckDetails.version"></f7-list-item>
</f7-list> </f7-list>
@@ -39,7 +40,8 @@
return { return {
thoraxDetails: {}, thoraxDetails: {},
miniThoraxDetails: {}, miniThoraxDetails: {},
abdomenDetails: { "version": "N/A" }, abdomenDetails: {},
miniAbdomenDetails: {},
limbsDetails: { "version": "N/A" }, limbsDetails: { "version": "N/A" },
headneckDetails: { "version": "N/A" }, headneckDetails: { "version": "N/A" },
alvinnVersion: store().getVersion, alvinnVersion: store().getVersion,
@@ -60,7 +62,14 @@
.then((mod) => { .then((mod) => {
this.miniThoraxDetails = mod.default this.miniThoraxDetails = mod.default
}) })
import('../models/abdomen/descript.json')
.then((mod) => {
this.abdomenDetails = mod.default
})
import('../models/abdomen-mini/descript.json')
.then((mod) => {
this.miniAbdomenDetails = mod.default
})
}, },
methods: { methods: {
} }