commit f647a509d5 ("detection")
parent c445068773

This commit replaces the ML Kit subject-segmentation path in AIModelImpl with TFLite EfficientDet-Lite0 object detection: it adds a labels.txt asset and an ObjectDetectionResult type, exposes detectObject on the AIModel interface, threads the detected bounding box through CameraRepository.processFrame (where a synthetic box mask stands in for the now-missing segmentation), and draws the detection box, label, and confidence as overlays in CameraScreen.
labels.txt (new file)
@@ -0,0 +1,81 @@
+Unknown
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
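These are the 80 COCO classes with an "Unknown" placeholder at index 0, matching the fallback used in AIModelImpl below. A minimal sketch of the index-to-label lookup (assuming the file is loaded in order, e.g. via FileUtil.loadLabels(context, "labels.txt"), so index 1 resolves to "person"):

    // Sketch: resolving a detector class index against labels.txt.
    fun labelFor(labels: List<String>, classIndex: Int): String =
        labels.getOrElse(classIndex) { "Unknown" } // out-of-range indices fall back as well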
AIModelImpl.kt (com.example.livingai.data.ml)
@@ -1,116 +1,94 @@
 package com.example.livingai.data.ml
 
+import android.content.Context
 import android.graphics.Bitmap
-import android.graphics.Color
 import android.graphics.Rect
 import com.example.livingai.domain.ml.AIModel
-import com.google.mlkit.vision.common.InputImage
-import com.google.mlkit.vision.segmentation.subject.SubjectSegmentation
-import com.google.mlkit.vision.segmentation.subject.SubjectSegmenterOptions
-import kotlinx.coroutines.suspendCancellableCoroutine
-import kotlin.coroutines.resume
-import kotlin.coroutines.resumeWithException
-
-private const val MASK_COLOR = 0x5500FF00 // semi-transparent green overlay
-
-class AIModelImpl : AIModel {
-
-    private val segmenter by lazy {
-        val options = SubjectSegmenterOptions.Builder()
-            .enableForegroundBitmap()
-            .build()
-        SubjectSegmentation.getClient(options)
-    }
-
-    override fun deriveInference(bitmap: Bitmap): String = "Inference Result"
-
-    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
-        return suspendCancellableCoroutine { cont ->
-            val image = InputImage.fromBitmap(bitmap, 0)
-
-            segmenter.process(image)
-                .addOnSuccessListener { result ->
-                    val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)
-
-                    // Instead of coloring it here, just pass the original mask bitmap
-                    // or ensure it's suitable for further processing.
-                    // The foreground bitmap from MLKit is usually the object cut out with transparent background.
-
-                    val booleanMask = createBooleanMask(fg)
-                    // We return the raw foreground bitmap as the 'maskBitmap' for now,
-                    // or a colorized version if that's what UI expects.
-                    // But for IOU/Overlap calculation, we might want the binary info.
-                    // The UI seems to overlay 'colorMask'.
-                    // DistanceEstimator uses 'segMaskBitmap'.
-
-                    val colorMask = createColorizedMask(fg)
-                    val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)
-
-                    // Returning colorMask as the first element because UI expects a visual overlay.
-                    // But note: DistanceEstimator might need the binary mask or the foreground.
-                    // If DistanceEstimator treats this bitmap as a mask, colorized is fine as long as alpha is preserved.
-                    cont.resume(Triple(colorMask, booleanMask, bbox))
-                }
-                .addOnFailureListener { e ->
-                    cont.resumeWithException(e)
-                }
-        }
-    }
-
-    private fun createColorizedMask(maskBitmap: Bitmap): Bitmap {
-        val w = maskBitmap.width
-        val h = maskBitmap.height
-        val pixels = IntArray(w * h)
-
-        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)
-
-        for (i in pixels.indices) {
-            // ML Kit Foreground Bitmap: Non-transparent pixels are the object.
-            if (Color.alpha(pixels[i]) > 0) {
-                pixels[i] = MASK_COLOR
-            }
-        }
-
-        return Bitmap.createBitmap(pixels, w, h, Bitmap.Config.ARGB_8888)
-    }
-
-    private fun createBooleanMask(bitmap: Bitmap): BooleanArray {
-        val w = bitmap.width
-        val h = bitmap.height
-        val mask = BooleanArray(w * h)
-        val pixels = IntArray(w * h)
-
-        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)
-
-        for (i in pixels.indices) {
-            mask[i] = Color.alpha(pixels[i]) > 0
-        }
-
-        return mask
-    }
-
-    private fun computeBoundingBox(mask: BooleanArray, w: Int, h: Int): Rect {
-        var minX = Int.MAX_VALUE
-        var minY = Int.MAX_VALUE
-        var maxX = Int.MIN_VALUE
-        var maxY = Int.MIN_VALUE
-
-        for (y in 0 until h) {
-            for (x in 0 until w) {
-                val idx = y * w + x
-                if (mask[idx]) {
-                    if (x < minX) minX = x
-                    if (y < minY) minY = y
-                    if (x > maxX) maxX = x
-                    if (y > maxY) maxY = y
-                }
-            }
-        }
-
-        return if (minX == Int.MAX_VALUE) {
-            Rect(0, 0, 0, 0)
-        } else {
-            Rect(minX, minY, maxX, maxY)
-        }
-    }
-}
+import org.tensorflow.lite.Interpreter
+import org.tensorflow.lite.support.common.FileUtil
+import org.tensorflow.lite.support.image.ImageProcessor
+import org.tensorflow.lite.support.image.TensorImage
+import org.tensorflow.lite.support.image.ops.ResizeOp
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+
+class AIModelImpl(private val context: Context) : AIModel {
+
+    private val objectDetector: Interpreter
+    private val labels: List<String>
+
+    init {
+        // Load the TFLite model from assets
+        val modelBuffer = FileUtil.loadMappedFile(context, "efficientdet-lite0.tflite")
+        val options = Interpreter.Options().apply { numThreads = 4 }
+        objectDetector = Interpreter(modelBuffer, options)
+
+        // Load labels from assets
+        labels = try {
+            FileUtil.loadLabels(context, "labels.txt")
+        } catch (e: Exception) {
+            e.printStackTrace()
+            emptyList()
+        }
+    }
+
+    override suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? {
+        // Preprocess the image
+        val imageProcessor = ImageProcessor.Builder()
+            .add(ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
+            .build()
+
+        var tensorImage = TensorImage.fromBitmap(bitmap)
+        tensorImage = imageProcessor.process(tensorImage)
+
+        // Prepare model inputs and outputs
+        // Based on crash: [1, 25, 4] vs [1, 10, 4]. The model outputs 25 detections, not 10.
+        val locations = Array(1) { Array(25) { FloatArray(4) } }
+        val classes = Array(1) { FloatArray(25) }
+        val scores = Array(1) { FloatArray(25) }
+        val numDetections = FloatArray(1)
+
+        val outputs = mapOf(
+            0 to locations,
+            1 to classes,
+            2 to scores,
+            3 to numDetections
+        )
+
+        // Run inference
+        objectDetector.runForMultipleInputsOutputs(arrayOf(tensorImage.buffer), outputs)
+
+        // Post-process the results
+        val bestDetection = scores[0].withIndex()
+            .maxByOrNull { it.value }
+            ?.takeIf { it.value > 0.5f } // Confidence threshold
+
+        if (bestDetection != null) {
+            val index = bestDetection.index
+            val score = bestDetection.value
+            val location = locations[0][index] // [ymin, xmin, ymax, xmax]
+            val labelIndex = classes[0][index].toInt()
+            val label = labels.getOrElse(labelIndex) { "Unknown" }
+
+            // Convert normalized coordinates to absolute pixel values
+            val ymin = location[0] * bitmap.height
+            val xmin = location[1] * bitmap.width
+            val ymax = location[2] * bitmap.height
+            val xmax = location[3] * bitmap.width
+
+            val boundingBox = Rect(xmin.toInt(), ymin.toInt(), xmax.toInt(), ymax.toInt())
+
+            return ObjectDetectionResult(boundingBox, label, score)
+        }
+
+        return null
+    }
+
+    // This is no longer the primary function, but kept for interface compliance
+    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
+        // Returning null as we are focusing on object detection now
+        return null
+    }
+
+    override fun deriveInference(bitmap: Bitmap): String = "Object Detection"
+}
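The [1, 25, 4] shape above was discovered from a crash log. Rather than hardcoding 25, the buffer sizes can be read from the interpreter at init time; a small sketch using the standard TFLite Java API (Interpreter.getOutputTensor / Tensor.shape), under the same assumption the code already makes that output 0 holds the box locations:

    // Sketch: size the output buffers from the model itself instead of a magic 25.
    val detShape = objectDetector.getOutputTensor(0).shape() // e.g. [1, 25, 4]
    val maxDetections = detShape[1]
    val locations = Array(1) { Array(maxDetections) { FloatArray(4) } }
    val classes = Array(1) { FloatArray(maxDetections) }
    val scores = Array(1) { FloatArray(maxDetections) }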
ObjectDetectionResult.kt (com.example.livingai.data.ml, new file)
@@ -0,0 +1,9 @@
+package com.example.livingai.data.ml
+
+import android.graphics.Rect
+
+data class ObjectDetectionResult(
+    val boundingBox: Rect,
+    val label: String,
+    val confidence: Float
+)
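A hypothetical call site showing how the result's fields are consumed; scope and aiModel are placeholder names, not code from this commit:

    import android.graphics.Bitmap
    import android.util.Log
    import com.example.livingai.domain.ml.AIModel
    import kotlinx.coroutines.CoroutineScope
    import kotlinx.coroutines.launch

    fun logBestDetection(scope: CoroutineScope, aiModel: AIModel, frame: Bitmap) {
        scope.launch {
            aiModel.detectObject(frame)?.let { r ->
                // confidence is 0..1; boundingBox is in pixel coordinates of `frame`
                Log.d("Detection", "${r.label} ${(r.confidence * 100).toInt()}% @ ${r.boundingBox}")
            }
        }
    }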
CameraRepositoryImpl.kt (data/repository)
@@ -4,6 +4,7 @@ import android.content.ContentValues
 import android.content.Context
 import android.graphics.Bitmap
 import android.graphics.Matrix
+import android.graphics.Rect
 import android.provider.MediaStore
 import androidx.camera.core.ImageProxy
 import com.example.livingai.data.ml.DistanceEstimatorImpl
@@ -48,16 +49,69 @@ class CameraRepositoryImpl(
         bitmap: Bitmap,
         requestedOrientation: Orientation,
         silhouetteBitmap: Bitmap,
-        realObjectHeightMeters: Float?, // ★ NEW PARAM
-        focalLengthPixels: Float // from camera intrinsics
+        realObjectHeightMeters: Float?,
+        focalLengthPixels: Float,
+        boundingBox: Rect?
     ): OrientationState = withContext(Dispatchers.Default) {
 
-        // 1. Collect segmentation
-        val meta = FrameMetadataProvider.collectMetadata(bitmap)
-        val bbox = meta.segmentationBox
-        val mask = meta.segmentationMaskBitmap
-
-        if (bbox == null || mask == null) {
+        // 1. Collect segmentation.
+        // ISSUE: FrameMetadataProvider.collectMetadata runs segmentation internally
+        // (getSegmentation -> aiModel.segmentImage), but AIModelImpl.segmentImage now
+        // returns null, so bbox would always be null and processFrame would bail out
+        // with "Segmentation missing".
+        // FIX: reuse the detection result we already have from CameraViewModel and
+        // synthesize the segmentation from its bounding box. DistanceEstimator needs a
+        // Bitmap mask, so build a simple box mask: a white rect marks object pixels.
+        val syntheticMeta = if (boundingBox != null) {
+            // Filling a full-size bitmap is not free, but this runs on the Default
+            // dispatcher. collectMetadata also gathers IMU and depth data, so the
+            // metadata is constructed manually here.
+            val maskBitmap = Bitmap.createBitmap(bitmap.width, bitmap.height, Bitmap.Config.ARGB_8888)
+            val canvas = android.graphics.Canvas(maskBitmap)
+            val paint = android.graphics.Paint().apply { color = android.graphics.Color.WHITE }
+            canvas.drawRect(boundingBox, paint)
+
+            val imu = FrameMetadataProvider.getIMU()
+            val rot = FrameMetadataProvider.getRotation()
+            val depth = FrameMetadataProvider.getDepthData()
+
+            FrameMetadataProvider.FrameCollectedMetadata(
+                segmentationMaskBitmap = maskBitmap,
+                segmentationBox = boundingBox,
+                depthMeters = depth.depthMeters,
+                depthWidth = depth.width,
+                depthHeight = depth.height,
+                depthConfidence = depth.confidence,
+                pitch = imu.pitch,
+                roll = imu.roll,
+                yaw = imu.yaw,
+                rotationDegrees = rot
+            )
+        } else {
+            FrameMetadataProvider.collectMetadata(bitmap)
+        }
+
+        val bbox = syntheticMeta.segmentationBox
+        // val mask = syntheticMeta.segmentationMaskBitmap // the mask is used inside distanceEstimator
+
+        if (bbox == null) {
             return@withContext OrientationState(
                 success = false,
                 reason = "Segmentation missing",
@@ -78,7 +132,7 @@ class CameraRepositoryImpl(
         )
 
         // 3. Build FrameData with relative depth only
-        val frameData = meta.toFrameData(bitmap).copy(
+        val frameData = syntheticMeta.toFrameData(bitmap).copy(
             medianDepth = midasResult?.relativeDepth
         )
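realObjectHeightMeters and focalLengthPixels suggest a pinhole-camera distance estimate downstream. DistanceEstimatorImpl's body is not in this diff, so this is an assumption about what it computes, but the standard relation is worth stating:

    // Sketch (assumed, not from this diff): pinhole model relating the object's
    // real height, its height in pixels, and the focal length in pixels.
    fun pinholeDistanceMeters(realHeightMeters: Float, focalLengthPixels: Float, heightPixels: Float): Float =
        realHeightMeters * focalLengthPixels / heightPixels
    // e.g. a 1.55 m object spanning 500 px under fx = 1000 px sits at ~3.1 m.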
Koin appModule (DI)
@@ -111,7 +111,7 @@ val appModule = module {
     }
 
     // ML Model
-    single<AIModel> { AIModelImpl() }
+    single<AIModel> { AIModelImpl(androidContext()) }
     single<ObjectDetector> {
         ObjectDetectorImpl(
             context = androidContext(),
AIModel.kt (com.example.livingai.domain.ml)
@@ -2,8 +2,10 @@ package com.example.livingai.domain.ml
 
 import android.graphics.Bitmap
 import android.graphics.Rect
+import com.example.livingai.data.ml.ObjectDetectionResult
 
 interface AIModel {
     fun deriveInference(bitmap: Bitmap): String
     suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>?
+    suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult?
 }
CameraRepository.kt (com.example.livingai.domain.repository)
@@ -1,6 +1,7 @@
 package com.example.livingai.domain.repository
 
 import android.graphics.Bitmap
+import android.graphics.Rect
 import androidx.camera.core.ImageProxy
 import com.example.livingai.domain.ml.Orientation
 import com.example.livingai.domain.ml.OrientationState
@@ -12,7 +13,8 @@ interface CameraRepository {
         requestedOrientation: Orientation,
         silhouetteBitmap: Bitmap,
         realObjectHeightMeters: Float?,
-        focalLengthPixels: Float
+        focalLengthPixels: Float,
+        boundingBox: Rect? = null
     ): OrientationState
     suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
 }
CameraScreen.kt (ui)
@@ -7,10 +7,14 @@ import androidx.camera.core.ImageProxy
 import androidx.camera.view.LifecycleCameraController
 import androidx.compose.foundation.Image
 import androidx.compose.foundation.background
+import androidx.compose.foundation.border
 import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.BoxWithConstraints
 import androidx.compose.foundation.layout.Column
 import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.offset
 import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
 import androidx.compose.foundation.shape.RoundedCornerShape
 import androidx.compose.material.icons.Icons
 import androidx.compose.material.icons.filled.Camera
@@ -18,7 +22,6 @@ import androidx.compose.material3.CircularProgressIndicator
 import androidx.compose.material3.FabPosition
 import androidx.compose.material3.FloatingActionButton
 import androidx.compose.material3.Icon
-import androidx.compose.material3.MaterialTheme
 import androidx.compose.material3.Scaffold
 import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
@@ -32,6 +35,7 @@ import androidx.compose.ui.graphics.Color
 import androidx.compose.ui.graphics.asImageBitmap
 import androidx.compose.ui.layout.ContentScale
 import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.platform.LocalDensity
 import androidx.compose.ui.unit.dp
 import androidx.navigation.NavController
 import androidx.core.content.ContextCompat
@@ -67,6 +71,7 @@ fun CameraScreen(
     PermissionWrapper {
         val state by viewModel.state.collectAsState()
         val context = LocalContext.current
+        val density = LocalDensity.current
 
         val controller = remember {
             LifecycleCameraController(context).apply {
@@ -83,31 +88,14 @@ fun CameraScreen(
                     viewModel.onEvent(CameraEvent.ImageCaptured(image))
                 }
 
-                override fun onError(exception: ImageCaptureException) {
-                    // Handle error, e.g., log it or show a message
-                }
+                override fun onError(exception: ImageCaptureException) {}
             }
         )
     }
 
-    LaunchedEffect(state.shouldAutoCapture) {
-        if (state.shouldAutoCapture) {
-            takePhoto()
-            viewModel.onEvent(CameraEvent.AutoCaptureTriggered)
-        }
-    }
-
     LaunchedEffect(state.capturedImageUri) {
         state.capturedImageUri?.let {
-            navController.navigate(
-                Route.ViewImageScreen(
-                    imageUri = it.toString(),
-                    shouldAllowRetake = true,
-                    showAccept = true,
-                    orientation = orientation,
-                    animalId = animalId
-                )
-            )
+            navController.navigate(Route.ViewImageScreen(it.toString(), true, orientation, true, false, animalId))
             viewModel.onEvent(CameraEvent.ClearCapturedImage)
         }
     }
@@ -120,71 +108,95 @@ fun CameraScreen(
         },
         floatingActionButtonPosition = FabPosition.Center
     ) { paddingValues ->
-        Box(
-            modifier = Modifier.fillMaxSize(),
-        ) {
-            Box(
-                modifier = Modifier.fillMaxSize()
-            ) {
-                CameraPreview(
-                    modifier = Modifier.fillMaxSize(),
-                    controller = controller,
-                    onFrame = { bitmap, rotation, fxPixels ->
-                        viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
-                    }
-                )
-
-                // The ML segmentation mask
-                state.segmentationMask?.let { mask ->
-                    Image(
-                        bitmap = mask.asImageBitmap(),
-                        contentDescription = "Segmentation Overlay",
-                        modifier = Modifier.fillMaxSize(),
-                        contentScale = ContentScale.FillBounds,
-                        alpha = 0.5f
-                    )
-                }
-
-                state.silhouetteMask?.let {
-                    Image(
-                        bitmap = it.asImageBitmap(),
-                        contentDescription = "Silhouette Overlay",
-                        modifier = Modifier.fillMaxSize(),
-                        contentScale = ContentScale.Fit,
-                        alpha = 0.4f
-                    )
-                }
-            }
-
-            // Debug Overlay
-            state.orientationState?.let { orient ->
-                Box(
-                    modifier = Modifier
-                        .align(Alignment.TopEnd)
-                        .padding(16.dp)
-                        .background(Color.Black.copy(alpha = 0.5f), RoundedCornerShape(8.dp))
-                        .padding(8.dp)
-                ) {
-                    Column {
-                        Text("Success: ${orient.success}", color = Color.White)
-                        Text("Reason: ${orient.reason}", color = Color.White)
-
-                        orient.pixelMetrics?.let { pm ->
-                            Text("Width (px): ${pm.widthPx}", color = Color.White)
-                            Text("Height (px): ${pm.heightPx}", color = Color.White)
-                        }
-
-                        // Display depth metrics from OrientationState
-                        orient.relativeDepth?.let { rel ->
-                            Text("Rel Depth: %.4f".format(rel), color = Color.White)
-                        }
-
-                        orient.absoluteDistanceMeters?.let { abs ->
-                            Text("Dist (m): %.2f".format(abs), color = Color.White)
-                        }
-
-                        Text("IOU: ${orient.iouScore}", color = Color.White)
-                        Text("Matched: ${orient.orientationMatched}", color = Color.White)
-                    }
-                }
-            }
+        BoxWithConstraints(modifier = Modifier.fillMaxSize().padding(paddingValues)) {
+            val screenWidth = maxWidth
+            val screenHeight = maxHeight
+
+            CameraPreview(
+                modifier = Modifier.fillMaxSize(),
+                controller = controller,
+                onFrame = { bitmap, rotation, fxPixels ->
+                    viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
+                }
+            )
+
+            state.detectionResult?.let { detection ->
+                val imageWidth = state.imageWidth.toFloat()
+                val imageHeight = state.imageHeight.toFloat()
+                if (imageWidth == 0f || imageHeight == 0f) return@let
+
+                val screenW = with(density) { screenWidth.toPx() }
+                val screenH = with(density) { screenHeight.toPx() }
+
+                val scaleX = screenW / imageWidth
+                val scaleY = screenH / imageHeight
+                val scale = maxOf(scaleX, scaleY) // For FILL_CENTER behavior
+
+                val offsetX = (screenW - imageWidth * scale) / 2f
+                val offsetY = (screenH - imageHeight * scale) / 2f
+
+                val bbox = detection.boundingBox
+                val left = bbox.left * scale + offsetX
+                val top = bbox.top * scale + offsetY
+
+                val leftDp = with(density) { left.toDp() }
+                val topDp = with(density) { top.toDp() }
+                val widthDp = with(density) { (bbox.width() * scale).toDp() }
+                val heightDp = with(density) { (bbox.height() * scale).toDp() }
+
+                Box(
+                    modifier = Modifier
+                        .offset(x = leftDp, y = topDp)
+                        .size(width = widthDp, height = heightDp)
+                        .border(2.dp, Color.Yellow)
+                )
+
+                // Overlay 1: Object Label & Confidence (Above the box)
+                Column(
+                    modifier = Modifier
+                        .offset(x = leftDp, y = topDp - 25.dp)
+                        .background(Color.Black.copy(alpha = 0.7f))
+                        .padding(4.dp)
+                ) {
+                    Text(
+                        text = "${detection.label} (${(detection.confidence * 100).toInt()}%)",
+                        color = Color.White
+                    )
+                }
+            }
+
+            // Overlay 2: Fixed top-right corner info
+            state.orientationState?.let { orient ->
+                Column(
+                    modifier = Modifier
+                        .align(Alignment.TopEnd)
+                        .padding(16.dp)
+                        .background(Color.Black.copy(alpha = 0.7f), shape = RoundedCornerShape(8.dp))
+                        .padding(8.dp)
+                ) {
+                    if (orient.relativeDepth != null) {
+                        Text(
+                            text = "Rel Depth: %.2f".format(orient.relativeDepth),
+                            color = Color.Cyan
+                        )
+                    }
+                    if (orient.absoluteDistanceMeters != null) {
+                        Text(
+                            text = "Dist: %.2fm".format(orient.absoluteDistanceMeters),
+                            color = Color.Green
+                        )
+                    }
+                    if (orient.iouScore != null) {
+                        Text(
+                            text = "IoU: %.2f".format(orient.iouScore),
+                            color = Color.Yellow
+                        )
+                    }
+                    orient.pixelMetrics?.let { metrics ->
+                        Text(
+                            text = "W: ${metrics.widthPx}px H: ${metrics.heightPx}px",
+                            color = Color.White
+                        )
+                    }
+                }
+            }
+        }
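The overlay math above inverts PreviewView's FILL_CENTER mapping: scale by max(scaleX, scaleY) so the image covers the screen, then center it, which means the offsets go negative when the preview crops the frame. The same mapping as a standalone helper (a sketch; output is a RectF in screen pixels):

    import android.graphics.Rect
    import android.graphics.RectF

    fun imageRectToScreen(bbox: Rect, imageW: Float, imageH: Float, screenW: Float, screenH: Float): RectF {
        val scale = maxOf(screenW / imageW, screenH / imageH) // FILL_CENTER: cover the screen
        val offsetX = (screenW - imageW * scale) / 2f          // negative when cropped horizontally
        val offsetY = (screenH - imageH * scale) / 2f          // negative when cropped vertically
        return RectF(
            bbox.left * scale + offsetX,
            bbox.top * scale + offsetY,
            bbox.right * scale + offsetX,
            bbox.bottom * scale + offsetY
        )
    }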
CameraViewModel.kt (ui)
@@ -6,6 +6,7 @@ import android.net.Uri
 import androidx.camera.core.ImageProxy
 import androidx.lifecycle.ViewModel
 import androidx.lifecycle.viewModelScope
+import com.example.livingai.data.ml.ObjectDetectionResult
 import com.example.livingai.domain.ml.AIModel
 import com.example.livingai.domain.ml.Orientation
 import com.example.livingai.domain.ml.OrientationState
@@ -72,7 +73,8 @@ class CameraViewModel(
     private fun clearCaptured() {
         _state.value = _state.value.copy(
             capturedImageUri = null,
-            segmentationMask = null
+            segmentationMask = null,
+            detectionResult = null // Clear detection result as well
         )
     }
@@ -89,80 +91,49 @@ class CameraViewModel(
     }
 
     private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
-        if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
-            return
-        }
-
         if (isProcessingFrame.compareAndSet(false, true)) {
             viewModelScope.launch {
                 try {
-                    val currentOrientationStr = _state.value.orientation
-                    val silhouette = _state.value.savedMaskBitmap
-
-                    val orientationState = if (currentOrientationStr != null && silhouette != null) {
-                        val orientationEnum = mapStringToOrientation(currentOrientationStr)
-
-                        cameraRepository.processFrame(
-                            bitmap,
-                            orientationEnum,
-                            silhouette,
-                            1.55f,
-                            focalLengthPixels
-                        )
-                    } else {
-                        null
-                    }
-
-                    val result = aiModel.segmentImage(bitmap)
-                    if (result != null) {
-                        val (maskBitmap, _) = result
-
-                        val rotatedMask = if (rotationDegrees != 0) {
-                            val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
-                            Bitmap.createBitmap(
-                                maskBitmap,
-                                0,
-                                0,
-                                maskBitmap.width,
-                                maskBitmap.height,
-                                matrix,
-                                true
-                            )
-                        } else {
-                            maskBitmap
-                        }
-
-                        val output = if(_state.value.orientation == "front" || _state.value.orientation == "back")
-                            fitImageToCrop(rotatedMask, screenDims.screenWidth, screenDims.screenHeight)
-                        else
-                            fitImageToCrop(rotatedMask, screenDims.screenHeight, screenDims.screenWidth)
-
-                        _state.value = _state.value.copy(
-                            segmentationMask = output,
-                            orientationState = orientationState
-                        )
-
-                        if (_state.value.isAutoCaptureEnabled &&
-                            _state.value.savedMaskBitmap != null &&
-                            output != null
-                        ) {
-                            val isValidCapture = calculateDistance(
-                                _state.value.distanceMethod,
-                                _state.value.savedMaskBitmap!!,
-                                output,
-                                _state.value.matchThreshold
-                            )
-
-                            if (isValidCapture) {
-                                _state.value = _state.value.copy(shouldAutoCapture = true)
-                            }
-                        }
-                    } else {
-                        _state.value = _state.value.copy(
-                            segmentationMask = null,
-                            orientationState = orientationState
-                        )
-                    }
+                    // Rotate bitmap to be upright before processing
+                    val rotatedBitmap = if (rotationDegrees != 0) {
+                        val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
+                        Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
+                    } else {
+                        bitmap
+                    }
+
+                    // Perform object detection
+                    val detectionResult = aiModel.detectObject(rotatedBitmap)
+
+                    var orientationState: OrientationState? = null
+                    val requestedOrientationStr = _state.value.orientation
+
+                    if (requestedOrientationStr != null && detectionResult != null) {
+                        // processFrame needs a silhouette bitmap; use the one loaded in
+                        // setContext and skip orientation processing if it is missing.
+                        val silhouette = _state.value.silhouetteMask
+
+                        if (silhouette != null) {
+                            orientationState = cameraRepository.processFrame(
+                                bitmap = rotatedBitmap,
+                                requestedOrientation = mapStringToOrientation(requestedOrientationStr),
+                                silhouetteBitmap = silhouette,
+                                realObjectHeightMeters = null, // or some default
+                                focalLengthPixels = focalLengthPixels,
+                                boundingBox = detectionResult.boundingBox // pass the bbox we just found
+                            )
+                        }
+                    }
+
+                    _state.value = _state.value.copy(
+                        detectionResult = detectionResult,
+                        orientationState = orientationState,
+                        imageWidth = rotatedBitmap.width,
+                        imageHeight = rotatedBitmap.height
+                    )
+                } catch (e: Exception) {
+                    e.printStackTrace()
                 } finally {
                     isProcessingFrame.set(false)
                 }
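handleFrame gates on an AtomicBoolean so at most one frame is processed at a time; newer frames arriving mid-flight are dropped rather than queued. The pattern in isolation, as a sketch of the assumed semantics:

    import java.util.concurrent.atomic.AtomicBoolean
    import kotlinx.coroutines.CoroutineScope
    import kotlinx.coroutines.launch

    class FrameGate(private val scope: CoroutineScope) {
        private val busy = AtomicBoolean(false)

        // Runs `work` only if no other frame is in flight; otherwise drops the frame.
        fun submit(work: suspend () -> Unit) {
            if (busy.compareAndSet(false, true)) {
                scope.launch {
                    try { work() } finally { busy.set(false) } // always release the gate
                }
            }
        }
    }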
@@ -195,7 +166,10 @@ data class CameraUiState(
     val matchThreshold: Int = 50,
     val distanceMethod: String = "Jaccard",
     val shouldAutoCapture: Boolean = false,
-    val orientationState: OrientationState? = null
+    val orientationState: OrientationState? = null,
+    val detectionResult: ObjectDetectionResult? = null,
+    val imageWidth: Int = 0,
+    val imageHeight: Int = 0
 )
 
 sealed class CameraEvent {