detection
parent c445068773
commit f647a509d5

@@ -0,0 +1,81 @@
Unknown
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
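
These 81 lines are the standard 80 COCO class names with an "Unknown" placeholder at index 0; AIModelImpl below loads them with FileUtil.loadLabels and falls back to "Unknown" for out-of-range indices. A minimal sketch of that lookup, assuming the TFLite support library's FileUtil and a hypothetical class index of 17:

    // Sketch only, not part of the diff: the file becomes a List<String> indexed by class id.
    val labels = FileUtil.loadLabels(context, "labels.txt")
    val label = labels.getOrElse(17) { "Unknown" }   // "dog" with this file's ordering
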
@@ -1,116 +1,94 @@
package com.example.livingai.data.ml

import android.content.Context
import android.graphics.Bitmap
import android.graphics.Color
import android.graphics.Rect
import com.example.livingai.domain.ml.AIModel
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.segmentation.subject.SubjectSegmentation
import com.google.mlkit.vision.segmentation.subject.SubjectSegmenterOptions
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import org.tensorflow.lite.Interpreter
import org.tensorflow.lite.support.common.FileUtil
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import java.nio.ByteBuffer
import java.nio.ByteOrder

private const val MASK_COLOR = 0x5500FF00 // semi-transparent green overlay
class AIModelImpl(private val context: Context) : AIModel {

class AIModelImpl : AIModel {
    private val objectDetector: Interpreter
    private val labels: List<String>

    private val segmenter by lazy {
        val options = SubjectSegmenterOptions.Builder()
            .enableForegroundBitmap()
    init {
        // Load the TFLite model from assets
        val modelBuffer = FileUtil.loadMappedFile(context, "efficientdet-lite0.tflite")
        val options = Interpreter.Options().apply { numThreads = 4 }
        objectDetector = Interpreter(modelBuffer, options)

        // Load labels from assets
        labels = try {
            FileUtil.loadLabels(context, "labels.txt")
        } catch (e: Exception) {
            e.printStackTrace()
            emptyList()
        }
    }

    override suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? {
        // Preprocess the image
        val imageProcessor = ImageProcessor.Builder()
            .add(ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
            .build()
            SubjectSegmentation.getClient(options)

        var tensorImage = TensorImage.fromBitmap(bitmap)
        tensorImage = imageProcessor.process(tensorImage)

        // Prepare model inputs and outputs
        // Based on crash: [1, 25, 4] vs [1, 10, 4]. The model outputs 25 detections, not 10.
        val locations = Array(1) { Array(25) { FloatArray(4) } }
        val classes = Array(1) { FloatArray(25) }
        val scores = Array(1) { FloatArray(25) }
        val numDetections = FloatArray(1)

        val outputs = mapOf(
            0 to locations,
            1 to classes,
            2 to scores,
            3 to numDetections
        )

        // Run inference
        objectDetector.runForMultipleInputsOutputs(arrayOf(tensorImage.buffer), outputs)

        // Post-process the results
        val bestDetection = scores[0].withIndex()
            .maxByOrNull { it.value }
            ?.takeIf { it.value > 0.5f } // Confidence threshold

        if (bestDetection != null) {
            val index = bestDetection.index
            val score = bestDetection.value
            val location = locations[0][index] // [ymin, xmin, ymax, xmax]
            val labelIndex = classes[0][index].toInt()
            val label = labels.getOrElse(labelIndex) { "Unknown" }

            // Convert normalized coordinates to absolute pixel values
            val ymin = location[0] * bitmap.height
            val xmin = location[1] * bitmap.width
            val ymax = location[2] * bitmap.height
            val xmax = location[3] * bitmap.width

            val boundingBox = Rect(xmin.toInt(), ymin.toInt(), xmax.toInt(), ymax.toInt())

            return ObjectDetectionResult(boundingBox, label, score)
        }

        return null
    }

    override fun deriveInference(bitmap: Bitmap): String = "Inference Result"


    // This is no longer the primary function, but kept for interface compliance
    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
        return suspendCancellableCoroutine { cont ->
            val image = InputImage.fromBitmap(bitmap, 0)

            segmenter.process(image)
                .addOnSuccessListener { result ->
                    val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)

                    // Instead of coloring it here, just pass the original mask bitmap
                    // or ensure it's suitable for further processing.
                    // The foreground bitmap from MLKit is usually the object cut out with transparent background.

                    val booleanMask = createBooleanMask(fg)
                    // We return the raw foreground bitmap as the 'maskBitmap' for now,
                    // or a colorized version if that's what UI expects.
                    // But for IOU/Overlap calculation, we might want the binary info.
                    // The UI seems to overlay 'colorMask'.
                    // DistanceEstimator uses 'segMaskBitmap'.

                    val colorMask = createColorizedMask(fg)
                    val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)

                    // Returning colorMask as the first element because UI expects a visual overlay.
                    // But note: DistanceEstimator might need the binary mask or the foreground.
                    // If DistanceEstimator treats this bitmap as a mask, colorized is fine as long as alpha is preserved.
                    cont.resume(Triple(colorMask, booleanMask, bbox))
                }
                .addOnFailureListener { e ->
                    cont.resumeWithException(e)
                }
        }
        // Returning null as we are focusing on object detection now
        return null
    }

    private fun createColorizedMask(maskBitmap: Bitmap): Bitmap {
        val w = maskBitmap.width
        val h = maskBitmap.height
        val pixels = IntArray(w * h)

        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)

        for (i in pixels.indices) {
            // ML Kit Foreground Bitmap: Non-transparent pixels are the object.
            if (Color.alpha(pixels[i]) > 0) {
                pixels[i] = MASK_COLOR
            }
        }

        return Bitmap.createBitmap(pixels, w, h, Bitmap.Config.ARGB_8888)
    }

    private fun createBooleanMask(bitmap: Bitmap): BooleanArray {
        val w = bitmap.width
        val h = bitmap.height
        val mask = BooleanArray(w * h)
        val pixels = IntArray(w * h)

        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)

        for (i in pixels.indices) {
            mask[i] = Color.alpha(pixels[i]) > 0
        }

        return mask
    }

    private fun computeBoundingBox(mask: BooleanArray, w: Int, h: Int): Rect {
        var minX = Int.MAX_VALUE
        var minY = Int.MAX_VALUE
        var maxX = Int.MIN_VALUE
        var maxY = Int.MIN_VALUE

        for (y in 0 until h) {
            for (x in 0 until w) {
                val idx = y * w + x
                if (mask[idx]) {
                    if (x < minX) minX = x
                    if (y < minY) minY = y
                    if (x > maxX) maxX = x
                    if (y > maxY) maxY = y
                }
            }
        }

        return if (minX == Int.MAX_VALUE) {
            Rect(0, 0, 0, 0)
        } else {
            Rect(minX, minY, maxX, maxY)
        }
    }
    override fun deriveInference(bitmap: Bitmap): String = "Object Detection"
}
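
The output buffers in detectObject above are sized to 25 detections based on the crash message rather than queried from the model. A minimal sketch of sizing them from the interpreter instead, assuming the standard TFLite Interpreter API and that output 0 is the locations tensor, as the outputs map above already assumes:

    // Sketch only, not part of this commit: derive the detection count from the model itself.
    val locShape = objectDetector.getOutputTensor(0).shape()           // e.g. [1, 25, 4]
    val maxDetections = locShape[1]
    val locations = Array(1) { Array(maxDetections) { FloatArray(4) } }
    val classes = Array(1) { FloatArray(maxDetections) }
    val scores = Array(1) { FloatArray(maxDetections) }
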
@@ -0,0 +1,9 @@
package com.example.livingai.data.ml

import android.graphics.Rect

data class ObjectDetectionResult(
    val boundingBox: Rect,
    val label: String,
    val confidence: Float
)
@@ -4,6 +4,7 @@ import android.content.ContentValues
import android.content.Context
import android.graphics.Bitmap
import android.graphics.Matrix
import android.graphics.Rect
import android.provider.MediaStore
import androidx.camera.core.ImageProxy
import com.example.livingai.data.ml.DistanceEstimatorImpl
@@ -48,16 +49,69 @@ class CameraRepositoryImpl(
        bitmap: Bitmap,
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap,
        realObjectHeightMeters: Float?, // ★ NEW PARAM
        focalLengthPixels: Float // from camera intrinsics
        realObjectHeightMeters: Float?,
        focalLengthPixels: Float,
        boundingBox: Rect?
    ): OrientationState = withContext(Dispatchers.Default) {

        // 1. Collect segmentation
        val meta = FrameMetadataProvider.collectMetadata(bitmap)
        val bbox = meta.segmentationBox
        val mask = meta.segmentationMaskBitmap
        // Use the passed boundingBox if available, otherwise it relies on FrameMetadataProvider running segmentation again
        // But FrameMetadataProvider.collectMetadata runs segmentation internally.
        // To avoid re-running detection/segmentation if we already have bbox, we can pass it.
        // However, FrameMetadataProvider currently calls getSegmentation(bitmap) which calls aiModel.segmentImage(bitmap).
        // AIModel.segmentImage is returning null in current impl.

        // ISSUE: processFrame relies on FrameMetadataProvider.collectMetadata -> getSegmentation -> aiModel.segmentImage
        // But AIModelImpl.segmentImage returns null!
        // So bbox will be null, and processFrame returns early with "Segmentation missing".

        // FIX: We need to use the detection result we already have from CameraViewModel.
        // We will mock the segmentation result using the bounding box from object detection.
        // And for the mask, since we don't have segmentation, we can either:
        // a) Create a dummy mask filled within the bbox (simple box mask)
        // b) Or just proceed if DistanceEstimator can handle it (it needs mask).

        // Let's create a synthetic mask from the bbox.
        val syntheticMeta = if (boundingBox != null) {
            // Create a simple mask where pixels inside bbox are true
            // This is computationally expensive to do full bitmap, so be careful.
            // But we need a Bitmap mask for DistanceEstimator.
            // Let's create a black bitmap with white rect.

            // NOTE: This runs on Default dispatcher, so should be okay-ish.

            // However, FrameMetadataProvider.collectMetadata does more (IMU, Depth).
            // Let's manually construct metadata.

            val maskBitmap = Bitmap.createBitmap(bitmap.width, bitmap.height, Bitmap.Config.ARGB_8888)
            val canvas = android.graphics.Canvas(maskBitmap)
            val paint = android.graphics.Paint().apply { color = android.graphics.Color.WHITE }
            canvas.drawRect(boundingBox, paint)

            val imu = FrameMetadataProvider.getIMU()
            val rot = FrameMetadataProvider.getRotation()
            val depth = FrameMetadataProvider.getDepthData()

            FrameMetadataProvider.FrameCollectedMetadata(
                segmentationMaskBitmap = maskBitmap,
                segmentationBox = boundingBox,
                depthMeters = depth.depthMeters,
                depthWidth = depth.width,
                depthHeight = depth.height,
                depthConfidence = depth.confidence,
                pitch = imu.pitch,
                roll = imu.roll,
                yaw = imu.yaw,
                rotationDegrees = rot
            )
        } else {
            FrameMetadataProvider.collectMetadata(bitmap)
        }

        if (bbox == null || mask == null) {
        val bbox = syntheticMeta.segmentationBox
        // val mask = syntheticMeta.segmentationMaskBitmap // Mask is used inside distanceEstimator

        if (bbox == null) {
            return@withContext OrientationState(
                success = false,
                reason = "Segmentation missing",
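
The comments above weigh a simple box mask (option a) against a full bitmap. If the per-frame ARGB bitmap ever proves too costly, the same idea can be expressed as a BooleanArray; a hypothetical helper, not part of this commit:

    // Sketch: mark every pixel inside the detection box as foreground.
    fun rectToBooleanMask(box: Rect, width: Int, height: Int): BooleanArray {
        val mask = BooleanArray(width * height)
        for (y in maxOf(0, box.top) until minOf(height, box.bottom)) {
            val rowStart = y * width
            for (x in maxOf(0, box.left) until minOf(width, box.right)) {
                mask[rowStart + x] = true
            }
        }
        return mask
    }

Whether this can replace the bitmap depends on what DistanceEstimator actually consumes, which the comments above leave open.
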
@@ -78,7 +132,7 @@ class CameraRepositoryImpl(
        )

        // 3. Build FrameData with relative depth only
        val frameData = meta.toFrameData(bitmap).copy(
        val frameData = syntheticMeta.toFrameData(bitmap).copy(
            medianDepth = midasResult?.relativeDepth
        )

@@ -136,4 +190,4 @@ class CameraRepositoryImpl(

            uri.toString()
        }
    }
}
@@ -111,7 +111,7 @@ val appModule = module {
    }

    // ML Model
    single<AIModel> { AIModelImpl() }
    single<AIModel> { AIModelImpl(androidContext()) }
    single<ObjectDetector> {
        ObjectDetectorImpl(
            context = androidContext(),
@@ -2,8 +2,10 @@ package com.example.livingai.domain.ml

import android.graphics.Bitmap
import android.graphics.Rect
import com.example.livingai.data.ml.ObjectDetectionResult

interface AIModel {
    fun deriveInference(bitmap: Bitmap): String
    suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>?
}
    suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult?
}
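
With detectObject added alongside segmentImage, a caller gets a single best detection or null when nothing clears the 0.5 confidence threshold. A hypothetical call site, mirroring what CameraViewModel does later in this diff:

    // Sketch only: run detection on an upright frame and report the best hit, if any.
    suspend fun describeFrame(aiModel: AIModel, frame: Bitmap): String {
        val detection = aiModel.detectObject(frame) ?: return "no object above threshold"
        return "${detection.label} ${(detection.confidence * 100).toInt()}% at ${detection.boundingBox}"
    }
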
@@ -1,6 +1,7 @@
package com.example.livingai.domain.repository

import android.graphics.Bitmap
import android.graphics.Rect
import androidx.camera.core.ImageProxy
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
@@ -12,7 +13,8 @@ interface CameraRepository {
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap,
        realObjectHeightMeters: Float?,
        focalLengthPixels: Float
        focalLengthPixels: Float,
        boundingBox: Rect? = null
    ): OrientationState
    suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
}
@@ -7,10 +7,14 @@ import androidx.camera.core.ImageProxy
import androidx.camera.view.LifecycleCameraController
import androidx.compose.foundation.Image
import androidx.compose.foundation.background
import androidx.compose.foundation.border
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.BoxWithConstraints
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.offset
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Camera
@@ -18,7 +22,6 @@ import androidx.compose.material3.CircularProgressIndicator
import androidx.compose.material3.FabPosition
import androidx.compose.material3.FloatingActionButton
import androidx.compose.material3.Icon
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
@@ -32,6 +35,7 @@ import androidx.compose.ui.graphics.Color
import androidx.compose.ui.graphics.asImageBitmap
import androidx.compose.ui.layout.ContentScale
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalDensity
import androidx.compose.ui.unit.dp
import androidx.navigation.NavController
import androidx.core.content.ContextCompat
@@ -67,6 +71,7 @@ fun CameraScreen(
    PermissionWrapper {
        val state by viewModel.state.collectAsState()
        val context = LocalContext.current
        val density = LocalDensity.current

        val controller = remember {
            LifecycleCameraController(context).apply {
@@ -83,31 +88,14 @@ fun CameraScreen(
                        viewModel.onEvent(CameraEvent.ImageCaptured(image))
                    }

                    override fun onError(exception: ImageCaptureException) {
                        // Handle error, e.g., log it or show a message
                    }
                    override fun onError(exception: ImageCaptureException) {}
                }
            )
        }

        LaunchedEffect(state.shouldAutoCapture) {
            if (state.shouldAutoCapture) {
                takePhoto()
                viewModel.onEvent(CameraEvent.AutoCaptureTriggered)
            }
        }

        LaunchedEffect(state.capturedImageUri) {
            state.capturedImageUri?.let {
                navController.navigate(
                    Route.ViewImageScreen(
                        imageUri = it.toString(),
                        shouldAllowRetake = true,
                        showAccept = true,
                        orientation = orientation,
                        animalId = animalId
                    )
                )
                navController.navigate(Route.ViewImageScreen(it.toString(), true, orientation, true, false, animalId))
                viewModel.onEvent(CameraEvent.ClearCapturedImage)
            }
        }
@@ -120,71 +108,95 @@ fun CameraScreen(
            },
            floatingActionButtonPosition = FabPosition.Center
        ) { paddingValues ->
            Box(
                modifier = Modifier.fillMaxSize(),
            ) {
                Box(
                    modifier = Modifier.fillMaxSize()
                ) {
                    CameraPreview(
                        modifier = Modifier.fillMaxSize(),
                        controller = controller,
                        onFrame = { bitmap, rotation, fxPixels ->
                            viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
                        }
            BoxWithConstraints(modifier = Modifier.fillMaxSize().padding(paddingValues)) {
                val screenWidth = maxWidth
                val screenHeight = maxHeight

                CameraPreview(
                    modifier = Modifier.fillMaxSize(),
                    controller = controller,
                    onFrame = { bitmap, rotation, fxPixels ->
                        viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
                    }
                )

                state.detectionResult?.let { detection ->
                    val imageWidth = state.imageWidth.toFloat()
                    val imageHeight = state.imageHeight.toFloat()
                    if (imageWidth == 0f || imageHeight == 0f) return@let

                    val screenW = with(density) { screenWidth.toPx() }
                    val screenH = with(density) { screenHeight.toPx() }

                    val scaleX = screenW / imageWidth
                    val scaleY = screenH / imageHeight
                    val scale = maxOf(scaleX, scaleY) // For FILL_CENTER behavior

                    val offsetX = (screenW - imageWidth * scale) / 2f
                    val offsetY = (screenH - imageHeight * scale) / 2f

                    val bbox = detection.boundingBox
                    val left = bbox.left * scale + offsetX
                    val top = bbox.top * scale + offsetY

                    val leftDp = with(density) { left.toDp() }
                    val topDp = with(density) { top.toDp() }
                    val widthDp = with(density) { (bbox.width() * scale).toDp() }
                    val heightDp = with(density) { (bbox.height() * scale).toDp() }

                    Box(
                        modifier = Modifier
                            .offset(x = leftDp, y = topDp)
                            .size(width = widthDp, height = heightDp)
                            .border(2.dp, Color.Yellow)
                    )

                // The ML segmentation mask
                state.segmentationMask?.let { mask ->
                    Image(
                        bitmap = mask.asImageBitmap(),
                        contentDescription = "Segmentation Overlay",
                        modifier = Modifier.fillMaxSize(),
                        contentScale = ContentScale.FillBounds,
                        alpha = 0.5f
                    )
                }

                state.silhouetteMask?.let {
                    Image(
                        bitmap = it.asImageBitmap(),
                        contentDescription = "Silhouette Overlay",
                        modifier = Modifier.fillMaxSize(),
                        contentScale = ContentScale.Fit,
                        alpha = 0.4f
                    // Overlay 1: Object Label & Confidence (Above the box)
                    Column(
                        modifier = Modifier
                            .offset(x = leftDp, y = topDp - 25.dp)
                            .background(Color.Black.copy(alpha = 0.7f))
                            .padding(4.dp)
                    ) {
                        Text(
                            text = "${detection.label} (${(detection.confidence * 100).toInt()}%)",
                            color = Color.White
                        )
                    }
                }

                // Debug Overlay

                // Overlay 2: Fixed top-right corner info
                state.orientationState?.let { orient ->
                    Box(
                    Column(
                        modifier = Modifier
                            .align(Alignment.TopEnd)
                            .padding(16.dp)
                            .background(Color.Black.copy(alpha = 0.5f), RoundedCornerShape(8.dp))
                            .background(Color.Black.copy(alpha = 0.7f), shape = RoundedCornerShape(8.dp))
                            .padding(8.dp)
                    ) {
                        Column {
                            Text("Success: ${orient.success}", color = Color.White)
                            Text("Reason: ${orient.reason}", color = Color.White)

                            orient.pixelMetrics?.let { pm ->
                                Text("Width (px): ${pm.widthPx}", color = Color.White)
                                Text("Height (px): ${pm.heightPx}", color = Color.White)
                            }

                            // Display depth metrics from OrientationState
                            orient.relativeDepth?.let { rel ->
                                Text("Rel Depth: %.4f".format(rel), color = Color.White)
                            }

                            orient.absoluteDistanceMeters?.let { abs ->
                                Text("Dist (m): %.2f".format(abs), color = Color.White)
                            }

                            Text("IOU: ${orient.iouScore}", color = Color.White)
                            Text("Matched: ${orient.orientationMatched}", color = Color.White)
                        if (orient.relativeDepth != null) {
                            Text(
                                text = "Rel Depth: %.2f".format(orient.relativeDepth),
                                color = Color.Cyan
                            )
                        }
                        if (orient.absoluteDistanceMeters != null) {
                            Text(
                                text = "Dist: %.2fm".format(orient.absoluteDistanceMeters),
                                color = Color.Green
                            )
                        }
                        if (orient.iouScore != null) {
                            Text(
                                text = "IoU: %.2f".format(orient.iouScore),
                                color = Color.Yellow
                            )
                        }
                        orient.pixelMetrics?.let { metrics ->
                            Text(
                                text = "W: ${metrics.widthPx}px H: ${metrics.heightPx}px",
                                color = Color.White
                            )
                        }
                    }
                }
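
The overlay math above maps detection coordinates from the analysis frame into preview space by scaling with max(scaleX, scaleY) and centering, which matches FILL_CENTER cropping. A worked example with hypothetical sizes:

    // Hypothetical numbers: 480x640 analysis frame shown on a 1080x2160 preview.
    // scaleX = 1080 / 480 = 2.25, scaleY = 2160 / 640 = 3.375  -> scale = 3.375 (cover)
    // offsetX = (1080 - 480 * 3.375) / 2 = -270   (horizontal edges of the frame are cropped)
    // offsetY = (2160 - 640 * 3.375) / 2 = 0
    // a bbox.left of 100 in the frame lands at 100 * 3.375 - 270 = 67.5 px on screen
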
@@ -6,6 +6,7 @@ import android.net.Uri
import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.example.livingai.data.ml.ObjectDetectionResult
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
@@ -72,7 +73,8 @@ class CameraViewModel(
    private fun clearCaptured() {
        _state.value = _state.value.copy(
            capturedImageUri = null,
            segmentationMask = null
            segmentationMask = null,
            detectionResult = null // Clear detection result as well
        )
    }

@@ -89,80 +91,49 @@ class CameraViewModel(
    }

    private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
        if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
            return
        }

        if (isProcessingFrame.compareAndSet(false, true)) {
            viewModelScope.launch {
                try {
                    val currentOrientationStr = _state.value.orientation
                    val silhouette = _state.value.savedMaskBitmap

                    val orientationState = if (currentOrientationStr != null && silhouette != null) {
                        val orientationEnum = mapStringToOrientation(currentOrientationStr)

                        cameraRepository.processFrame(
                            bitmap,
                            orientationEnum,
                            silhouette,
                            1.55f,
                            focalLengthPixels
                        )
                    // Rotate bitmap to be upright before processing
                    val rotatedBitmap = if (rotationDegrees != 0) {
                        val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
                        Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
                    } else {
                        null
                        bitmap
                    }

                    // Perform Object Detection
                    val detectionResult = aiModel.detectObject(rotatedBitmap)

                    var orientationState: OrientationState? = null
                    val requestedOrientationStr = _state.value.orientation

                    val result = aiModel.segmentImage(bitmap)
                    if (result != null) {
                        val (maskBitmap, _) = result

                        val rotatedMask = if (rotationDegrees != 0) {
                            val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
                            Bitmap.createBitmap(
                                maskBitmap,
                                0,
                                0,
                                maskBitmap.width,
                                maskBitmap.height,
                                matrix,
                                true
                            )
                        } else {
                            maskBitmap
                        }

                        val output = if(_state.value.orientation == "front" || _state.value.orientation == "back")
                            fitImageToCrop(rotatedMask, screenDims.screenWidth, screenDims.screenHeight)
                        else
                            fitImageToCrop(rotatedMask, screenDims.screenHeight, screenDims.screenWidth)

                        _state.value = _state.value.copy(
                            segmentationMask = output,
                            orientationState = orientationState
                        )

                        if (_state.value.isAutoCaptureEnabled &&
                            _state.value.savedMaskBitmap != null &&
                            output != null
                        ) {
                            val isValidCapture = calculateDistance(
                                _state.value.distanceMethod,
                                _state.value.savedMaskBitmap!!,
                                output,
                                _state.value.matchThreshold
                            )

                            if (isValidCapture) {
                                _state.value = _state.value.copy(shouldAutoCapture = true)
                            }
                        }
                    } else {
                        _state.value = _state.value.copy(
                            segmentationMask = null,
                            orientationState = orientationState
                        )
                    if (requestedOrientationStr != null && detectionResult != null) {
                        // We need a silhouette bitmap for processFrame. If not available, we can pass a dummy or handle inside.
                        // But for now, let's use the one we loaded in setContext
                        val silhouette = _state.value.silhouetteMask

                        if (silhouette != null) {
                            orientationState = cameraRepository.processFrame(
                                bitmap = rotatedBitmap,
                                requestedOrientation = mapStringToOrientation(requestedOrientationStr),
                                silhouetteBitmap = silhouette,
                                realObjectHeightMeters = null, // Or some default
                                focalLengthPixels = focalLengthPixels,
                                boundingBox = detectionResult.boundingBox // Pass the bbox we just found
                            )
                        }
                    }

                    _state.value = _state.value.copy(
                        detectionResult = detectionResult,
                        orientationState = orientationState, // Update state
                        imageWidth = rotatedBitmap.width,
                        imageHeight = rotatedBitmap.height
                    )

                } catch (e: Exception) {
                    e.printStackTrace()
                } finally {
                    isProcessingFrame.set(false)
                }
@@ -195,7 +166,10 @@ data class CameraUiState(
    val matchThreshold: Int = 50,
    val distanceMethod: String = "Jaccard",
    val shouldAutoCapture: Boolean = false,
    val orientationState: OrientationState? = null
    val orientationState: OrientationState? = null,
    val detectionResult: ObjectDetectionResult? = null,
    val imageWidth: Int = 0,
    val imageHeight: Int = 0
)

sealed class CameraEvent {