detection
parent c445068773
commit f647a509d5

@@ -0,0 +1,81 @@
Unknown
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
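
These 81 lines are the standard 80 COCO class names with an "Unknown" placeholder at index 0; AIModelImpl below loads them with FileUtil.loadLabels and falls back to "Unknown" for out-of-range indices. A minimal sketch of that lookup, assuming the TFLite support library's FileUtil and a hypothetical class index of 17:

    // Sketch only, not part of the diff: the file becomes a List<String> indexed by class id.
    val labels = FileUtil.loadLabels(context, "labels.txt")
    val label = labels.getOrElse(17) { "Unknown" }   // "dog" with this file's ordering
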
@@ -1,116 +1,94 @@
package com.example.livingai.data.ml

import android.content.Context
import android.graphics.Bitmap
import android.graphics.Color
import android.graphics.Rect
import com.example.livingai.domain.ml.AIModel
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.segmentation.subject.SubjectSegmentation
import com.google.mlkit.vision.segmentation.subject.SubjectSegmenterOptions
import kotlinx.coroutines.suspendCancellableCoroutine
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import org.tensorflow.lite.Interpreter
import org.tensorflow.lite.support.common.FileUtil
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import java.nio.ByteBuffer
import java.nio.ByteOrder

private const val MASK_COLOR = 0x5500FF00 // semi-transparent green overlay
class AIModelImpl(private val context: Context) : AIModel {

class AIModelImpl : AIModel {
    private val objectDetector: Interpreter
    private val labels: List<String>

    private val segmenter by lazy {
        val options = SubjectSegmenterOptions.Builder()
            .enableForegroundBitmap()
    init {
        // Load the TFLite model from assets
        val modelBuffer = FileUtil.loadMappedFile(context, "efficientdet-lite0.tflite")
        val options = Interpreter.Options().apply { numThreads = 4 }
        objectDetector = Interpreter(modelBuffer, options)

        // Load labels from assets
        labels = try {
            FileUtil.loadLabels(context, "labels.txt")
        } catch (e: Exception) {
            e.printStackTrace()
            emptyList()
        }
    }

    override suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? {
        // Preprocess the image
        val imageProcessor = ImageProcessor.Builder()
            .add(ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
            .build()
            SubjectSegmentation.getClient(options)

        var tensorImage = TensorImage.fromBitmap(bitmap)
        tensorImage = imageProcessor.process(tensorImage)

        // Prepare model inputs and outputs
        // Based on crash: [1, 25, 4] vs [1, 10, 4]. The model outputs 25 detections, not 10.
        val locations = Array(1) { Array(25) { FloatArray(4) } }
        val classes = Array(1) { FloatArray(25) }
        val scores = Array(1) { FloatArray(25) }
        val numDetections = FloatArray(1)

        val outputs = mapOf(
            0 to locations,
            1 to classes,
            2 to scores,
            3 to numDetections
        )

        // Run inference
        objectDetector.runForMultipleInputsOutputs(arrayOf(tensorImage.buffer), outputs)

        // Post-process the results
        val bestDetection = scores[0].withIndex()
            .maxByOrNull { it.value }
            ?.takeIf { it.value > 0.5f } // Confidence threshold

        if (bestDetection != null) {
            val index = bestDetection.index
            val score = bestDetection.value
            val location = locations[0][index] // [ymin, xmin, ymax, xmax]
            val labelIndex = classes[0][index].toInt()
            val label = labels.getOrElse(labelIndex) { "Unknown" }

            // Convert normalized coordinates to absolute pixel values
            val ymin = location[0] * bitmap.height
            val xmin = location[1] * bitmap.width
            val ymax = location[2] * bitmap.height
            val xmax = location[3] * bitmap.width

            val boundingBox = Rect(xmin.toInt(), ymin.toInt(), xmax.toInt(), ymax.toInt())

            return ObjectDetectionResult(boundingBox, label, score)
        }

        return null
    }

    override fun deriveInference(bitmap: Bitmap): String = "Inference Result"


    // This is no longer the primary function, but kept for interface compliance
    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
        return suspendCancellableCoroutine { cont ->
            val image = InputImage.fromBitmap(bitmap, 0)

            segmenter.process(image)
                .addOnSuccessListener { result ->
                    val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)

                    // Instead of coloring it here, just pass the original mask bitmap
                    // or ensure it's suitable for further processing.
                    // The foreground bitmap from MLKit is usually the object cut out with transparent background.

                    val booleanMask = createBooleanMask(fg)
                    // We return the raw foreground bitmap as the 'maskBitmap' for now,
                    // or a colorized version if that's what UI expects.
                    // But for IOU/Overlap calculation, we might want the binary info.
                    // The UI seems to overlay 'colorMask'.
                    // DistanceEstimator uses 'segMaskBitmap'.

                    val colorMask = createColorizedMask(fg)
                    val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)

                    // Returning colorMask as the first element because UI expects a visual overlay.
                    // But note: DistanceEstimator might need the binary mask or the foreground.
                    // If DistanceEstimator treats this bitmap as a mask, colorized is fine as long as alpha is preserved.
                    cont.resume(Triple(colorMask, booleanMask, bbox))
                }
                .addOnFailureListener { e ->
                    cont.resumeWithException(e)
                }
        }
        // Returning null as we are focusing on object detection now
        return null
    }

    private fun createColorizedMask(maskBitmap: Bitmap): Bitmap {
        val w = maskBitmap.width
        val h = maskBitmap.height
        val pixels = IntArray(w * h)

        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)

        for (i in pixels.indices) {
            // ML Kit Foreground Bitmap: Non-transparent pixels are the object.
            if (Color.alpha(pixels[i]) > 0) {
                pixels[i] = MASK_COLOR
            }
        }

        return Bitmap.createBitmap(pixels, w, h, Bitmap.Config.ARGB_8888)
    }

    private fun createBooleanMask(bitmap: Bitmap): BooleanArray {
        val w = bitmap.width
        val h = bitmap.height
        val mask = BooleanArray(w * h)
        val pixels = IntArray(w * h)

        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)

        for (i in pixels.indices) {
            mask[i] = Color.alpha(pixels[i]) > 0
        }

        return mask
    }

    private fun computeBoundingBox(mask: BooleanArray, w: Int, h: Int): Rect {
        var minX = Int.MAX_VALUE
        var minY = Int.MAX_VALUE
        var maxX = Int.MIN_VALUE
        var maxY = Int.MIN_VALUE

        for (y in 0 until h) {
            for (x in 0 until w) {
                val idx = y * w + x
                if (mask[idx]) {
                    if (x < minX) minX = x
                    if (y < minY) minY = y
                    if (x > maxX) maxX = x
                    if (y > maxY) maxY = y
                }
            }
        }

        return if (minX == Int.MAX_VALUE) {
            Rect(0, 0, 0, 0)
        } else {
            Rect(minX, minY, maxX, maxY)
        }
    }
    override fun deriveInference(bitmap: Bitmap): String = "Object Detection"
}
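
The output buffers in detectObject above are sized to 25 detections based on the crash message rather than queried from the model. A minimal sketch of sizing them from the interpreter instead, assuming the standard TFLite Interpreter API and that output 0 is the locations tensor, as the outputs map above already assumes:

    // Sketch only, not part of this commit: derive the detection count from the model itself.
    val locShape = objectDetector.getOutputTensor(0).shape()           // e.g. [1, 25, 4]
    val maxDetections = locShape[1]
    val locations = Array(1) { Array(maxDetections) { FloatArray(4) } }
    val classes = Array(1) { FloatArray(maxDetections) }
    val scores = Array(1) { FloatArray(maxDetections) }
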
@@ -0,0 +1,9 @@
package com.example.livingai.data.ml

import android.graphics.Rect

data class ObjectDetectionResult(
    val boundingBox: Rect,
    val label: String,
    val confidence: Float
)
@@ -4,6 +4,7 @@ import android.content.ContentValues
import android.content.Context
import android.graphics.Bitmap
import android.graphics.Matrix
import android.graphics.Rect
import android.provider.MediaStore
import androidx.camera.core.ImageProxy
import com.example.livingai.data.ml.DistanceEstimatorImpl
@@ -48,16 +49,69 @@ class CameraRepositoryImpl(
        bitmap: Bitmap,
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap,
        realObjectHeightMeters: Float?, // ★ NEW PARAM
        focalLengthPixels: Float // from camera intrinsics
        realObjectHeightMeters: Float?,
        focalLengthPixels: Float,
        boundingBox: Rect?
    ): OrientationState = withContext(Dispatchers.Default) {

        // 1. Collect segmentation
        val meta = FrameMetadataProvider.collectMetadata(bitmap)
        val bbox = meta.segmentationBox
        val mask = meta.segmentationMaskBitmap
        // Use the passed boundingBox if available, otherwise it relies on FrameMetadataProvider running segmentation again
        // But FrameMetadataProvider.collectMetadata runs segmentation internally.
        // To avoid re-running detection/segmentation if we already have bbox, we can pass it.
        // However, FrameMetadataProvider currently calls getSegmentation(bitmap) which calls aiModel.segmentImage(bitmap).
        // AIModel.segmentImage is returning null in current impl.

        // ISSUE: processFrame relies on FrameMetadataProvider.collectMetadata -> getSegmentation -> aiModel.segmentImage
        // But AIModelImpl.segmentImage returns null!
        // So bbox will be null, and processFrame returns early with "Segmentation missing".

        // FIX: We need to use the detection result we already have from CameraViewModel.
        // We will mock the segmentation result using the bounding box from object detection.
        // And for the mask, since we don't have segmentation, we can either:
        // a) Create a dummy mask filled within the bbox (simple box mask)
        // b) Or just proceed if DistanceEstimator can handle it (it needs mask).

        // Let's create a synthetic mask from the bbox.
        val syntheticMeta = if (boundingBox != null) {
            // Create a simple mask where pixels inside bbox are true
            // This is computationally expensive to do full bitmap, so be careful.
            // But we need a Bitmap mask for DistanceEstimator.
            // Let's create a black bitmap with white rect.

            // NOTE: This runs on Default dispatcher, so should be okay-ish.

            // However, FrameMetadataProvider.collectMetadata does more (IMU, Depth).
            // Let's manually construct metadata.

            val maskBitmap = Bitmap.createBitmap(bitmap.width, bitmap.height, Bitmap.Config.ARGB_8888)
            val canvas = android.graphics.Canvas(maskBitmap)
            val paint = android.graphics.Paint().apply { color = android.graphics.Color.WHITE }
            canvas.drawRect(boundingBox, paint)

            val imu = FrameMetadataProvider.getIMU()
            val rot = FrameMetadataProvider.getRotation()
            val depth = FrameMetadataProvider.getDepthData()

            FrameMetadataProvider.FrameCollectedMetadata(
                segmentationMaskBitmap = maskBitmap,
                segmentationBox = boundingBox,
                depthMeters = depth.depthMeters,
                depthWidth = depth.width,
                depthHeight = depth.height,
                depthConfidence = depth.confidence,
                pitch = imu.pitch,
                roll = imu.roll,
                yaw = imu.yaw,
                rotationDegrees = rot
            )
        } else {
            FrameMetadataProvider.collectMetadata(bitmap)
        }

        if (bbox == null || mask == null) {
        val bbox = syntheticMeta.segmentationBox
        // val mask = syntheticMeta.segmentationMaskBitmap // Mask is used inside distanceEstimator

        if (bbox == null) {
            return@withContext OrientationState(
                success = false,
                reason = "Segmentation missing",
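
The comments above weigh a simple box mask (option a) against a full bitmap. If the per-frame ARGB bitmap ever proves too costly, the same idea can be expressed as a BooleanArray; a hypothetical helper, not part of this commit:

    // Sketch: mark every pixel inside the detection box as foreground.
    fun rectToBooleanMask(box: Rect, width: Int, height: Int): BooleanArray {
        val mask = BooleanArray(width * height)
        for (y in maxOf(0, box.top) until minOf(height, box.bottom)) {
            val rowStart = y * width
            for (x in maxOf(0, box.left) until minOf(width, box.right)) {
                mask[rowStart + x] = true
            }
        }
        return mask
    }

Whether this can replace the bitmap depends on what DistanceEstimator actually consumes, which the comments above leave open.
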
@@ -78,7 +132,7 @@ class CameraRepositoryImpl(
        )

        // 3. Build FrameData with relative depth only
        val frameData = meta.toFrameData(bitmap).copy(
        val frameData = syntheticMeta.toFrameData(bitmap).copy(
            medianDepth = midasResult?.relativeDepth
        )

@@ -136,4 +190,4 @@ class CameraRepositoryImpl(

            uri.toString()
        }
    }
}
@@ -111,7 +111,7 @@ val appModule = module {
    }

    // ML Model
    single<AIModel> { AIModelImpl() }
    single<AIModel> { AIModelImpl(androidContext()) }
    single<ObjectDetector> {
        ObjectDetectorImpl(
            context = androidContext(),
@@ -2,8 +2,10 @@ package com.example.livingai.domain.ml

import android.graphics.Bitmap
import android.graphics.Rect
import com.example.livingai.data.ml.ObjectDetectionResult

interface AIModel {
    fun deriveInference(bitmap: Bitmap): String
    suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>?
}
    suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult?
}
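
With detectObject added alongside segmentImage, a caller gets a single best detection or null when nothing clears the 0.5 confidence threshold. A hypothetical call site, mirroring what CameraViewModel does later in this diff:

    // Sketch only: run detection on an upright frame and report the best hit, if any.
    suspend fun describeFrame(aiModel: AIModel, frame: Bitmap): String {
        val detection = aiModel.detectObject(frame) ?: return "no object above threshold"
        return "${detection.label} ${(detection.confidence * 100).toInt()}% at ${detection.boundingBox}"
    }
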
@@ -1,6 +1,7 @@
package com.example.livingai.domain.repository

import android.graphics.Bitmap
import android.graphics.Rect
import androidx.camera.core.ImageProxy
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
@@ -12,7 +13,8 @@ interface CameraRepository {
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap,
        realObjectHeightMeters: Float?,
        focalLengthPixels: Float
        focalLengthPixels: Float,
        boundingBox: Rect? = null
    ): OrientationState
    suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
}
@@ -7,10 +7,14 @@ import androidx.camera.core.ImageProxy
import androidx.camera.view.LifecycleCameraController
import androidx.compose.foundation.Image
import androidx.compose.foundation.background
import androidx.compose.foundation.border
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.BoxWithConstraints
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.offset
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Camera
@@ -18,7 +22,6 @@ import androidx.compose.material3.CircularProgressIndicator
import androidx.compose.material3.FabPosition
import androidx.compose.material3.FloatingActionButton
import androidx.compose.material3.Icon
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
@@ -32,6 +35,7 @@ import androidx.compose.ui.graphics.Color
import androidx.compose.ui.graphics.asImageBitmap
import androidx.compose.ui.layout.ContentScale
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalDensity
import androidx.compose.ui.unit.dp
import androidx.navigation.NavController
import androidx.core.content.ContextCompat
@@ -67,6 +71,7 @@ fun CameraScreen(
    PermissionWrapper {
        val state by viewModel.state.collectAsState()
        val context = LocalContext.current
        val density = LocalDensity.current

        val controller = remember {
            LifecycleCameraController(context).apply {
@@ -83,31 +88,14 @@ fun CameraScreen(
                        viewModel.onEvent(CameraEvent.ImageCaptured(image))
                    }

                    override fun onError(exception: ImageCaptureException) {
                        // Handle error, e.g., log it or show a message
                    }
                    override fun onError(exception: ImageCaptureException) {}
                }
            )
        }

        LaunchedEffect(state.shouldAutoCapture) {
            if (state.shouldAutoCapture) {
                takePhoto()
                viewModel.onEvent(CameraEvent.AutoCaptureTriggered)
            }
        }

        LaunchedEffect(state.capturedImageUri) {
            state.capturedImageUri?.let {
                navController.navigate(
                    Route.ViewImageScreen(
                        imageUri = it.toString(),
                        shouldAllowRetake = true,
                        showAccept = true,
                        orientation = orientation,
                        animalId = animalId
                    )
                )
                navController.navigate(Route.ViewImageScreen(it.toString(), true, orientation, true, false, animalId))
                viewModel.onEvent(CameraEvent.ClearCapturedImage)
            }
        }
@@ -120,71 +108,95 @@ fun CameraScreen(
            },
            floatingActionButtonPosition = FabPosition.Center
        ) { paddingValues ->
            Box(
                modifier = Modifier.fillMaxSize(),
            ) {
                Box(
                    modifier = Modifier.fillMaxSize()
                ) {
                    CameraPreview(
                        modifier = Modifier.fillMaxSize(),
                        controller = controller,
                        onFrame = { bitmap, rotation, fxPixels ->
                            viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
                        }
            BoxWithConstraints(modifier = Modifier.fillMaxSize().padding(paddingValues)) {
                val screenWidth = maxWidth
                val screenHeight = maxHeight

                CameraPreview(
                    modifier = Modifier.fillMaxSize(),
                    controller = controller,
                    onFrame = { bitmap, rotation, fxPixels ->
                        viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
                    }
                )

                state.detectionResult?.let { detection ->
                    val imageWidth = state.imageWidth.toFloat()
                    val imageHeight = state.imageHeight.toFloat()
                    if (imageWidth == 0f || imageHeight == 0f) return@let

                    val screenW = with(density) { screenWidth.toPx() }
                    val screenH = with(density) { screenHeight.toPx() }

                    val scaleX = screenW / imageWidth
                    val scaleY = screenH / imageHeight
                    val scale = maxOf(scaleX, scaleY) // For FILL_CENTER behavior

                    val offsetX = (screenW - imageWidth * scale) / 2f
                    val offsetY = (screenH - imageHeight * scale) / 2f

                    val bbox = detection.boundingBox
                    val left = bbox.left * scale + offsetX
                    val top = bbox.top * scale + offsetY

                    val leftDp = with(density) { left.toDp() }
                    val topDp = with(density) { top.toDp() }
                    val widthDp = with(density) { (bbox.width() * scale).toDp() }
                    val heightDp = with(density) { (bbox.height() * scale).toDp() }

                    Box(
                        modifier = Modifier
                            .offset(x = leftDp, y = topDp)
                            .size(width = widthDp, height = heightDp)
                            .border(2.dp, Color.Yellow)
                    )

                // The ML segmentation mask
                state.segmentationMask?.let { mask ->
                    Image(
                        bitmap = mask.asImageBitmap(),
                        contentDescription = "Segmentation Overlay",
                        modifier = Modifier.fillMaxSize(),
                        contentScale = ContentScale.FillBounds,
                        alpha = 0.5f
                    )
                }

                state.silhouetteMask?.let {
                    Image(
                        bitmap = it.asImageBitmap(),
                        contentDescription = "Silhouette Overlay",
                        modifier = Modifier.fillMaxSize(),
                        contentScale = ContentScale.Fit,
                        alpha = 0.4f
                    // Overlay 1: Object Label & Confidence (Above the box)
                    Column(
                        modifier = Modifier
                            .offset(x = leftDp, y = topDp - 25.dp)
                            .background(Color.Black.copy(alpha = 0.7f))
                            .padding(4.dp)
                    ) {
                        Text(
                            text = "${detection.label} (${(detection.confidence * 100).toInt()}%)",
                            color = Color.White
                        )
                    }
                }

                // Debug Overlay

                // Overlay 2: Fixed top-right corner info
                state.orientationState?.let { orient ->
                    Box(
                    Column(
                        modifier = Modifier
                            .align(Alignment.TopEnd)
                            .padding(16.dp)
                            .background(Color.Black.copy(alpha = 0.5f), RoundedCornerShape(8.dp))
                            .background(Color.Black.copy(alpha = 0.7f), shape = RoundedCornerShape(8.dp))
                            .padding(8.dp)
                    ) {
                        Column {
                            Text("Success: ${orient.success}", color = Color.White)
                            Text("Reason: ${orient.reason}", color = Color.White)

                            orient.pixelMetrics?.let { pm ->
                                Text("Width (px): ${pm.widthPx}", color = Color.White)
                                Text("Height (px): ${pm.heightPx}", color = Color.White)
                            }

                            // Display depth metrics from OrientationState
                            orient.relativeDepth?.let { rel ->
                                Text("Rel Depth: %.4f".format(rel), color = Color.White)
                            }

                            orient.absoluteDistanceMeters?.let { abs ->
                                Text("Dist (m): %.2f".format(abs), color = Color.White)
                            }

                            Text("IOU: ${orient.iouScore}", color = Color.White)
                            Text("Matched: ${orient.orientationMatched}", color = Color.White)
                        if (orient.relativeDepth != null) {
                            Text(
                                text = "Rel Depth: %.2f".format(orient.relativeDepth),
                                color = Color.Cyan
                            )
                        }
                        if (orient.absoluteDistanceMeters != null) {
                            Text(
                                text = "Dist: %.2fm".format(orient.absoluteDistanceMeters),
                                color = Color.Green
                            )
                        }
                        if (orient.iouScore != null) {
                            Text(
                                text = "IoU: %.2f".format(orient.iouScore),
                                color = Color.Yellow
                            )
                        }
                        orient.pixelMetrics?.let { metrics ->
                            Text(
                                text = "W: ${metrics.widthPx}px H: ${metrics.heightPx}px",
                                color = Color.White
                            )
                        }
                    }
                }
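
The overlay math above maps detection coordinates from the analysis frame into preview space by scaling with max(scaleX, scaleY) and centering, which matches FILL_CENTER cropping. A worked example with hypothetical sizes:

    // Hypothetical numbers: 480x640 analysis frame shown on a 1080x2160 preview.
    // scaleX = 1080 / 480 = 2.25, scaleY = 2160 / 640 = 3.375  -> scale = 3.375 (cover)
    // offsetX = (1080 - 480 * 3.375) / 2 = -270   (horizontal edges of the frame are cropped)
    // offsetY = (2160 - 640 * 3.375) / 2 = 0
    // a bbox.left of 100 in the frame lands at 100 * 3.375 - 270 = 67.5 px on screen
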
@@ -6,6 +6,7 @@ import android.net.Uri
import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.example.livingai.data.ml.ObjectDetectionResult
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
@@ -72,7 +73,8 @@ class CameraViewModel(
    private fun clearCaptured() {
        _state.value = _state.value.copy(
            capturedImageUri = null,
            segmentationMask = null
            segmentationMask = null,
            detectionResult = null // Clear detection result as well
        )
    }

@@ -89,80 +91,49 @@ class CameraViewModel(
    }

    private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
        if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
            return
        }

        if (isProcessingFrame.compareAndSet(false, true)) {
            viewModelScope.launch {
                try {
                    val currentOrientationStr = _state.value.orientation
                    val silhouette = _state.value.savedMaskBitmap

                    val orientationState = if (currentOrientationStr != null && silhouette != null) {
                        val orientationEnum = mapStringToOrientation(currentOrientationStr)

                        cameraRepository.processFrame(
                            bitmap,
                            orientationEnum,
                            silhouette,
                            1.55f,
                            focalLengthPixels
                        )
                    // Rotate bitmap to be upright before processing
                    val rotatedBitmap = if (rotationDegrees != 0) {
                        val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
                        Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
                    } else {
                        null
                        bitmap
                    }

                    // Perform Object Detection
                    val detectionResult = aiModel.detectObject(rotatedBitmap)

                    var orientationState: OrientationState? = null
                    val requestedOrientationStr = _state.value.orientation

                    val result = aiModel.segmentImage(bitmap)
                    if (result != null) {
                        val (maskBitmap, _) = result

                        val rotatedMask = if (rotationDegrees != 0) {
                            val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
                            Bitmap.createBitmap(
                                maskBitmap,
                                0,
                                0,
                                maskBitmap.width,
                                maskBitmap.height,
                                matrix,
                                true
                            )
                        } else {
                            maskBitmap
                        }

                        val output = if(_state.value.orientation == "front" || _state.value.orientation == "back")
                            fitImageToCrop(rotatedMask, screenDims.screenWidth, screenDims.screenHeight)
                        else
                            fitImageToCrop(rotatedMask, screenDims.screenHeight, screenDims.screenWidth)

                        _state.value = _state.value.copy(
                            segmentationMask = output,
                            orientationState = orientationState
                        )

                        if (_state.value.isAutoCaptureEnabled &&
                            _state.value.savedMaskBitmap != null &&
                            output != null
                        ) {
                            val isValidCapture = calculateDistance(
                                _state.value.distanceMethod,
                                _state.value.savedMaskBitmap!!,
                                output,
                                _state.value.matchThreshold
                            )

                            if (isValidCapture) {
                                _state.value = _state.value.copy(shouldAutoCapture = true)
                            }
                        }
                    } else {
                        _state.value = _state.value.copy(
                            segmentationMask = null,
                            orientationState = orientationState
                        )
                    if (requestedOrientationStr != null && detectionResult != null) {
                        // We need a silhouette bitmap for processFrame. If not available, we can pass a dummy or handle inside.
                        // But for now, let's use the one we loaded in setContext
                        val silhouette = _state.value.silhouetteMask

                        if (silhouette != null) {
                            orientationState = cameraRepository.processFrame(
                                bitmap = rotatedBitmap,
                                requestedOrientation = mapStringToOrientation(requestedOrientationStr),
                                silhouetteBitmap = silhouette,
                                realObjectHeightMeters = null, // Or some default
                                focalLengthPixels = focalLengthPixels,
                                boundingBox = detectionResult.boundingBox // Pass the bbox we just found
                            )
                        }
                    }

                    _state.value = _state.value.copy(
                        detectionResult = detectionResult,
                        orientationState = orientationState, // Update state
                        imageWidth = rotatedBitmap.width,
                        imageHeight = rotatedBitmap.height
                    )

                } catch (e: Exception) {
                    e.printStackTrace()
                } finally {
                    isProcessingFrame.set(false)
                }
@@ -195,7 +166,10 @@ data class CameraUiState(
    val matchThreshold: Int = 50,
    val distanceMethod: String = "Jaccard",
    val shouldAutoCapture: Boolean = false,
    val orientationState: OrientationState? = null
    val orientationState: OrientationState? = null,
    val detectionResult: ObjectDetectionResult? = null,
    val imageWidth: Int = 0,
    val imageHeight: Int = 0
)

sealed class CameraEvent {