commit f647a509d5 ("detection")
parent c445068773

This commit replaces the ML Kit subject-segmentation path in AIModelImpl with TFLite EfficientDet-Lite0 object detection: it adds a labels.txt asset and an ObjectDetectionResult type, exposes detectObject on the AIModel interface, threads the detected bounding box through CameraRepository.processFrame (where a synthetic box mask stands in for the now-missing segmentation), and draws the detection box, label, and confidence as overlays in CameraScreen.
labels.txt (new file)
@@ -0,0 +1,81 @@
+Unknown
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
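These are the 80 COCO classes with an "Unknown" placeholder at index 0, matching the fallback used in AIModelImpl below. A minimal sketch of the index-to-label lookup (assuming the file is loaded in order, e.g. via FileUtil.loadLabels(context, "labels.txt"), so index 1 resolves to "person"):

    // Sketch: resolving a detector class index against labels.txt.
    fun labelFor(labels: List<String>, classIndex: Int): String =
        labels.getOrElse(classIndex) { "Unknown" } // out-of-range indices fall back as well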
AIModelImpl.kt (com.example.livingai.data.ml)
@@ -1,116 +1,94 @@
 package com.example.livingai.data.ml
 
+import android.content.Context
 import android.graphics.Bitmap
-import android.graphics.Color
 import android.graphics.Rect
 import com.example.livingai.domain.ml.AIModel
-import com.google.mlkit.vision.common.InputImage
-import com.google.mlkit.vision.segmentation.subject.SubjectSegmentation
-import com.google.mlkit.vision.segmentation.subject.SubjectSegmenterOptions
-import kotlinx.coroutines.suspendCancellableCoroutine
-import kotlin.coroutines.resume
-import kotlin.coroutines.resumeWithException
-
-private const val MASK_COLOR = 0x5500FF00 // semi-transparent green overlay
-
-class AIModelImpl : AIModel {
-
-    private val segmenter by lazy {
-        val options = SubjectSegmenterOptions.Builder()
-            .enableForegroundBitmap()
-            .build()
-        SubjectSegmentation.getClient(options)
-    }
-
-    override fun deriveInference(bitmap: Bitmap): String = "Inference Result"
-
-    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
-        return suspendCancellableCoroutine { cont ->
-            val image = InputImage.fromBitmap(bitmap, 0)
-
-            segmenter.process(image)
-                .addOnSuccessListener { result ->
-                    val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)
-
-                    // Instead of coloring it here, just pass the original mask bitmap
-                    // or ensure it's suitable for further processing.
-                    // The foreground bitmap from MLKit is usually the object cut out with transparent background.
-
-                    val booleanMask = createBooleanMask(fg)
-                    // We return the raw foreground bitmap as the 'maskBitmap' for now,
-                    // or a colorized version if that's what UI expects.
-                    // But for IOU/Overlap calculation, we might want the binary info.
-                    // The UI seems to overlay 'colorMask'.
-                    // DistanceEstimator uses 'segMaskBitmap'.
-
-                    val colorMask = createColorizedMask(fg)
-                    val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)
-
-                    // Returning colorMask as the first element because UI expects a visual overlay.
-                    // But note: DistanceEstimator might need the binary mask or the foreground.
-                    // If DistanceEstimator treats this bitmap as a mask, colorized is fine as long as alpha is preserved.
-                    cont.resume(Triple(colorMask, booleanMask, bbox))
-                }
-                .addOnFailureListener { e ->
-                    cont.resumeWithException(e)
-                }
-        }
-    }
-
-    private fun createColorizedMask(maskBitmap: Bitmap): Bitmap {
-        val w = maskBitmap.width
-        val h = maskBitmap.height
-        val pixels = IntArray(w * h)
-
-        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)
-
-        for (i in pixels.indices) {
-            // ML Kit Foreground Bitmap: Non-transparent pixels are the object.
-            if (Color.alpha(pixels[i]) > 0) {
-                pixels[i] = MASK_COLOR
-            }
-        }
-
-        return Bitmap.createBitmap(pixels, w, h, Bitmap.Config.ARGB_8888)
-    }
-
-    private fun createBooleanMask(bitmap: Bitmap): BooleanArray {
-        val w = bitmap.width
-        val h = bitmap.height
-        val mask = BooleanArray(w * h)
-        val pixels = IntArray(w * h)
-
-        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)
-
-        for (i in pixels.indices) {
-            mask[i] = Color.alpha(pixels[i]) > 0
-        }
-
-        return mask
-    }
-
-    private fun computeBoundingBox(mask: BooleanArray, w: Int, h: Int): Rect {
-        var minX = Int.MAX_VALUE
-        var minY = Int.MAX_VALUE
-        var maxX = Int.MIN_VALUE
-        var maxY = Int.MIN_VALUE
-
-        for (y in 0 until h) {
-            for (x in 0 until w) {
-                val idx = y * w + x
-                if (mask[idx]) {
-                    if (x < minX) minX = x
-                    if (y < minY) minY = y
-                    if (x > maxX) maxX = x
-                    if (y > maxY) maxY = y
-                }
-            }
-        }
-
-        return if (minX == Int.MAX_VALUE) {
-            Rect(0, 0, 0, 0)
-        } else {
-            Rect(minX, minY, maxX, maxY)
-        }
-    }
-}
+import org.tensorflow.lite.Interpreter
+import org.tensorflow.lite.support.common.FileUtil
+import org.tensorflow.lite.support.image.ImageProcessor
+import org.tensorflow.lite.support.image.TensorImage
+import org.tensorflow.lite.support.image.ops.ResizeOp
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+
+class AIModelImpl(private val context: Context) : AIModel {
+
+    private val objectDetector: Interpreter
+    private val labels: List<String>
+
+    init {
+        // Load the TFLite model from assets
+        val modelBuffer = FileUtil.loadMappedFile(context, "efficientdet-lite0.tflite")
+        val options = Interpreter.Options().apply { numThreads = 4 }
+        objectDetector = Interpreter(modelBuffer, options)
+
+        // Load labels from assets
+        labels = try {
+            FileUtil.loadLabels(context, "labels.txt")
+        } catch (e: Exception) {
+            e.printStackTrace()
+            emptyList()
+        }
+    }
+
+    override suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? {
+        // Preprocess the image
+        val imageProcessor = ImageProcessor.Builder()
+            .add(ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
+            .build()
+
+        var tensorImage = TensorImage.fromBitmap(bitmap)
+        tensorImage = imageProcessor.process(tensorImage)
+
+        // Prepare model inputs and outputs
+        // Based on crash: [1, 25, 4] vs [1, 10, 4]. The model outputs 25 detections, not 10.
+        val locations = Array(1) { Array(25) { FloatArray(4) } }
+        val classes = Array(1) { FloatArray(25) }
+        val scores = Array(1) { FloatArray(25) }
+        val numDetections = FloatArray(1)
+
+        val outputs = mapOf(
+            0 to locations,
+            1 to classes,
+            2 to scores,
+            3 to numDetections
+        )
+
+        // Run inference
+        objectDetector.runForMultipleInputsOutputs(arrayOf(tensorImage.buffer), outputs)
+
+        // Post-process the results
+        val bestDetection = scores[0].withIndex()
+            .maxByOrNull { it.value }
+            ?.takeIf { it.value > 0.5f } // Confidence threshold
+
+        if (bestDetection != null) {
+            val index = bestDetection.index
+            val score = bestDetection.value
+            val location = locations[0][index] // [ymin, xmin, ymax, xmax]
+            val labelIndex = classes[0][index].toInt()
+            val label = labels.getOrElse(labelIndex) { "Unknown" }
+
+            // Convert normalized coordinates to absolute pixel values
+            val ymin = location[0] * bitmap.height
+            val xmin = location[1] * bitmap.width
+            val ymax = location[2] * bitmap.height
+            val xmax = location[3] * bitmap.width
+
+            val boundingBox = Rect(xmin.toInt(), ymin.toInt(), xmax.toInt(), ymax.toInt())
+
+            return ObjectDetectionResult(boundingBox, label, score)
+        }
+
+        return null
+    }
+
+    // This is no longer the primary function, but kept for interface compliance
+    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
+        // Returning null as we are focusing on object detection now
+        return null
+    }
+
+    override fun deriveInference(bitmap: Bitmap): String = "Object Detection"
+}
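The [1, 25, 4] shape above was discovered from a crash log. Rather than hardcoding 25, the buffer sizes can be read from the interpreter at init time; a small sketch using the standard TFLite Java API (Interpreter.getOutputTensor / Tensor.shape), under the same assumption the code already makes that output 0 holds the box locations:

    // Sketch: size the output buffers from the model itself instead of a magic 25.
    val detShape = objectDetector.getOutputTensor(0).shape() // e.g. [1, 25, 4]
    val maxDetections = detShape[1]
    val locations = Array(1) { Array(maxDetections) { FloatArray(4) } }
    val classes = Array(1) { FloatArray(maxDetections) }
    val scores = Array(1) { FloatArray(maxDetections) }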
ObjectDetectionResult.kt (com.example.livingai.data.ml, new file)
@@ -0,0 +1,9 @@
+package com.example.livingai.data.ml
+
+import android.graphics.Rect
+
+data class ObjectDetectionResult(
+    val boundingBox: Rect,
+    val label: String,
+    val confidence: Float
+)
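A hypothetical call site showing how the result's fields are consumed; scope and aiModel are placeholder names, not code from this commit:

    import android.graphics.Bitmap
    import android.util.Log
    import com.example.livingai.domain.ml.AIModel
    import kotlinx.coroutines.CoroutineScope
    import kotlinx.coroutines.launch

    fun logBestDetection(scope: CoroutineScope, aiModel: AIModel, frame: Bitmap) {
        scope.launch {
            aiModel.detectObject(frame)?.let { r ->
                // confidence is 0..1; boundingBox is in pixel coordinates of `frame`
                Log.d("Detection", "${r.label} ${(r.confidence * 100).toInt()}% @ ${r.boundingBox}")
            }
        }
    }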
CameraRepositoryImpl.kt (data/repository)
@@ -4,6 +4,7 @@ import android.content.ContentValues
 import android.content.Context
 import android.graphics.Bitmap
 import android.graphics.Matrix
+import android.graphics.Rect
 import android.provider.MediaStore
 import androidx.camera.core.ImageProxy
 import com.example.livingai.data.ml.DistanceEstimatorImpl
@@ -48,16 +49,69 @@ class CameraRepositoryImpl(
         bitmap: Bitmap,
         requestedOrientation: Orientation,
         silhouetteBitmap: Bitmap,
-        realObjectHeightMeters: Float?, // ★ NEW PARAM
-        focalLengthPixels: Float // from camera intrinsics
+        realObjectHeightMeters: Float?,
+        focalLengthPixels: Float,
+        boundingBox: Rect?
     ): OrientationState = withContext(Dispatchers.Default) {
 
-        // 1. Collect segmentation
-        val meta = FrameMetadataProvider.collectMetadata(bitmap)
-        val bbox = meta.segmentationBox
-        val mask = meta.segmentationMaskBitmap
-
-        if (bbox == null || mask == null) {
+        // 1. Collect segmentation.
+        // ISSUE: FrameMetadataProvider.collectMetadata runs segmentation internally
+        // (getSegmentation -> aiModel.segmentImage), but AIModelImpl.segmentImage now
+        // returns null, so bbox would always be null and processFrame would bail out
+        // with "Segmentation missing".
+        // FIX: reuse the detection result we already have from CameraViewModel and
+        // synthesize the segmentation from its bounding box. DistanceEstimator needs a
+        // Bitmap mask, so build a simple box mask: a white rect marks object pixels.
+        val syntheticMeta = if (boundingBox != null) {
+            // Filling a full-size bitmap is not free, but this runs on the Default
+            // dispatcher. collectMetadata also gathers IMU and depth data, so the
+            // metadata is constructed manually here.
+            val maskBitmap = Bitmap.createBitmap(bitmap.width, bitmap.height, Bitmap.Config.ARGB_8888)
+            val canvas = android.graphics.Canvas(maskBitmap)
+            val paint = android.graphics.Paint().apply { color = android.graphics.Color.WHITE }
+            canvas.drawRect(boundingBox, paint)
+
+            val imu = FrameMetadataProvider.getIMU()
+            val rot = FrameMetadataProvider.getRotation()
+            val depth = FrameMetadataProvider.getDepthData()
+
+            FrameMetadataProvider.FrameCollectedMetadata(
+                segmentationMaskBitmap = maskBitmap,
+                segmentationBox = boundingBox,
+                depthMeters = depth.depthMeters,
+                depthWidth = depth.width,
+                depthHeight = depth.height,
+                depthConfidence = depth.confidence,
+                pitch = imu.pitch,
+                roll = imu.roll,
+                yaw = imu.yaw,
+                rotationDegrees = rot
+            )
+        } else {
+            FrameMetadataProvider.collectMetadata(bitmap)
+        }
+
+        val bbox = syntheticMeta.segmentationBox
+        // val mask = syntheticMeta.segmentationMaskBitmap // the mask is used inside distanceEstimator
+
+        if (bbox == null) {
             return@withContext OrientationState(
                 success = false,
                 reason = "Segmentation missing",
@@ -78,7 +132,7 @@ class CameraRepositoryImpl(
         )
 
         // 3. Build FrameData with relative depth only
-        val frameData = meta.toFrameData(bitmap).copy(
+        val frameData = syntheticMeta.toFrameData(bitmap).copy(
             medianDepth = midasResult?.relativeDepth
         )
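realObjectHeightMeters and focalLengthPixels suggest a pinhole-camera distance estimate downstream. DistanceEstimatorImpl's body is not in this diff, so this is an assumption about what it computes, but the standard relation is worth stating:

    // Sketch (assumed, not from this diff): pinhole model relating the object's
    // real height, its height in pixels, and the focal length in pixels.
    fun pinholeDistanceMeters(realHeightMeters: Float, focalLengthPixels: Float, heightPixels: Float): Float =
        realHeightMeters * focalLengthPixels / heightPixels
    // e.g. a 1.55 m object spanning 500 px under fx = 1000 px sits at ~3.1 m.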
Koin appModule (DI)
@@ -111,7 +111,7 @@ val appModule = module {
     }
 
     // ML Model
-    single<AIModel> { AIModelImpl() }
+    single<AIModel> { AIModelImpl(androidContext()) }
     single<ObjectDetector> {
         ObjectDetectorImpl(
             context = androidContext(),
AIModel.kt (com.example.livingai.domain.ml)
@@ -2,8 +2,10 @@ package com.example.livingai.domain.ml
 
 import android.graphics.Bitmap
 import android.graphics.Rect
+import com.example.livingai.data.ml.ObjectDetectionResult
 
 interface AIModel {
     fun deriveInference(bitmap: Bitmap): String
     suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>?
+    suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult?
 }
CameraRepository.kt (com.example.livingai.domain.repository)
@@ -1,6 +1,7 @@
 package com.example.livingai.domain.repository
 
 import android.graphics.Bitmap
+import android.graphics.Rect
 import androidx.camera.core.ImageProxy
 import com.example.livingai.domain.ml.Orientation
 import com.example.livingai.domain.ml.OrientationState
@@ -12,7 +13,8 @@ interface CameraRepository {
         requestedOrientation: Orientation,
         silhouetteBitmap: Bitmap,
         realObjectHeightMeters: Float?,
-        focalLengthPixels: Float
+        focalLengthPixels: Float,
+        boundingBox: Rect? = null
     ): OrientationState
     suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
 }
CameraScreen.kt (ui)
@@ -7,10 +7,14 @@ import androidx.camera.core.ImageProxy
 import androidx.camera.view.LifecycleCameraController
 import androidx.compose.foundation.Image
 import androidx.compose.foundation.background
+import androidx.compose.foundation.border
 import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.BoxWithConstraints
 import androidx.compose.foundation.layout.Column
 import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.offset
 import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
 import androidx.compose.foundation.shape.RoundedCornerShape
 import androidx.compose.material.icons.Icons
 import androidx.compose.material.icons.filled.Camera
@@ -18,7 +22,6 @@ import androidx.compose.material3.CircularProgressIndicator
 import androidx.compose.material3.FabPosition
 import androidx.compose.material3.FloatingActionButton
 import androidx.compose.material3.Icon
-import androidx.compose.material3.MaterialTheme
 import androidx.compose.material3.Scaffold
 import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
@@ -32,6 +35,7 @@ import androidx.compose.ui.graphics.Color
 import androidx.compose.ui.graphics.asImageBitmap
 import androidx.compose.ui.layout.ContentScale
 import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.platform.LocalDensity
 import androidx.compose.ui.unit.dp
 import androidx.navigation.NavController
 import androidx.core.content.ContextCompat
@@ -67,6 +71,7 @@ fun CameraScreen(
     PermissionWrapper {
         val state by viewModel.state.collectAsState()
         val context = LocalContext.current
+        val density = LocalDensity.current
 
         val controller = remember {
             LifecycleCameraController(context).apply {
@@ -83,31 +88,14 @@ fun CameraScreen(
                     viewModel.onEvent(CameraEvent.ImageCaptured(image))
                 }
 
-                override fun onError(exception: ImageCaptureException) {
-                    // Handle error, e.g., log it or show a message
-                }
+                override fun onError(exception: ImageCaptureException) {}
             }
         )
     }
 
-    LaunchedEffect(state.shouldAutoCapture) {
-        if (state.shouldAutoCapture) {
-            takePhoto()
-            viewModel.onEvent(CameraEvent.AutoCaptureTriggered)
-        }
-    }
-
     LaunchedEffect(state.capturedImageUri) {
         state.capturedImageUri?.let {
-            navController.navigate(
-                Route.ViewImageScreen(
-                    imageUri = it.toString(),
-                    shouldAllowRetake = true,
-                    showAccept = true,
-                    orientation = orientation,
-                    animalId = animalId
-                )
-            )
+            navController.navigate(Route.ViewImageScreen(it.toString(), true, orientation, true, false, animalId))
             viewModel.onEvent(CameraEvent.ClearCapturedImage)
         }
     }
@@ -120,71 +108,95 @@ fun CameraScreen(
         },
         floatingActionButtonPosition = FabPosition.Center
     ) { paddingValues ->
-        Box(
-            modifier = Modifier.fillMaxSize(),
-        ) {
-            Box(
-                modifier = Modifier.fillMaxSize()
-            ) {
-                CameraPreview(
-                    modifier = Modifier.fillMaxSize(),
-                    controller = controller,
-                    onFrame = { bitmap, rotation, fxPixels ->
-                        viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
-                    }
-                )
-
-                // The ML segmentation mask
-                state.segmentationMask?.let { mask ->
-                    Image(
-                        bitmap = mask.asImageBitmap(),
-                        contentDescription = "Segmentation Overlay",
-                        modifier = Modifier.fillMaxSize(),
-                        contentScale = ContentScale.FillBounds,
-                        alpha = 0.5f
-                    )
-                }
-
-                state.silhouetteMask?.let {
-                    Image(
-                        bitmap = it.asImageBitmap(),
-                        contentDescription = "Silhouette Overlay",
-                        modifier = Modifier.fillMaxSize(),
-                        contentScale = ContentScale.Fit,
-                        alpha = 0.4f
-                    )
-                }
-            }
-
-            // Debug Overlay
-            state.orientationState?.let { orient ->
-                Box(
-                    modifier = Modifier
-                        .align(Alignment.TopEnd)
-                        .padding(16.dp)
-                        .background(Color.Black.copy(alpha = 0.5f), RoundedCornerShape(8.dp))
-                        .padding(8.dp)
-                ) {
-                    Column {
-                        Text("Success: ${orient.success}", color = Color.White)
-                        Text("Reason: ${orient.reason}", color = Color.White)
-
-                        orient.pixelMetrics?.let { pm ->
-                            Text("Width (px): ${pm.widthPx}", color = Color.White)
-                            Text("Height (px): ${pm.heightPx}", color = Color.White)
-                        }
-
-                        // Display depth metrics from OrientationState
-                        orient.relativeDepth?.let { rel ->
-                            Text("Rel Depth: %.4f".format(rel), color = Color.White)
-                        }
-
-                        orient.absoluteDistanceMeters?.let { abs ->
-                            Text("Dist (m): %.2f".format(abs), color = Color.White)
-                        }
-
-                        Text("IOU: ${orient.iouScore}", color = Color.White)
-                        Text("Matched: ${orient.orientationMatched}", color = Color.White)
-                    }
-                }
-            }
+        BoxWithConstraints(modifier = Modifier.fillMaxSize().padding(paddingValues)) {
+            val screenWidth = maxWidth
+            val screenHeight = maxHeight
+
+            CameraPreview(
+                modifier = Modifier.fillMaxSize(),
+                controller = controller,
+                onFrame = { bitmap, rotation, fxPixels ->
+                    viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
+                }
+            )
+
+            state.detectionResult?.let { detection ->
+                val imageWidth = state.imageWidth.toFloat()
+                val imageHeight = state.imageHeight.toFloat()
+                if (imageWidth == 0f || imageHeight == 0f) return@let
+
+                val screenW = with(density) { screenWidth.toPx() }
+                val screenH = with(density) { screenHeight.toPx() }
+
+                val scaleX = screenW / imageWidth
+                val scaleY = screenH / imageHeight
+                val scale = maxOf(scaleX, scaleY) // For FILL_CENTER behavior
+
+                val offsetX = (screenW - imageWidth * scale) / 2f
+                val offsetY = (screenH - imageHeight * scale) / 2f
+
+                val bbox = detection.boundingBox
+                val left = bbox.left * scale + offsetX
+                val top = bbox.top * scale + offsetY
+
+                val leftDp = with(density) { left.toDp() }
+                val topDp = with(density) { top.toDp() }
+                val widthDp = with(density) { (bbox.width() * scale).toDp() }
+                val heightDp = with(density) { (bbox.height() * scale).toDp() }
+
+                Box(
+                    modifier = Modifier
+                        .offset(x = leftDp, y = topDp)
+                        .size(width = widthDp, height = heightDp)
+                        .border(2.dp, Color.Yellow)
+                )
+
+                // Overlay 1: Object Label & Confidence (Above the box)
+                Column(
+                    modifier = Modifier
+                        .offset(x = leftDp, y = topDp - 25.dp)
+                        .background(Color.Black.copy(alpha = 0.7f))
+                        .padding(4.dp)
+                ) {
+                    Text(
+                        text = "${detection.label} (${(detection.confidence * 100).toInt()}%)",
+                        color = Color.White
+                    )
+                }
+            }
+
+            // Overlay 2: Fixed top-right corner info
+            state.orientationState?.let { orient ->
+                Column(
+                    modifier = Modifier
+                        .align(Alignment.TopEnd)
+                        .padding(16.dp)
+                        .background(Color.Black.copy(alpha = 0.7f), shape = RoundedCornerShape(8.dp))
+                        .padding(8.dp)
+                ) {
+                    if (orient.relativeDepth != null) {
+                        Text(
+                            text = "Rel Depth: %.2f".format(orient.relativeDepth),
+                            color = Color.Cyan
+                        )
+                    }
+                    if (orient.absoluteDistanceMeters != null) {
+                        Text(
+                            text = "Dist: %.2fm".format(orient.absoluteDistanceMeters),
+                            color = Color.Green
+                        )
+                    }
+                    if (orient.iouScore != null) {
+                        Text(
+                            text = "IoU: %.2f".format(orient.iouScore),
+                            color = Color.Yellow
+                        )
+                    }
+                    orient.pixelMetrics?.let { metrics ->
+                        Text(
+                            text = "W: ${metrics.widthPx}px H: ${metrics.heightPx}px",
+                            color = Color.White
+                        )
+                    }
+                }
+            }
+        }
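The overlay math above inverts PreviewView's FILL_CENTER mapping: scale by max(scaleX, scaleY) so the image covers the screen, then center it, which means the offsets go negative when the preview crops the frame. The same mapping as a standalone helper (a sketch; output is a RectF in screen pixels):

    import android.graphics.Rect
    import android.graphics.RectF

    fun imageRectToScreen(bbox: Rect, imageW: Float, imageH: Float, screenW: Float, screenH: Float): RectF {
        val scale = maxOf(screenW / imageW, screenH / imageH) // FILL_CENTER: cover the screen
        val offsetX = (screenW - imageW * scale) / 2f          // negative when cropped horizontally
        val offsetY = (screenH - imageH * scale) / 2f          // negative when cropped vertically
        return RectF(
            bbox.left * scale + offsetX,
            bbox.top * scale + offsetY,
            bbox.right * scale + offsetX,
            bbox.bottom * scale + offsetY
        )
    }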
CameraViewModel.kt (ui)
@@ -6,6 +6,7 @@ import android.net.Uri
 import androidx.camera.core.ImageProxy
 import androidx.lifecycle.ViewModel
 import androidx.lifecycle.viewModelScope
+import com.example.livingai.data.ml.ObjectDetectionResult
 import com.example.livingai.domain.ml.AIModel
 import com.example.livingai.domain.ml.Orientation
 import com.example.livingai.domain.ml.OrientationState
@@ -72,7 +73,8 @@ class CameraViewModel(
     private fun clearCaptured() {
         _state.value = _state.value.copy(
             capturedImageUri = null,
-            segmentationMask = null
+            segmentationMask = null,
+            detectionResult = null // Clear detection result as well
         )
     }
@@ -89,80 +91,49 @@ class CameraViewModel(
     }
 
     private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
-        if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
-            return
-        }
-
         if (isProcessingFrame.compareAndSet(false, true)) {
             viewModelScope.launch {
                 try {
-                    val currentOrientationStr = _state.value.orientation
-                    val silhouette = _state.value.savedMaskBitmap
-
-                    val orientationState = if (currentOrientationStr != null && silhouette != null) {
-                        val orientationEnum = mapStringToOrientation(currentOrientationStr)
-
-                        cameraRepository.processFrame(
-                            bitmap,
-                            orientationEnum,
-                            silhouette,
-                            1.55f,
-                            focalLengthPixels
-                        )
-                    } else {
-                        null
-                    }
-
-                    val result = aiModel.segmentImage(bitmap)
-                    if (result != null) {
-                        val (maskBitmap, _) = result
-
-                        val rotatedMask = if (rotationDegrees != 0) {
-                            val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
-                            Bitmap.createBitmap(
-                                maskBitmap,
-                                0,
-                                0,
-                                maskBitmap.width,
-                                maskBitmap.height,
-                                matrix,
-                                true
-                            )
-                        } else {
-                            maskBitmap
-                        }
-
-                        val output = if(_state.value.orientation == "front" || _state.value.orientation == "back")
-                            fitImageToCrop(rotatedMask, screenDims.screenWidth, screenDims.screenHeight)
-                        else
-                            fitImageToCrop(rotatedMask, screenDims.screenHeight, screenDims.screenWidth)
-
-                        _state.value = _state.value.copy(
-                            segmentationMask = output,
-                            orientationState = orientationState
-                        )
-
-                        if (_state.value.isAutoCaptureEnabled &&
-                            _state.value.savedMaskBitmap != null &&
-                            output != null
-                        ) {
-                            val isValidCapture = calculateDistance(
-                                _state.value.distanceMethod,
-                                _state.value.savedMaskBitmap!!,
-                                output,
-                                _state.value.matchThreshold
-                            )
-
-                            if (isValidCapture) {
-                                _state.value = _state.value.copy(shouldAutoCapture = true)
-                            }
-                        }
-                    } else {
-                        _state.value = _state.value.copy(
-                            segmentationMask = null,
-                            orientationState = orientationState
-                        )
-                    }
+                    // Rotate bitmap to be upright before processing
+                    val rotatedBitmap = if (rotationDegrees != 0) {
+                        val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
+                        Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
+                    } else {
+                        bitmap
+                    }
+
+                    // Perform object detection
+                    val detectionResult = aiModel.detectObject(rotatedBitmap)
+
+                    var orientationState: OrientationState? = null
+                    val requestedOrientationStr = _state.value.orientation
+
+                    if (requestedOrientationStr != null && detectionResult != null) {
+                        // processFrame needs a silhouette bitmap; use the one loaded in
+                        // setContext and skip orientation processing if it is missing.
+                        val silhouette = _state.value.silhouetteMask
+
+                        if (silhouette != null) {
+                            orientationState = cameraRepository.processFrame(
+                                bitmap = rotatedBitmap,
+                                requestedOrientation = mapStringToOrientation(requestedOrientationStr),
+                                silhouetteBitmap = silhouette,
+                                realObjectHeightMeters = null, // or some default
+                                focalLengthPixels = focalLengthPixels,
+                                boundingBox = detectionResult.boundingBox // pass the bbox we just found
+                            )
+                        }
+                    }
+
+                    _state.value = _state.value.copy(
+                        detectionResult = detectionResult,
+                        orientationState = orientationState,
+                        imageWidth = rotatedBitmap.width,
+                        imageHeight = rotatedBitmap.height
+                    )
+                } catch (e: Exception) {
+                    e.printStackTrace()
                 } finally {
                     isProcessingFrame.set(false)
                 }
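handleFrame gates on an AtomicBoolean so at most one frame is processed at a time; newer frames arriving mid-flight are dropped rather than queued. The pattern in isolation, as a sketch of the assumed semantics:

    import java.util.concurrent.atomic.AtomicBoolean
    import kotlinx.coroutines.CoroutineScope
    import kotlinx.coroutines.launch

    class FrameGate(private val scope: CoroutineScope) {
        private val busy = AtomicBoolean(false)

        // Runs `work` only if no other frame is in flight; otherwise drops the frame.
        fun submit(work: suspend () -> Unit) {
            if (busy.compareAndSet(false, true)) {
                scope.launch {
                    try { work() } finally { busy.set(false) } // always release the gate
                }
            }
        }
    }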
@@ -195,7 +166,10 @@ data class CameraUiState(
     val matchThreshold: Int = 50,
     val distanceMethod: String = "Jaccard",
     val shouldAutoCapture: Boolean = false,
-    val orientationState: OrientationState? = null
+    val orientationState: OrientationState? = null,
+    val detectionResult: ObjectDetectionResult? = null,
+    val imageWidth: Int = 0,
+    val imageHeight: Int = 0
 )
 
 sealed class CameraEvent {