diff --git a/.kotlin/sessions/kotlin-compiler-1168907591040633302.salive b/.kotlin/sessions/kotlin-compiler-13669449669175938098.salive similarity index 100% rename from .kotlin/sessions/kotlin-compiler-1168907591040633302.salive rename to .kotlin/sessions/kotlin-compiler-13669449669175938098.salive diff --git a/app/src/main/assets/labels.txt b/app/src/main/assets/labels.txt new file mode 100644 index 0000000..8625fda --- /dev/null +++ b/app/src/main/assets/labels.txt @@ -0,0 +1,81 @@ +Unknown +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt b/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt index 9cdd94d..7054311 100644 --- a/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt +++ b/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt @@ -1,116 +1,94 @@ package com.example.livingai.data.ml +import android.content.Context import android.graphics.Bitmap -import android.graphics.Color import android.graphics.Rect import com.example.livingai.domain.ml.AIModel -import com.google.mlkit.vision.common.InputImage -import com.google.mlkit.vision.segmentation.subject.SubjectSegmentation -import com.google.mlkit.vision.segmentation.subject.SubjectSegmenterOptions -import kotlinx.coroutines.suspendCancellableCoroutine -import kotlin.coroutines.resume -import kotlin.coroutines.resumeWithException +import org.tensorflow.lite.Interpreter +import org.tensorflow.lite.support.common.FileUtil +import org.tensorflow.lite.support.image.ImageProcessor +import org.tensorflow.lite.support.image.TensorImage +import org.tensorflow.lite.support.image.ops.ResizeOp +import java.nio.ByteBuffer +import java.nio.ByteOrder -private const val MASK_COLOR = 0x5500FF00 // semi-transparent green overlay +class AIModelImpl(private val context: Context) : AIModel { -class AIModelImpl : AIModel { + private val objectDetector: Interpreter + private val labels: List - private val segmenter by lazy { - val options = SubjectSegmenterOptions.Builder() - .enableForegroundBitmap() + init { + // Load the TFLite model from assets + val modelBuffer = FileUtil.loadMappedFile(context, "efficientdet-lite0.tflite") + val options = Interpreter.Options().apply { numThreads = 4 } + objectDetector = Interpreter(modelBuffer, options) + + // Load labels from assets + labels = try { + FileUtil.loadLabels(context, "labels.txt") + } catch (e: Exception) { + e.printStackTrace() + emptyList() + } + } + + override suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? 
{ + // Preprocess the image + val imageProcessor = ImageProcessor.Builder() + .add(ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR)) .build() - SubjectSegmentation.getClient(options) + + var tensorImage = TensorImage.fromBitmap(bitmap) + tensorImage = imageProcessor.process(tensorImage) + + // Prepare model inputs and outputs + // Based on crash: [1, 25, 4] vs [1, 10, 4]. The model outputs 25 detections, not 10. + val locations = Array(1) { Array(25) { FloatArray(4) } } + val classes = Array(1) { FloatArray(25) } + val scores = Array(1) { FloatArray(25) } + val numDetections = FloatArray(1) + + val outputs = mapOf( + 0 to locations, + 1 to classes, + 2 to scores, + 3 to numDetections + ) + + // Run inference + objectDetector.runForMultipleInputsOutputs(arrayOf(tensorImage.buffer), outputs) + + // Post-process the results + val bestDetection = scores[0].withIndex() + .maxByOrNull { it.value } + ?.takeIf { it.value > 0.5f } // Confidence threshold + + if (bestDetection != null) { + val index = bestDetection.index + val score = bestDetection.value + val location = locations[0][index] // [ymin, xmin, ymax, xmax] + val labelIndex = classes[0][index].toInt() + val label = labels.getOrElse(labelIndex) { "Unknown" } + + // Convert normalized coordinates to absolute pixel values + val ymin = location[0] * bitmap.height + val xmin = location[1] * bitmap.width + val ymax = location[2] * bitmap.height + val xmax = location[3] * bitmap.width + + val boundingBox = Rect(xmin.toInt(), ymin.toInt(), xmax.toInt(), ymax.toInt()) + + return ObjectDetectionResult(boundingBox, label, score) + } + + return null } - - override fun deriveInference(bitmap: Bitmap): String = "Inference Result" - + + // This is no longer the primary function, but kept for interface compliance override suspend fun segmentImage(bitmap: Bitmap): Triple? { - return suspendCancellableCoroutine { cont -> - val image = InputImage.fromBitmap(bitmap, 0) - - segmenter.process(image) - .addOnSuccessListener { result -> - val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null) - - // Instead of coloring it here, just pass the original mask bitmap - // or ensure it's suitable for further processing. - // The foreground bitmap from MLKit is usually the object cut out with transparent background. - - val booleanMask = createBooleanMask(fg) - // We return the raw foreground bitmap as the 'maskBitmap' for now, - // or a colorized version if that's what UI expects. - // But for IOU/Overlap calculation, we might want the binary info. - // The UI seems to overlay 'colorMask'. - // DistanceEstimator uses 'segMaskBitmap'. - - val colorMask = createColorizedMask(fg) - val bbox = computeBoundingBox(booleanMask, fg.width, fg.height) - - // Returning colorMask as the first element because UI expects a visual overlay. - // But note: DistanceEstimator might need the binary mask or the foreground. - // If DistanceEstimator treats this bitmap as a mask, colorized is fine as long as alpha is preserved. - cont.resume(Triple(colorMask, booleanMask, bbox)) - } - .addOnFailureListener { e -> - cont.resumeWithException(e) - } - } + // Returning null as we are focusing on object detection now + return null } - private fun createColorizedMask(maskBitmap: Bitmap): Bitmap { - val w = maskBitmap.width - val h = maskBitmap.height - val pixels = IntArray(w * h) - - maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h) - - for (i in pixels.indices) { - // ML Kit Foreground Bitmap: Non-transparent pixels are the object. 
- if (Color.alpha(pixels[i]) > 0) { - pixels[i] = MASK_COLOR - } - } - - return Bitmap.createBitmap(pixels, w, h, Bitmap.Config.ARGB_8888) - } - - private fun createBooleanMask(bitmap: Bitmap): BooleanArray { - val w = bitmap.width - val h = bitmap.height - val mask = BooleanArray(w * h) - val pixels = IntArray(w * h) - - bitmap.getPixels(pixels, 0, w, 0, 0, w, h) - - for (i in pixels.indices) { - mask[i] = Color.alpha(pixels[i]) > 0 - } - - return mask - } - - private fun computeBoundingBox(mask: BooleanArray, w: Int, h: Int): Rect { - var minX = Int.MAX_VALUE - var minY = Int.MAX_VALUE - var maxX = Int.MIN_VALUE - var maxY = Int.MIN_VALUE - - for (y in 0 until h) { - for (x in 0 until w) { - val idx = y * w + x - if (mask[idx]) { - if (x < minX) minX = x - if (y < minY) minY = y - if (x > maxX) maxX = x - if (y > maxY) maxY = y - } - } - } - - return if (minX == Int.MAX_VALUE) { - Rect(0, 0, 0, 0) - } else { - Rect(minX, minY, maxX, maxY) - } - } + override fun deriveInference(bitmap: Bitmap): String = "Object Detection" } diff --git a/app/src/main/java/com/example/livingai/data/ml/ObjectDetectionResult.kt b/app/src/main/java/com/example/livingai/data/ml/ObjectDetectionResult.kt new file mode 100644 index 0000000..b040ff8 --- /dev/null +++ b/app/src/main/java/com/example/livingai/data/ml/ObjectDetectionResult.kt @@ -0,0 +1,9 @@ +package com.example.livingai.data.ml + +import android.graphics.Rect + +data class ObjectDetectionResult( + val boundingBox: Rect, + val label: String, + val confidence: Float +) diff --git a/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt b/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt index 3d47777..696d6c9 100644 --- a/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt +++ b/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt @@ -4,6 +4,7 @@ import android.content.ContentValues import android.content.Context import android.graphics.Bitmap import android.graphics.Matrix +import android.graphics.Rect import android.provider.MediaStore import androidx.camera.core.ImageProxy import com.example.livingai.data.ml.DistanceEstimatorImpl @@ -48,16 +49,69 @@ class CameraRepositoryImpl( bitmap: Bitmap, requestedOrientation: Orientation, silhouetteBitmap: Bitmap, - realObjectHeightMeters: Float?, // ★ NEW PARAM - focalLengthPixels: Float // from camera intrinsics + realObjectHeightMeters: Float?, + focalLengthPixels: Float, + boundingBox: Rect? ): OrientationState = withContext(Dispatchers.Default) { // 1. Collect segmentation - val meta = FrameMetadataProvider.collectMetadata(bitmap) - val bbox = meta.segmentationBox - val mask = meta.segmentationMaskBitmap + // Use the passed boundingBox if available, otherwise it relies on FrameMetadataProvider running segmentation again + // But FrameMetadataProvider.collectMetadata runs segmentation internally. + // To avoid re-running detection/segmentation if we already have bbox, we can pass it. + // However, FrameMetadataProvider currently calls getSegmentation(bitmap) which calls aiModel.segmentImage(bitmap). + // AIModel.segmentImage is returning null in current impl. + + // ISSUE: processFrame relies on FrameMetadataProvider.collectMetadata -> getSegmentation -> aiModel.segmentImage + // But AIModelImpl.segmentImage returns null! + // So bbox will be null, and processFrame returns early with "Segmentation missing". 
+ + // FIX: We need to use the detection result we already have from CameraViewModel. + // We will mock the segmentation result using the bounding box from object detection. + // And for the mask, since we don't have segmentation, we can either: + // a) Create a dummy mask filled within the bbox (simple box mask) + // b) Or just proceed if DistanceEstimator can handle it (it needs mask). + + // Let's create a synthetic mask from the bbox. + val syntheticMeta = if (boundingBox != null) { + // Create a simple mask where pixels inside bbox are true + // This is computationally expensive to do full bitmap, so be careful. + // But we need a Bitmap mask for DistanceEstimator. + // Let's create a black bitmap with white rect. + + // NOTE: This runs on Default dispatcher, so should be okay-ish. + + // However, FrameMetadataProvider.collectMetadata does more (IMU, Depth). + // Let's manually construct metadata. + + val maskBitmap = Bitmap.createBitmap(bitmap.width, bitmap.height, Bitmap.Config.ARGB_8888) + val canvas = android.graphics.Canvas(maskBitmap) + val paint = android.graphics.Paint().apply { color = android.graphics.Color.WHITE } + canvas.drawRect(boundingBox, paint) + + val imu = FrameMetadataProvider.getIMU() + val rot = FrameMetadataProvider.getRotation() + val depth = FrameMetadataProvider.getDepthData() + + FrameMetadataProvider.FrameCollectedMetadata( + segmentationMaskBitmap = maskBitmap, + segmentationBox = boundingBox, + depthMeters = depth.depthMeters, + depthWidth = depth.width, + depthHeight = depth.height, + depthConfidence = depth.confidence, + pitch = imu.pitch, + roll = imu.roll, + yaw = imu.yaw, + rotationDegrees = rot + ) + } else { + FrameMetadataProvider.collectMetadata(bitmap) + } - if (bbox == null || mask == null) { + val bbox = syntheticMeta.segmentationBox + // val mask = syntheticMeta.segmentationMaskBitmap // Mask is used inside distanceEstimator + + if (bbox == null) { return@withContext OrientationState( success = false, reason = "Segmentation missing", @@ -78,7 +132,7 @@ class CameraRepositoryImpl( ) // 3. Build FrameData with relative depth only - val frameData = meta.toFrameData(bitmap).copy( + val frameData = syntheticMeta.toFrameData(bitmap).copy( medianDepth = midasResult?.relativeDepth ) @@ -136,4 +190,4 @@ class CameraRepositoryImpl( uri.toString() } -} \ No newline at end of file +} diff --git a/app/src/main/java/com/example/livingai/di/AppModule.kt b/app/src/main/java/com/example/livingai/di/AppModule.kt index 36d14ef..6a52fee 100644 --- a/app/src/main/java/com/example/livingai/di/AppModule.kt +++ b/app/src/main/java/com/example/livingai/di/AppModule.kt @@ -111,7 +111,7 @@ val appModule = module { } // ML Model - single { AIModelImpl() } + single { AIModelImpl(androidContext()) } single { ObjectDetectorImpl( context = androidContext(), diff --git a/app/src/main/java/com/example/livingai/domain/ml/AIModel.kt b/app/src/main/java/com/example/livingai/domain/ml/AIModel.kt index 47a7d0f..2f97936 100644 --- a/app/src/main/java/com/example/livingai/domain/ml/AIModel.kt +++ b/app/src/main/java/com/example/livingai/domain/ml/AIModel.kt @@ -2,8 +2,10 @@ package com.example.livingai.domain.ml import android.graphics.Bitmap import android.graphics.Rect +import com.example.livingai.data.ml.ObjectDetectionResult interface AIModel { fun deriveInference(bitmap: Bitmap): String suspend fun segmentImage(bitmap: Bitmap): Triple? -} \ No newline at end of file + suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? 
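+    // Illustrative usage sketch for the new detectObject API (not part of the interface contract;
+    // `scope`, `frameBitmap` and the Koin-provided `aiModel` below are placeholder names):
+    //   scope.launch {
+    //       val detection = aiModel.detectObject(frameBitmap) // null when no detection clears the confidence threshold
+    //       detection?.let { d ->
+    //           Log.d("AIModel", "${d.label} ${(d.confidence * 100).toInt()}% at ${d.boundingBox}")
+    //       }
+    //   }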
+} diff --git a/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt b/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt index 14dc736..7bc4c0e 100644 --- a/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt +++ b/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt @@ -1,6 +1,7 @@ package com.example.livingai.domain.repository import android.graphics.Bitmap +import android.graphics.Rect import androidx.camera.core.ImageProxy import com.example.livingai.domain.ml.Orientation import com.example.livingai.domain.ml.OrientationState @@ -12,7 +13,8 @@ interface CameraRepository { requestedOrientation: Orientation, silhouetteBitmap: Bitmap, realObjectHeightMeters: Float?, - focalLengthPixels: Float + focalLengthPixels: Float, + boundingBox: Rect? = null ): OrientationState suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String } diff --git a/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt b/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt index ec915e9..90ca70b 100644 --- a/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt +++ b/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt @@ -7,10 +7,14 @@ import androidx.camera.core.ImageProxy import androidx.camera.view.LifecycleCameraController import androidx.compose.foundation.Image import androidx.compose.foundation.background +import androidx.compose.foundation.border import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.BoxWithConstraints import androidx.compose.foundation.layout.Column import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.offset import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.size import androidx.compose.foundation.shape.RoundedCornerShape import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.Camera @@ -18,7 +22,6 @@ import androidx.compose.material3.CircularProgressIndicator import androidx.compose.material3.FabPosition import androidx.compose.material3.FloatingActionButton import androidx.compose.material3.Icon -import androidx.compose.material3.MaterialTheme import androidx.compose.material3.Scaffold import androidx.compose.material3.Text import androidx.compose.runtime.Composable @@ -32,6 +35,7 @@ import androidx.compose.ui.graphics.Color import androidx.compose.ui.graphics.asImageBitmap import androidx.compose.ui.layout.ContentScale import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.platform.LocalDensity import androidx.compose.ui.unit.dp import androidx.navigation.NavController import androidx.core.content.ContextCompat @@ -67,6 +71,7 @@ fun CameraScreen( PermissionWrapper { val state by viewModel.state.collectAsState() val context = LocalContext.current + val density = LocalDensity.current val controller = remember { LifecycleCameraController(context).apply { @@ -83,31 +88,14 @@ fun CameraScreen( viewModel.onEvent(CameraEvent.ImageCaptured(image)) } - override fun onError(exception: ImageCaptureException) { - // Handle error, e.g., log it or show a message - } + override fun onError(exception: ImageCaptureException) {} } ) } - LaunchedEffect(state.shouldAutoCapture) { - if (state.shouldAutoCapture) { - takePhoto() - viewModel.onEvent(CameraEvent.AutoCaptureTriggered) - } - } - LaunchedEffect(state.capturedImageUri) { 
state.capturedImageUri?.let { - navController.navigate( - Route.ViewImageScreen( - imageUri = it.toString(), - shouldAllowRetake = true, - showAccept = true, - orientation = orientation, - animalId = animalId - ) - ) + navController.navigate(Route.ViewImageScreen(it.toString(), true, orientation, true, false, animalId)) viewModel.onEvent(CameraEvent.ClearCapturedImage) } } @@ -120,71 +108,95 @@ fun CameraScreen( }, floatingActionButtonPosition = FabPosition.Center ) { paddingValues -> - Box( - modifier = Modifier.fillMaxSize(), - ) { - Box( - modifier = Modifier.fillMaxSize() - ) { - CameraPreview( - modifier = Modifier.fillMaxSize(), - controller = controller, - onFrame = { bitmap, rotation, fxPixels -> - viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels)) - } + BoxWithConstraints(modifier = Modifier.fillMaxSize().padding(paddingValues)) { + val screenWidth = maxWidth + val screenHeight = maxHeight + + CameraPreview( + modifier = Modifier.fillMaxSize(), + controller = controller, + onFrame = { bitmap, rotation, fxPixels -> + viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels)) + } + ) + + state.detectionResult?.let { detection -> + val imageWidth = state.imageWidth.toFloat() + val imageHeight = state.imageHeight.toFloat() + if (imageWidth == 0f || imageHeight == 0f) return@let + + val screenW = with(density) { screenWidth.toPx() } + val screenH = with(density) { screenHeight.toPx() } + + val scaleX = screenW / imageWidth + val scaleY = screenH / imageHeight + val scale = maxOf(scaleX, scaleY) // For FILL_CENTER behavior + + val offsetX = (screenW - imageWidth * scale) / 2f + val offsetY = (screenH - imageHeight * scale) / 2f + + val bbox = detection.boundingBox + val left = bbox.left * scale + offsetX + val top = bbox.top * scale + offsetY + + val leftDp = with(density) { left.toDp() } + val topDp = with(density) { top.toDp() } + val widthDp = with(density) { (bbox.width() * scale).toDp() } + val heightDp = with(density) { (bbox.height() * scale).toDp() } + + Box( + modifier = Modifier + .offset(x = leftDp, y = topDp) + .size(width = widthDp, height = heightDp) + .border(2.dp, Color.Yellow) ) - // The ML segmentation mask - state.segmentationMask?.let { mask -> - Image( - bitmap = mask.asImageBitmap(), - contentDescription = "Segmentation Overlay", - modifier = Modifier.fillMaxSize(), - contentScale = ContentScale.FillBounds, - alpha = 0.5f - ) - } - - state.silhouetteMask?.let { - Image( - bitmap = it.asImageBitmap(), - contentDescription = "Silhouette Overlay", - modifier = Modifier.fillMaxSize(), - contentScale = ContentScale.Fit, - alpha = 0.4f + // Overlay 1: Object Label & Confidence (Above the box) + Column( + modifier = Modifier + .offset(x = leftDp, y = topDp - 25.dp) + .background(Color.Black.copy(alpha = 0.7f)) + .padding(4.dp) + ) { + Text( + text = "${detection.label} (${(detection.confidence * 100).toInt()}%)", + color = Color.White ) } } - - // Debug Overlay + + // Overlay 2: Fixed top-right corner info state.orientationState?.let { orient -> - Box( + Column( modifier = Modifier .align(Alignment.TopEnd) .padding(16.dp) - .background(Color.Black.copy(alpha = 0.5f), RoundedCornerShape(8.dp)) + .background(Color.Black.copy(alpha = 0.7f), shape = RoundedCornerShape(8.dp)) .padding(8.dp) ) { - Column { - Text("Success: ${orient.success}", color = Color.White) - Text("Reason: ${orient.reason}", color = Color.White) - - orient.pixelMetrics?.let { pm -> - Text("Width (px): ${pm.widthPx}", color = Color.White) - Text("Height (px): 
${pm.heightPx}", color = Color.White) - } - - // Display depth metrics from OrientationState - orient.relativeDepth?.let { rel -> - Text("Rel Depth: %.4f".format(rel), color = Color.White) - } - - orient.absoluteDistanceMeters?.let { abs -> - Text("Dist (m): %.2f".format(abs), color = Color.White) - } - - Text("IOU: ${orient.iouScore}", color = Color.White) - Text("Matched: ${orient.orientationMatched}", color = Color.White) + if (orient.relativeDepth != null) { + Text( + text = "Rel Depth: %.2f".format(orient.relativeDepth), + color = Color.Cyan + ) + } + if (orient.absoluteDistanceMeters != null) { + Text( + text = "Dist: %.2fm".format(orient.absoluteDistanceMeters), + color = Color.Green + ) + } + if (orient.iouScore != null) { + Text( + text = "IoU: %.2f".format(orient.iouScore), + color = Color.Yellow + ) + } + orient.pixelMetrics?.let { metrics -> + Text( + text = "W: ${metrics.widthPx}px H: ${metrics.heightPx}px", + color = Color.White + ) } } } diff --git a/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt b/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt index 41212d0..4eeb041 100644 --- a/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt +++ b/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt @@ -6,6 +6,7 @@ import android.net.Uri import androidx.camera.core.ImageProxy import androidx.lifecycle.ViewModel import androidx.lifecycle.viewModelScope +import com.example.livingai.data.ml.ObjectDetectionResult import com.example.livingai.domain.ml.AIModel import com.example.livingai.domain.ml.Orientation import com.example.livingai.domain.ml.OrientationState @@ -72,7 +73,8 @@ class CameraViewModel( private fun clearCaptured() { _state.value = _state.value.copy( capturedImageUri = null, - segmentationMask = null + segmentationMask = null, + detectionResult = null // Clear detection result as well ) } @@ -89,80 +91,49 @@ class CameraViewModel( } private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) { - if (_state.value.isCapturing || _state.value.shouldAutoCapture) { - return - } - if (isProcessingFrame.compareAndSet(false, true)) { viewModelScope.launch { try { - val currentOrientationStr = _state.value.orientation - val silhouette = _state.value.savedMaskBitmap - - val orientationState = if (currentOrientationStr != null && silhouette != null) { - val orientationEnum = mapStringToOrientation(currentOrientationStr) - - cameraRepository.processFrame( - bitmap, - orientationEnum, - silhouette, - 1.55f, - focalLengthPixels - ) + // Rotate bitmap to be upright before processing + val rotatedBitmap = if (rotationDegrees != 0) { + val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) } + Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true) } else { - null + bitmap } + + // Perform Object Detection + val detectionResult = aiModel.detectObject(rotatedBitmap) + + var orientationState: OrientationState? 
= null + val requestedOrientationStr = _state.value.orientation - val result = aiModel.segmentImage(bitmap) - if (result != null) { - val (maskBitmap, _) = result - - val rotatedMask = if (rotationDegrees != 0) { - val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) } - Bitmap.createBitmap( - maskBitmap, - 0, - 0, - maskBitmap.width, - maskBitmap.height, - matrix, - true - ) - } else { - maskBitmap - } - - val output = if(_state.value.orientation == "front" || _state.value.orientation == "back") - fitImageToCrop(rotatedMask, screenDims.screenWidth, screenDims.screenHeight) - else - fitImageToCrop(rotatedMask, screenDims.screenHeight, screenDims.screenWidth) - - _state.value = _state.value.copy( - segmentationMask = output, - orientationState = orientationState - ) - - if (_state.value.isAutoCaptureEnabled && - _state.value.savedMaskBitmap != null && - output != null - ) { - val isValidCapture = calculateDistance( - _state.value.distanceMethod, - _state.value.savedMaskBitmap!!, - output, - _state.value.matchThreshold - ) - - if (isValidCapture) { - _state.value = _state.value.copy(shouldAutoCapture = true) - } - } - } else { - _state.value = _state.value.copy( - segmentationMask = null, - orientationState = orientationState - ) + if (requestedOrientationStr != null && detectionResult != null) { + // We need a silhouette bitmap for processFrame. If not available, we can pass a dummy or handle inside. + // But for now, let's use the one we loaded in setContext + val silhouette = _state.value.silhouetteMask + + if (silhouette != null) { + orientationState = cameraRepository.processFrame( + bitmap = rotatedBitmap, + requestedOrientation = mapStringToOrientation(requestedOrientationStr), + silhouetteBitmap = silhouette, + realObjectHeightMeters = null, // Or some default + focalLengthPixels = focalLengthPixels, + boundingBox = detectionResult.boundingBox // Pass the bbox we just found + ) + } } + + _state.value = _state.value.copy( + detectionResult = detectionResult, + orientationState = orientationState, // Update state + imageWidth = rotatedBitmap.width, + imageHeight = rotatedBitmap.height + ) + + } catch (e: Exception) { + e.printStackTrace() } finally { isProcessingFrame.set(false) } @@ -195,7 +166,10 @@ data class CameraUiState( val matchThreshold: Int = 50, val distanceMethod: String = "Jaccard", val shouldAutoCapture: Boolean = false, - val orientationState: OrientationState? = null + val orientationState: OrientationState? = null, + val detectionResult: ObjectDetectionResult? = null, + val imageWidth: Int = 0, + val imageHeight: Int = 0 ) sealed class CameraEvent {