relative est

This commit is contained in:
SaiD 2025-12-10 09:48:54 +05:30
parent c2f3bdd089
commit c445068773
19 changed files with 733 additions and 111 deletions

View File

@ -0,0 +1,4 @@
kotlin version: 2.0.21
error message: The daemon has terminated unexpectedly on startup attempt #1 with error code: 0. The daemon process output:
1. Kotlin compile daemon is ready

Binary file not shown.

View File

@ -32,10 +32,23 @@ class AIModelImpl : AIModel {
.addOnSuccessListener { result ->
val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)
// The foreground bitmap from ML Kit is the object cut out on a transparent
// background, so it is passed on as-is rather than coloured here.
val booleanMask = createBooleanMask(fg)
// The UI overlays 'colorMask'; DistanceEstimator consumes the segmentation mask,
// and the boolean mask carries the binary info needed for IoU/overlap calculations.
val colorMask = createColorizedMask(fg)
val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)
// colorMask is returned first because the UI expects a visual overlay; its alpha
// channel is preserved, so it still works as a mask downstream.
cont.resume(Triple(colorMask, booleanMask, bbox))
}
.addOnFailureListener { e ->
@ -52,6 +65,7 @@ class AIModelImpl : AIModel {
maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)
for (i in pixels.indices) {
// ML Kit foreground bitmap: non-transparent pixels belong to the object.
if (Color.alpha(pixels[i]) > 0) {
pixels[i] = MASK_COLOR
}

View File

@ -1,49 +1,53 @@
package com.example.livingai.data.ml
import android.graphics.Bitmap
import com.example.livingai.domain.ml.DistanceState
import com.example.livingai.domain.ml.FrameData
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationPixelEstimator
import com.example.livingai.domain.ml.OrientationState
class DistanceEstimatorImpl {
private val orientationEstimator = OrientationPixelEstimator(iouThreshold = 0.60f)
fun processFrame(
frameData: FrameData,
requestedOrientation: Orientation,
silhouetteBitmap: Bitmap
): OrientationState {
val segMaskBitmap = frameData.segmentationMaskBitmap
?: return OrientationState(
success = false,
reason = "No segmentation mask",
pixelMetrics = null,
orientationMatched = false
)
val bbox = frameData.segmentationBox
?: return OrientationState(
success = false,
reason = "No bounding box",
pixelMetrics = null,
orientationMatched = false
)
val result = orientationEstimator.analyze(
segmentationMaskBitmap = segMaskBitmap,
silhouetteBitmap = silhouetteBitmap,
bbox = bbox,
frameWidth = frameData.imageWidth,
frameHeight = frameData.imageHeight,
medianDepthMeters = frameData.medianDepth
)
return OrientationState(
success = result.orientationMatched,
reason = if (result.orientationMatched) "OK" else "Orientation mismatch",
pixelMetrics = result.pixelMetrics,
orientationMatched = result.orientationMatched,
iouScore = result.iouScore
)
}
}

View File

@ -0,0 +1,133 @@
package com.example.livingai.data.ml
import android.content.Context
import android.graphics.Bitmap
import android.graphics.Rect
import org.tensorflow.lite.Interpreter
import org.tensorflow.lite.support.common.FileUtil
import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import java.nio.ByteBuffer
import java.nio.ByteOrder
data class MidasDepthResult(
val relativeDepth: Float,
val absoluteDistanceMeters: Float?
)
class MidasDepthEstimator(private val context: Context) {
private var interpreter: Interpreter? = null
companion object {
private const val MODEL_NAME = "midas_v2_1_small.tflite"
private const val INPUT_SIZE = 256
private val NORM_MEAN = floatArrayOf(123.675f, 116.28f, 103.53f)
private val NORM_STD = floatArrayOf(58.395f, 57.12f, 57.375f)
}
init {
setupInterpreter()
}
private fun setupInterpreter() {
try {
val files = context.assets.list("") ?: emptyArray()
if (!files.contains(MODEL_NAME)) return
val model = FileUtil.loadMappedFile(context, MODEL_NAME)
interpreter = Interpreter(model, Interpreter.Options().apply { setNumThreads(4) })
} catch (e: Exception) {
e.printStackTrace()
}
}
fun analyzeObject(
bitmap: Bitmap,
bbox: Rect,
realObjectHeightMeters: Float?,
focalLengthPixels: Float?
): MidasDepthResult? {
val interp = interpreter ?: return null
try {
// 1. Preprocess
var tensorImage = TensorImage(org.tensorflow.lite.DataType.FLOAT32)
tensorImage.load(bitmap)
val processor = ImageProcessor.Builder()
.add(ResizeOp(INPUT_SIZE, INPUT_SIZE, ResizeOp.ResizeMethod.BILINEAR))
.add(NormalizeOp(NORM_MEAN, NORM_STD))
.build()
tensorImage = processor.process(tensorImage)
// 2. Output Buffer
val outShape = interp.getOutputTensor(0).shape()
val size = outShape[1] * outShape[2]
val output = ByteBuffer.allocateDirect(size * 4).order(ByteOrder.nativeOrder())
// 3. Run MiDaS
interp.run(tensorImage.buffer, output)
output.rewind()
val depthArray = FloatArray(size)
output.asFloatBuffer().get(depthArray)
// MiDaS produces a relative (inverse-depth) map for the whole frame. Rather than
// taking the median of the full frame, sample the depth map at the centre of the
// object's bounding box so the value reflects the object itself.
// Map BBox center to 256x256
val cx = bbox.centerX()
val cy = bbox.centerY()
val mapX = (cx * INPUT_SIZE) / bitmap.width
val mapY = (cy * INPUT_SIZE) / bitmap.height
// Clamp
val safeX = mapX.coerceIn(0, INPUT_SIZE - 1)
val safeY = mapY.coerceIn(0, INPUT_SIZE - 1)
val depthIndex = safeY * INPUT_SIZE + safeX
val objectRelativeDepth = depthArray[depthIndex]
// Note: MiDaS output is inverse depth (disparity).
// Higher value = Closer.
// 4. Absolute Distance (Pinhole)
val hPx = bbox.height().toFloat()
val absDistance = if (realObjectHeightMeters != null && focalLengthPixels != null && hPx > 0) {
(focalLengthPixels * realObjectHeightMeters) / hPx
} else {
null
}
return MidasDepthResult(
relativeDepth = objectRelativeDepth,
absoluteDistanceMeters = absDistance
)
} catch (e: Exception) {
e.printStackTrace()
return null
}
}
// Kept for compatibility; analyzeObject is the main entry point.
fun estimateDepth(bitmap: Bitmap): Float? {
// Simpler fallback: relative depth sampled over the whole frame
return analyzeObject(bitmap, Rect(0, 0, bitmap.width, bitmap.height), null, null)?.relativeDepth
}
}
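The absolute-distance branch above is plain pinhole geometry: with the focal length expressed in pixels and a known real-world object height, distance = (focalLengthPixels * realObjectHeightMeters) / bboxHeightPx. A minimal standalone sketch of that arithmetic follows; the focal length, object height, and box height are illustrative assumptions, not values taken from this commit.

// Pinhole-distance sketch: distance = (focalLengthPx * realHeightMeters) / objectHeightPx.
// All inputs below are illustrative assumptions, not values from the app.
fun pinholeDistanceMeters(focalLengthPx: Float, realHeightMeters: Float, objectHeightPx: Float): Float? =
    if (focalLengthPx > 0f && objectHeightPx > 0f) (focalLengthPx * realHeightMeters) / objectHeightPx
    else null

fun main() {
    // e.g. 1500 px focal length, 1.55 m tall object, 500 px bounding-box height
    println(pinholeDistanceMeters(1500f, 1.55f, 500f)) // ≈ 4.65 m
}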

View File

@ -7,10 +7,12 @@ import android.graphics.Matrix
import android.provider.MediaStore
import androidx.camera.core.ImageProxy
import com.example.livingai.data.ml.DistanceEstimatorImpl
import com.example.livingai.data.ml.MidasDepthEstimator
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.FrameMetadataProvider
import com.example.livingai.domain.ml.FrameMetadataProvider.toFrameData
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository
import com.example.livingai.utils.TiltSensorManager
import kotlinx.coroutines.Dispatchers
@ -23,9 +25,9 @@ class CameraRepositoryImpl(
) : CameraRepository {
private val distanceEstimator = DistanceEstimatorImpl()
private val midasEstimator = MidasDepthEstimator(context)
init {
FrameMetadataProvider.aiModel = aiModel
FrameMetadataProvider.tiltSensorManager = tiltSensorManager
}
@ -37,22 +39,61 @@ class CameraRepositoryImpl(
imageProxy.close()
if (rotation != 0) {
val matrix = Matrix().apply { postRotate(rotation.toFloat()) }
Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
} else bitmap
}
override suspend fun processFrame(
bitmap: Bitmap,
requestedOrientation: Orientation,
silhouetteBitmap: Bitmap,
realObjectHeightMeters: Float?, // ★ NEW PARAM
focalLengthPixels: Float // from camera intrinsics
): OrientationState = withContext(Dispatchers.Default) {
// 1. Collect segmentation
val meta = FrameMetadataProvider.collectMetadata(bitmap)
val bbox = meta.segmentationBox
val mask = meta.segmentationMaskBitmap
if (bbox == null || mask == null) {
return@withContext OrientationState(
success = false,
reason = "Segmentation missing",
pixelMetrics = null,
orientationMatched = false,
iouScore = null,
relativeDepth = null,
absoluteDistanceMeters = null
)
}
// 2. MiDaS (relative + absolute if reference height provided)
val midasResult = midasEstimator.analyzeObject(
bitmap = bitmap,
bbox = bbox,
realObjectHeightMeters = realObjectHeightMeters,
focalLengthPixels = focalLengthPixels
)
// 3. Build FrameData with relative depth only
val frameData = meta.toFrameData(bitmap).copy(
medianDepth = midasResult?.relativeDepth
)
// 4. Orientation detection
val orientationState = distanceEstimator.processFrame(
frameData = frameData,
requestedOrientation = requestedOrientation,
silhouetteBitmap = silhouetteBitmap
)
// 5. Inject relative + absolute values into final result
orientationState.copy(
relativeDepth = midasResult?.relativeDepth,
absoluteDistanceMeters = midasResult?.absoluteDistanceMeters
)
}
override suspend fun saveImage(
@ -60,6 +101,7 @@ class CameraRepositoryImpl(
animalId: String,
orientation: String?
): String = withContext(Dispatchers.IO) {
val suffix = orientation?.let { "_$it" } ?: ""
val fileName = "$animalId$suffix.jpg"
@ -74,7 +116,7 @@ class CameraRepositoryImpl(
val resolver = context.contentResolver
val uri = resolver.insert(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, values)
?: throw RuntimeException("Failed to insert image")
try {
resolver.openOutputStream(uri)?.use { out ->
@ -86,6 +128,7 @@ class CameraRepositoryImpl(
values.put(MediaStore.Images.Media.IS_PENDING, 0)
resolver.update(uri, values, null, null)
}
} catch (e: Exception) {
resolver.delete(uri, null, null)
throw e

View File

@ -0,0 +1,77 @@
package com.example.livingai.domain.ml
import android.content.Context
import android.graphics.Rect
import android.hardware.camera2.CameraCharacteristics
import android.hardware.camera2.CameraManager
import android.util.Size
import android.util.SizeF
/**
* Utility to read camera intrinsics from Camera2 and compute focal length (pixels).
*
* Usage:
* val (fPx, imgW, imgH) = CameraIntrinsicsFetcher.fetch(context, cameraId, imageSize)
* CameraInfoProvider.init(CameraInfoData(fPx, imgW, imgH, px, py, ...))
*
* imageSize = the resolution you will actually receive from the ImageReader / CameraX output (width,height)
*
* Formula:
* f_px = f_mm / sensorWidth_mm * imageWidth_px
*
* For more accuracy, map the active array size to the sensor's physical size.
*/
object CameraIntrinsicsFetcher {
data class Result(
val focalLengthPixels: Float,
val imageWidthPx: Int,
val imageHeightPx: Int,
val principalPointX: Float,
val principalPointY: Float,
val sensorPhysicalSizeMm: SizeF?
)
/**
* cameraId = device camera id (get from CameraManager)
* imageSize = the actual output image size you will capture (e.g., 1920x1080)
*/
fun fetch(context: Context, cameraId: String, imageSize: Size): Result {
val mgr = context.getSystemService(Context.CAMERA_SERVICE) as CameraManager
val characteristics = mgr.getCameraCharacteristics(cameraId)
val focalLengths = characteristics.get(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
val fMm = when {
focalLengths != null && focalLengths.isNotEmpty() -> focalLengths[0] // mm
else -> 4.0f
}
val sensorSize = characteristics.get(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE) // in mm
val sensorSizeMm = sensorSize
// active array size gives pixel array cropping of sensor -> map principal point
val activeRect = characteristics.get(CameraCharacteristics.SENSOR_INFO_ACTIVE_ARRAY_SIZE) // Rect
val activeRectW = activeRect?.width() ?: imageSize.width
val activeRectH = activeRect?.height() ?: imageSize.height
// Compute focal in pixels: ratio f_mm / sensorWidth_mm * imageWidth_px
val fPx = if (sensorSizeMm != null && sensorSizeMm.width > 0f) {
(fMm / sensorSizeMm.width) * imageSize.width
} else {
// fallback: assume a typical ~4 mm sensor width
(fMm / 4.0f) * imageSize.width
}
val principalX = (activeRect?.centerX() ?: imageSize.width / 2).toFloat()
val principalY = (activeRect?.centerY() ?: imageSize.height / 2).toFloat()
return Result(
focalLengthPixels = fPx,
imageWidthPx = imageSize.width,
imageHeightPx = imageSize.height,
principalPointX = principalX,
principalPointY = principalY,
sensorPhysicalSizeMm = sensorSizeMm
)
}
}
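As a sanity check of the formula in the header comment, a hypothetical 4.0 mm lens on a 5.6 mm-wide sensor delivering a 1920 px-wide image gives f_px = (4.0 / 5.6) * 1920 ≈ 1371 px. A self-contained sketch of the same conversion; all numbers are assumptions, not real device data.

// Focal-length conversion used by CameraIntrinsicsFetcher: f_px = (f_mm / sensorWidth_mm) * imageWidth_px.
// The inputs are illustrative assumptions, not real device characteristics.
fun focalLengthPixels(focalLengthMm: Float, sensorWidthMm: Float, imageWidthPx: Int): Float =
    (focalLengthMm / sensorWidthMm) * imageWidthPx

fun main() {
    println(focalLengthPixels(focalLengthMm = 4.0f, sensorWidthMm = 5.6f, imageWidthPx = 1920)) // ≈ 1371.4 px
}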

View File

@ -2,7 +2,6 @@ package com.example.livingai.domain.ml
import android.graphics.Bitmap
import android.graphics.Rect
/**
 * Interface for all distance estimators.
@ -14,28 +13,7 @@ interface DistanceEstimator {
): DistanceState
}
// FrameData is defined in FrameData.kt
/**
 * Singleton-provided camera intrinsics for metric calculations.

View File

@ -0,0 +1,32 @@
package com.example.livingai.domain.ml
import android.graphics.Bitmap
import android.graphics.Rect
/**
* Frame-specific data for one inference cycle.
*/
data class FrameData(
val imageBitmap: Bitmap?,
val segmentationBox: Rect?,
val segmentationMaskBitmap: Bitmap?,
// Optional ARCore depth inputs
val depthMapMeters: FloatArray?, // row-major, rows x cols
val depthWidth: Int = 0,
val depthHeight: Int = 0,
val depthConfidence: FloatArray? = null,
// IMU orientation
val imuPitchDegrees: Float = 0f,
val imuRollDegrees: Float = 0f,
val imuYawDegrees: Float = 0f,
val cameraRotationDegrees: Int = 0,
val timestampMs: Long = System.currentTimeMillis(),
// Fields added for relative-depth estimation
val imageWidth: Int = 0,
val imageHeight: Int = 0,
val medianDepth: Float? = null
)

View File

@ -15,14 +15,15 @@ object FrameMetadataProvider {
suspend fun getSegmentation(bitmap: Bitmap): SegmentationResult? {
return try {
val (maskBitmap, booleanMask, bbox) = aiModel.segmentImage(bitmap) ?: return null
SegmentationResult(maskBitmap, booleanMask, bbox)
} catch (_: Exception) {
null
}
}
data class SegmentationResult(
val maskBitmap: Bitmap?,
val mask: BooleanArray,
val boundingBox: Rect
) {
@ -32,6 +33,7 @@ object FrameMetadataProvider {
other as SegmentationResult
if (maskBitmap != other.maskBitmap) return false
if (!mask.contentEquals(other.mask)) return false
if (boundingBox != other.boundingBox) return false
@ -39,7 +41,8 @@ object FrameMetadataProvider {
}
override fun hashCode(): Int {
var result = maskBitmap?.hashCode() ?: 0
result = 31 * result + mask.contentHashCode()
result = 31 * result + boundingBox.hashCode()
return result
}
@ -96,6 +99,7 @@ object FrameMetadataProvider {
}
data class FrameCollectedMetadata(
val segmentationMaskBitmap: Bitmap?,
val segmentationBox: Rect?,
val depthMeters: FloatArray?,
val depthWidth: Int,
@ -112,6 +116,7 @@ object FrameMetadataProvider {
other as FrameCollectedMetadata
if (segmentationMaskBitmap != other.segmentationMaskBitmap) return false
if (segmentationBox != other.segmentationBox) return false
if (depthMeters != null) {
if (other.depthMeters == null) return false
@ -132,7 +137,8 @@ object FrameMetadataProvider {
}
override fun hashCode(): Int {
var result = segmentationMaskBitmap?.hashCode() ?: 0
result = 31 * result + (segmentationBox?.hashCode() ?: 0)
result = 31 * result + (depthMeters?.contentHashCode() ?: 0)
result = 31 * result + depthWidth
result = 31 * result + depthHeight
@ -152,6 +158,7 @@ object FrameMetadataProvider {
val rot = getRotation()
return FrameCollectedMetadata(
segmentationMaskBitmap = seg?.maskBitmap,
segmentationBox = seg?.boundingBox,
depthMeters = depth.depthMeters,
depthWidth = depth.width,
@ -168,7 +175,7 @@ object FrameMetadataProvider {
return FrameData(
imageBitmap = bitmap,
segmentationBox = segmentationBox,
segmentationMaskBitmap = segmentationMaskBitmap,
depthMapMeters = depthMeters,
depthWidth = depthWidth,
depthHeight = depthHeight,
@ -176,7 +183,12 @@ object FrameMetadataProvider {
imuPitchDegrees = pitch,
imuRollDegrees = roll,
imuYawDegrees = yaw,
cameraRotationDegrees = rotationDegrees,
// New fields populated from bitmap if available or passed down
imageWidth = bitmap.width,
imageHeight = bitmap.height,
medianDepth = null // Can calculate median from depthMeters if needed
)
}
}

View File

@ -0,0 +1,182 @@
package com.example.livingai.domain.ml
import android.graphics.Bitmap
import android.graphics.Rect
import kotlin.math.max
import kotlin.math.min
class OrientationPixelEstimator(
private val iouThreshold: Float = 0.60f
) {
/**
* Main function:
* - segmentationMaskBitmap: ML Kit's alpha mask (animal foreground)
* - silhouetteBitmap: template mask for EXPECTED orientation (e.g., LEFT)
* - bbox: detected bounding box from segmentation
*/
fun analyze(
segmentationMaskBitmap: Bitmap,
silhouetteBitmap: Bitmap,
bbox: Rect,
frameWidth: Int,
frameHeight: Int,
medianDepthMeters: Float? = null
): OrientationPixelResult {
// 1) Convert both masks → boolean
val segFullMask = bitmapToBooleanMask(segmentationMaskBitmap)
val silhouetteMask = bitmapToBooleanMask(silhouetteBitmap)
// 2) Crop segmentation mask to bbox
val croppedMask = cropMaskToBBox(segFullMask, frameWidth, frameHeight, bbox)
// 3) Scale silhouette mask to bbox size
val scaledSilhouette = scaleMask(
silhouetteMask,
silhouetteBitmap.width,
silhouetteBitmap.height,
bbox.width(),
bbox.height()
)
// 4) Compute IoU
val iou = computeIoU(croppedMask, scaledSilhouette)
val orientationMatched = iou >= iouThreshold
// 5) Pixel metrics extraction
val metrics = computePixelMetrics(croppedMask, bbox, medianDepthMeters)
return OrientationPixelResult(
orientationMatched = orientationMatched,
matchedOrientation = null,
iouScore = iou,
iouBestOther = 0f,
pixelMetrics = metrics
)
}
// -----------------------------
// MASK HELPERS
// -----------------------------
private fun bitmapToBooleanMask(bitmap: Bitmap): BooleanArray {
val w = bitmap.width
val h = bitmap.height
val pixels = IntArray(w * h)
bitmap.getPixels(pixels, 0, w, 0, 0, w, h)
val out = BooleanArray(w * h)
for (i in pixels.indices) {
val alpha = (pixels[i] ushr 24) and 0xFF
out[i] = alpha > 0
}
return out
}
private fun cropMaskToBBox(
fullMask: BooleanArray,
frameW: Int,
frameH: Int,
bbox: Rect
): BooleanArray {
val left = max(0, bbox.left)
val top = max(0, bbox.top)
val right = min(frameW - 1, bbox.right)
val bottom = min(frameH - 1, bbox.bottom)
val width = right - left + 1
val height = bottom - top + 1
val out = BooleanArray(width * height)
var idx = 0
for (y in top..bottom) {
for (x in left..right) {
out[idx++] = fullMask[y * frameW + x]
}
}
return out
}
private fun scaleMask(
src: BooleanArray,
srcW: Int,
srcH: Int,
dstW: Int,
dstH: Int
): BooleanArray {
val out = BooleanArray(dstW * dstH)
for (y in 0 until dstH) {
val sy = ((y.toFloat() / dstH) * srcH).toInt().coerceIn(0, srcH - 1)
for (x in 0 until dstW) {
val sx = ((x.toFloat() / dstW) * srcW).toInt().coerceIn(0, srcW - 1)
out[y * dstW + x] = src[sy * srcW + sx]
}
}
return out
}
private fun computeIoU(a: BooleanArray, b: BooleanArray): Float {
if (a.size != b.size) return 0f
var inter = 0
var union = 0
for (i in a.indices) {
val ai = a[i]
val bi = b[i]
if (ai || bi) union++
if (ai && bi) inter++
}
return if (union == 0) 0f else inter.toFloat() / union
}
// -----------------------------
// PIXEL METRICS
// -----------------------------
private fun computePixelMetrics(
croppedMask: BooleanArray,
bbox: Rect,
medianDepthMeters: Float?
): PixelMetrics {
val w = bbox.width()
val h = bbox.height()
var count = 0
var sumX = 0L
var sumY = 0L
for (y in 0 until h) {
for (x in 0 until w) {
if (croppedMask[y * w + x]) {
count++
sumX += x
sumY += y
}
}
}
val centroidX = bbox.left + (sumX.toFloat() / max(1, count))
val centroidY = bbox.top + (sumY.toFloat() / max(1, count))
return PixelMetrics(
widthPx = w,
heightPx = h,
areaPx = count,
centroidX = centroidX,
centroidY = centroidY,
distanceProxyInvHeight = if (h > 0) 1f / h.toFloat() else Float.POSITIVE_INFINITY,
heightPxFloat = h.toFloat(),
medianDepthMeters = medianDepthMeters
)
}
}
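The pass/fail decision reduces to computeIoU: intersection over union of the cropped segmentation mask and the scaled silhouette, compared against iouThreshold. A self-contained toy run of that calculation (the tiny masks below are made up purely for illustration):

// Toy IoU check mirroring OrientationPixelEstimator.computeIoU; the masks are illustrative.
fun iou(a: BooleanArray, b: BooleanArray): Float {
    var inter = 0
    var union = 0
    for (i in a.indices) {
        if (a[i] || b[i]) union++
        if (a[i] && b[i]) inter++
    }
    return if (union == 0) 0f else inter.toFloat() / union
}

fun main() {
    val segMask = booleanArrayOf(true, true, false, false)     // cropped segmentation mask
    val silhouette = booleanArrayOf(true, false, false, false) // scaled template mask
    val score = iou(segMask, silhouette)                       // 1 overlapping / 2 in union = 0.5
    println("IoU = $score, matched = ${score >= 0.60f}")       // 0.5 < 0.60 -> not matched
}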

View File

@ -0,0 +1,24 @@
package com.example.livingai.domain.ml
data class OrientationPixelResult(
val orientationMatched: Boolean, // true only if requested orientation is confidently matched
val matchedOrientation: Orientation?, // which orientation matched (if any)
val iouScore: Float, // IoU score for matched orientation (0..1)
val iouBestOther: Float, // best IoU among other orientations
val pixelMetrics: PixelMetrics? // null if orientation not matched
)
enum class Orientation {
LEFT, RIGHT, FRONT, BACK, LEFT_45, RIGHT_45, TOP, BOTTOM
}
data class PixelMetrics(
val widthPx: Int,
val heightPx: Int,
val areaPx: Int,
val centroidX: Float,
val centroidY: Float,
val distanceProxyInvHeight: Float, // 1 / heightPx (relative distance proxy)
val heightPxFloat: Float, // convenience
val medianDepthMeters: Float? // if depth map available (null otherwise)
)

View File

@ -0,0 +1,11 @@
package com.example.livingai.domain.ml
data class OrientationState(
val success: Boolean,
val reason: String,
val pixelMetrics: PixelMetrics?,
val orientationMatched: Boolean,
val iouScore: Float? = null,
val relativeDepth: Float? = null,
val absoluteDistanceMeters: Float? = null
)

View File

@ -0,0 +1,8 @@
package com.example.livingai.domain.ml
data class OrientationTemplate(
val orientation: Orientation,
val mask: BooleanArray,
val templateWidth: Int,
val templateHeight: Int
)

View File

@ -2,10 +2,17 @@ package com.example.livingai.domain.repository
import android.graphics.Bitmap
import androidx.camera.core.ImageProxy
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
interface CameraRepository {
suspend fun captureImage(imageProxy: ImageProxy): Bitmap
suspend fun processFrame(
bitmap: Bitmap,
requestedOrientation: Orientation,
silhouetteBitmap: Bitmap,
realObjectHeightMeters: Float?,
focalLengthPixels: Float
): OrientationState
suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
}

View File

@ -129,8 +129,8 @@ fun CameraScreen(
CameraPreview(
modifier = Modifier.fillMaxSize(),
controller = controller,
onFrame = { bitmap, rotation, fxPixels ->
viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
}
)
@ -157,7 +157,7 @@ fun CameraScreen(
}
// Debug Overlay
state.orientationState?.let { orient ->
Box(
modifier = Modifier
.align(Alignment.TopEnd)
@ -166,13 +166,25 @@ fun CameraScreen(
.padding(8.dp)
) {
Column {
Text("Success: ${orient.success}", color = Color.White)
Text("Reason: ${orient.reason}", color = Color.White)
orient.pixelMetrics?.let { pm ->
Text("Width (px): ${pm.widthPx}", color = Color.White)
Text("Height (px): ${pm.heightPx}", color = Color.White)
}
// Display depth metrics from OrientationState
orient.relativeDepth?.let { rel ->
Text("Rel Depth: %.4f".format(rel), color = Color.White)
}
orient.absoluteDistanceMeters?.let { abs ->
Text("Dist (m): %.2f".format(abs), color = Color.White)
}
Text("IOU: ${orient.iouScore}", color = Color.White)
Text("Matched: ${orient.orientationMatched}", color = Color.White)
}
}
}

View File

@ -7,7 +7,8 @@ import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository
import com.example.livingai.domain.usecases.AppDataUseCases
import com.example.livingai.utils.ScreenDimensions
@ -47,7 +48,7 @@ class CameraViewModel(
fun onEvent(event: CameraEvent) {
when (event) {
is CameraEvent.ImageCaptured -> handleImageProxy(event.imageProxy)
is CameraEvent.FrameReceived -> handleFrame(event.bitmap, event.rotationDegrees, event.focalLengthPixels)
is CameraEvent.ClearCapturedImage -> clearCaptured()
is CameraEvent.SetContext -> setContext(event.animalId, event.orientation)
is CameraEvent.AutoCaptureTriggered -> {
@ -87,7 +88,7 @@ class CameraViewModel(
}
}
private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
return
}
@ -95,8 +96,22 @@ class CameraViewModel(
if (isProcessingFrame.compareAndSet(false, true)) {
viewModelScope.launch {
try {
val currentOrientationStr = _state.value.orientation
val silhouette = _state.value.savedMaskBitmap
val orientationState = if (currentOrientationStr != null && silhouette != null) {
val orientationEnum = mapStringToOrientation(currentOrientationStr)
cameraRepository.processFrame(
bitmap,
orientationEnum,
silhouette,
1.55f, // assumed real-world object height in metres
focalLengthPixels
)
} else {
null
}
val result = aiModel.segmentImage(bitmap)
if (result != null) {
@ -124,7 +139,7 @@ class CameraViewModel(
_state.value = _state.value.copy(
segmentationMask = output,
orientationState = orientationState
)
if (_state.value.isAutoCaptureEnabled &&
@ -145,7 +160,7 @@ class CameraViewModel(
} else {
_state.value = _state.value.copy(
segmentationMask = null,
orientationState = orientationState
)
}
} finally {
@ -154,6 +169,18 @@ class CameraViewModel(
}
}
}
private fun mapStringToOrientation(orientation: String): Orientation {
return when (orientation.lowercase()) {
"front" -> Orientation.FRONT
"back" -> Orientation.BACK
"left" -> Orientation.LEFT
"right" -> Orientation.RIGHT
"leftangle" -> Orientation.LEFT_45
"rightangle" -> Orientation.RIGHT_45
else -> Orientation.FRONT
}
}
}
data class CameraUiState(
@ -168,12 +195,12 @@ data class CameraUiState(
val matchThreshold: Int = 50,
val distanceMethod: String = "Jaccard",
val shouldAutoCapture: Boolean = false,
val orientationState: OrientationState? = null
)
sealed class CameraEvent {
data class ImageCaptured(val imageProxy: ImageProxy) : CameraEvent()
data class FrameReceived(val bitmap: Bitmap, val rotationDegrees: Int, val focalLengthPixels: Float) : CameraEvent()
object ClearCapturedImage : CameraEvent()
data class SetContext(val animalId: String, val orientation: String?) : CameraEvent()
object AutoCaptureTriggered : CameraEvent()

View File

@ -3,6 +3,7 @@ package com.example.livingai.pages.components
import android.annotation.SuppressLint
import android.graphics.Bitmap
import android.hardware.camera2.CameraCharacteristics
import android.util.SizeF
import android.view.ViewGroup
import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo
@ -34,9 +35,8 @@ fun CameraPreview(
val cameraController = controller ?: remember { LifecycleCameraController(context) }
// State to hold the focal length in PIXELS.
val focalLengthPxState = remember { mutableStateOf(0f) }
// Periodically check/update focal length on the Main thread
LaunchedEffect(cameraController) {
@ -45,14 +45,48 @@ fun CameraPreview(
val info = cameraController.cameraInfo
if (info != null) {
val camera2Info = Camera2CameraInfo.from(info)
// 1. Get Focal Length (mm)
val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
val focalLengthMm = focalLengths?.firstOrNull() ?: 0f
// 2. Get Sensor Size (mm)
val sensorSize: SizeF? = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
val sensorWidthMm = sensorSize?.width ?: 0f
// 3. The focal length in pixels depends on the output resolution:
//    F_px = (F_mm / SensorWidth_mm) * ImageWidth_px.
// The frame width is only known per-frame inside the analyzer, and
// Camera2CameraInfo is safe to query from its background thread, so the
// conversion to pixels is done there rather than here.
}
} catch (e: Exception) {
// Ignore errors
}
// Check periodically in case the active camera changes
delay(2000)
}
}
@ -62,9 +96,29 @@ fun CameraPreview(
cameraController.setImageAnalysisAnalyzer(cameraExecutor) { imageProxy ->
val bitmap = imageProxy.toBitmap()
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
// Calculate Focal Length in Pixels
var fxPixels = 0f
try {
val info = cameraController.cameraInfo
if (info != null) {
val camera2Info = Camera2CameraInfo.from(info)
val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
val focalLengthMm = focalLengths?.firstOrNull() ?: 0f
val sensorSize = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
val sensorWidthMm = sensorSize?.width ?: 0f
if (sensorWidthMm > 0) {
val imageWidth = bitmap.width.toFloat()
fxPixels = (focalLengthMm / sensorWidthMm) * imageWidth
}
}
} catch (e: Exception) {
e.printStackTrace()
}
onFrame(bitmap, rotationDegrees, fxPixels)
imageProxy.close()
}
}
@ -94,7 +148,7 @@ fun CameraPreview(
}
},
onRelease = {
// Cleanup
}
)
}