relative est
commit c445068773
parent c2f3bdd089
@@ -0,0 +1,4 @@
kotlin version: 2.0.21
error message: The daemon has terminated unexpectedly on startup attempt #1 with error code: 0. The daemon process output:
1. Kotlin compile daemon is ready
Binary file not shown.
@@ -32,10 +32,23 @@ class AIModelImpl : AIModel {
            .addOnSuccessListener { result ->
                val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)

                // Don't colorize here; pass the original mask bitmap on (or ensure it is
                // suitable for further processing). The ML Kit foreground bitmap is the
                // object cut out on a transparent background.

                val booleanMask = createBooleanMask(fg)
                // The raw foreground (or a colorized version, if that is what the UI expects)
                // is returned as the 'maskBitmap'; the boolean mask carries the binary
                // information needed for IoU/overlap. The UI overlays 'colorMask', while
                // DistanceEstimator uses 'segMaskBitmap'.

                val colorMask = createColorizedMask(fg)
                val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)

                // colorMask comes first because the UI expects a visual overlay. If
                // DistanceEstimator treats this bitmap as a mask, a colorized version is
                // fine as long as its alpha channel is preserved.
                cont.resume(Triple(colorMask, booleanMask, bbox))
            }
            .addOnFailureListener { e ->

@@ -52,6 +65,7 @@ class AIModelImpl : AIModel {
        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)

        for (i in pixels.indices) {
            // ML Kit foreground bitmap: non-transparent pixels belong to the object.
            if (Color.alpha(pixels[i]) > 0) {
                pixels[i] = MASK_COLOR
            }
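createBooleanMask and computeBoundingBox are referenced above but fall outside this hunk. As a rough sketch of what a bounding-box helper over a row-major boolean mask could look like (illustrative only, not necessarily the implementation in AIModelImpl):

    private fun computeBoundingBox(mask: BooleanArray, width: Int, height: Int): Rect {
        // Scan the mask once and track the extremes of the true pixels.
        var left = width; var top = height; var right = -1; var bottom = -1
        for (y in 0 until height) {
            for (x in 0 until width) {
                if (mask[y * width + x]) {
                    if (x < left) left = x
                    if (x > right) right = x
                    if (y < top) top = y
                    if (y > bottom) bottom = y
                }
            }
        }
        // Empty mask -> empty Rect; otherwise right/bottom are exclusive.
        return if (right < left) Rect(0, 0, 0, 0) else Rect(left, top, right + 1, bottom + 1)
    }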
@@ -1,49 +1,53 @@
package com.example.livingai.data.ml

import com.example.livingai.domain.ml.ArcoreDepthEstimator
import com.example.livingai.domain.ml.CameraInfoData
import com.example.livingai.domain.ml.CameraInfoProvider
import com.example.livingai.domain.ml.DistanceEstimator
import com.example.livingai.domain.ml.DistanceRecommendation
import android.graphics.Bitmap
import com.example.livingai.domain.ml.DistanceState
import com.example.livingai.domain.ml.FrameData
import com.example.livingai.domain.ml.KnownDimensionEstimator
import com.example.livingai.utils.Constants
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationPixelEstimator
import com.example.livingai.domain.ml.OrientationState

class DistanceEstimatorImpl(
    private val mainEstimator: DistanceEstimator = ArcoreDepthEstimator(),
    private val fallbackEstimator: DistanceEstimator = KnownDimensionEstimator()
) {
class DistanceEstimatorImpl {

    fun processFrame(frame: FrameData): DistanceState {
        // Fallback or retrieve camera info
        val camInfo = CameraInfoProvider.tryGet()
            ?: createFallbackCameraInfo(frame)
    private val orientationEstimator = OrientationPixelEstimator(iouThreshold = 0.60f)

        val main = mainEstimator.analyze(frame, camInfo)
        return main.distanceMeters?.let { main }
            ?: fallbackEstimator.analyze(frame, camInfo)
    }
    fun processFrame(
        frameData: FrameData,
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap
    ): OrientationState {

    private fun createFallbackCameraInfo(frame: FrameData): CameraInfoData {
        // Estimate focal length based on FOV if available, or a reasonable default
        // For a typical phone:
        // H-FOV ~ 60-70 degrees
        // fx = (W/2) / tan(FOV/2)
        val w = frame.imageBitmap?.width ?: 1080
        val h = frame.imageBitmap?.height ?: 1920
        val segMaskBitmap = frameData.segmentationMaskBitmap
            ?: return OrientationState(
                success = false,
                reason = "No segmentation mask",
                pixelMetrics = null,
                orientationMatched = false
            )

        // Assume approx 60 degrees horizontal FOV as a fallback
        val fovDegrees = 60.0
        val fovRadians = Math.toRadians(fovDegrees)
        val focalLengthPx = (w / 2.0) / Math.tan(fovRadians / 2.0)
        val bbox = frameData.segmentationBox
            ?: return OrientationState(
                success = false,
                reason = "No bounding box",
                pixelMetrics = null,
                orientationMatched = false
            )

        return CameraInfoData(
            focalLengthPixels = focalLengthPx.toFloat(),
            sensorWidthPx = w,
            sensorHeightPx = h,
            principalPointX = w / 2f,
            principalPointY = h / 2f
        val result = orientationEstimator.analyze(
            segmentationMaskBitmap = segMaskBitmap,
            silhouetteBitmap = silhouetteBitmap,
            bbox = bbox,
            frameWidth = frameData.imageWidth,
            frameHeight = frameData.imageHeight,
            medianDepthMeters = frameData.medianDepth
        )

        return OrientationState(
            success = result.orientationMatched,
            reason = if (result.orientationMatched) "OK" else "Orientation mismatch",
            pixelMetrics = result.pixelMetrics,
            orientationMatched = result.orientationMatched,
            iouScore = result.iouScore
        )
    }
}
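For reference, the fallback being removed above derives the focal length from an assumed field of view via fx = (W/2) / tan(FOV/2). A quick numeric check with the same defaults (illustrative only):

    // 1080 px wide frame, assumed 60° horizontal FOV
    val w = 1080.0
    val fovRadians = Math.toRadians(60.0)
    val fx = (w / 2.0) / Math.tan(fovRadians / 2.0)   // ≈ 935 px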
@@ -0,0 +1,133 @@
package com.example.livingai.data.ml

import android.content.Context
import android.graphics.Bitmap
import android.graphics.Rect
import org.tensorflow.lite.Interpreter
import org.tensorflow.lite.support.common.FileUtil
import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import java.nio.ByteBuffer
import java.nio.ByteOrder

data class MidasDepthResult(
    val relativeDepth: Float,
    val absoluteDistanceMeters: Float?
)

class MidasDepthEstimator(private val context: Context) {

    private var interpreter: Interpreter? = null

    companion object {
        private const val MODEL_NAME = "midas_v2_1_small.tflite"
        private const val INPUT_SIZE = 256

        private val NORM_MEAN = floatArrayOf(123.675f, 116.28f, 103.53f)
        private val NORM_STD = floatArrayOf(58.395f, 57.12f, 57.375f)
    }

    init {
        setupInterpreter()
    }

    private fun setupInterpreter() {
        try {
            val files = context.assets.list("") ?: emptyArray()
            if (!files.contains(MODEL_NAME)) return

            val model = FileUtil.loadMappedFile(context, MODEL_NAME)
            interpreter = Interpreter(model, Interpreter.Options().apply { setNumThreads(4) })
        } catch (e: Exception) {
            e.printStackTrace()
        }
    }

    fun analyzeObject(
        bitmap: Bitmap,
        bbox: Rect,
        realObjectHeightMeters: Float?,
        focalLengthPixels: Float?
    ): MidasDepthResult? {
        val interp = interpreter ?: return null

        try {
            // 1. Preprocess
            var tensorImage = TensorImage(org.tensorflow.lite.DataType.FLOAT32)
            tensorImage.load(bitmap)

            val processor = ImageProcessor.Builder()
                .add(ResizeOp(INPUT_SIZE, INPUT_SIZE, ResizeOp.ResizeMethod.BILINEAR))
                .add(NormalizeOp(NORM_MEAN, NORM_STD))
                .build()

            tensorImage = processor.process(tensorImage)

            // 2. Output Buffer
            val outShape = interp.getOutputTensor(0).shape()
            val size = outShape[1] * outShape[2]
            val output = ByteBuffer.allocateDirect(size * 4).order(ByteOrder.nativeOrder())

            // 3. Run MiDaS
            interp.run(tensorImage.buffer, output)

            output.rewind()
            val depthArray = FloatArray(size)
            output.asFloatBuffer().get(depthArray)

            // MiDaS runs on the full frame and outputs a relative (inverse) depth map.
            // A whole-frame median is a common proxy for scene depth, but "distance to
            // the object" means object depth, so sample the depth map at the centre of
            // the bounding box instead (mapping the full bbox into the 256x256 output
            // would otherwise require scaling).

            // Map BBox center to 256x256
            val cx = bbox.centerX()
            val cy = bbox.centerY()
            val mapX = (cx * INPUT_SIZE) / bitmap.width
            val mapY = (cy * INPUT_SIZE) / bitmap.height

            // Clamp
            val safeX = mapX.coerceIn(0, INPUT_SIZE - 1)
            val safeY = mapY.coerceIn(0, INPUT_SIZE - 1)

            val depthIndex = safeY * INPUT_SIZE + safeX
            val objectRelativeDepth = depthArray[depthIndex]
            // Note: MiDaS output is inverse depth (disparity); higher value = closer.

            // 4. Absolute Distance (Pinhole)
            val hPx = bbox.height().toFloat()
            val absDistance = if (realObjectHeightMeters != null && focalLengthPixels != null && hPx > 0) {
                (focalLengthPixels * realObjectHeightMeters) / hPx
            } else {
                null
            }

            return MidasDepthResult(
                relativeDepth = objectRelativeDepth,
                absoluteDistanceMeters = absDistance
            )

        } catch (e: Exception) {
            e.printStackTrace()
            return null
        }
    }

    // Kept for compatibility if needed, but analyzeObject is the new main entry
    fun estimateDepth(bitmap: Bitmap): Float? {
        // Fallback or simpler version
        return analyzeObject(bitmap, Rect(0, 0, bitmap.width, bitmap.height), null, null)?.relativeDepth
    }
}
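The absolute distance above comes from the pinhole relation distance = focalLengthPixels * realObjectHeightMeters / bboxHeightPx. A quick check with illustrative numbers (not values from this commit):

    val focalLengthPixels = 1500f       // from camera intrinsics
    val realObjectHeightMeters = 1.5f   // known reference height of the subject
    val bboxHeightPx = 500f             // segmentation bounding-box height in the frame
    val distanceMeters = focalLengthPixels * realObjectHeightMeters / bboxHeightPx   // = 4.5 m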
@@ -7,10 +7,12 @@ import android.graphics.Matrix
import android.provider.MediaStore
import androidx.camera.core.ImageProxy
import com.example.livingai.data.ml.DistanceEstimatorImpl
import com.example.livingai.data.ml.MidasDepthEstimator
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.DistanceState
import com.example.livingai.domain.ml.FrameMetadataProvider
import com.example.livingai.domain.ml.FrameMetadataProvider.toFrameData
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository
import com.example.livingai.utils.TiltSensorManager
import kotlinx.coroutines.Dispatchers

@@ -23,9 +25,9 @@ class CameraRepositoryImpl(
) : CameraRepository {

    private val distanceEstimator = DistanceEstimatorImpl()
    private val midasEstimator = MidasDepthEstimator(context)

    init {
        // inject dependencies into metadata provider
        FrameMetadataProvider.aiModel = aiModel
        FrameMetadataProvider.tiltSensorManager = tiltSensorManager
    }

@@ -37,22 +39,61 @@ class CameraRepositoryImpl(
        imageProxy.close()

        if (rotation != 0) {
            val m = Matrix().apply { postRotate(rotation.toFloat()) }
            Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, m, true)
            val matrix = Matrix().apply { postRotate(rotation.toFloat()) }
            Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
        } else bitmap
    }

    override suspend fun processFrame(bitmap: Bitmap): DistanceState =
        withContext(Dispatchers.Default) {
    override suspend fun processFrame(
        bitmap: Bitmap,
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap,
        realObjectHeightMeters: Float?, // ★ NEW PARAM
        focalLengthPixels: Float // from camera intrinsics
    ): OrientationState = withContext(Dispatchers.Default) {

        // 1. Collect metadata
        // 1. Collect segmentation
        val meta = FrameMetadataProvider.collectMetadata(bitmap)
        val bbox = meta.segmentationBox
        val mask = meta.segmentationMaskBitmap

        // 2. Convert to FrameData
        val frameData = meta.toFrameData(bitmap)
        if (bbox == null || mask == null) {
            return@withContext OrientationState(
                success = false,
                reason = "Segmentation missing",
                pixelMetrics = null,
                orientationMatched = false,
                iouScore = null,
                relativeDepth = null,
                absoluteDistanceMeters = null
            )
        }

        // 3. Run distance estimator
        distanceEstimator.processFrame(frameData)
        // 2. MiDaS (relative + absolute if reference height provided)
        val midasResult = midasEstimator.analyzeObject(
            bitmap = bitmap,
            bbox = bbox,
            realObjectHeightMeters = realObjectHeightMeters,
            focalLengthPixels = focalLengthPixels
        )

        // 3. Build FrameData with relative depth only
        val frameData = meta.toFrameData(bitmap).copy(
            medianDepth = midasResult?.relativeDepth
        )

        // 4. Orientation detection
        val orientationState = distanceEstimator.processFrame(
            frameData = frameData,
            requestedOrientation = requestedOrientation,
            silhouetteBitmap = silhouetteBitmap
        )

        // 5. Inject relative + absolute values into final result
        orientationState.copy(
            relativeDepth = midasResult?.relativeDepth,
            absoluteDistanceMeters = midasResult?.absoluteDistanceMeters
        )
    }

    override suspend fun saveImage(

@@ -60,6 +101,7 @@ class CameraRepositoryImpl(
        animalId: String,
        orientation: String?
    ): String = withContext(Dispatchers.IO) {

        val suffix = orientation?.let { "_$it" } ?: ""
        val fileName = "$animalId$suffix.jpg"

@@ -74,7 +116,7 @@ class CameraRepositoryImpl(

        val resolver = context.contentResolver
        val uri = resolver.insert(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, values)
            ?: throw RuntimeException("Image insert failed")
            ?: throw RuntimeException("Failed to insert image")

        try {
            resolver.openOutputStream(uri)?.use { out ->

@@ -86,6 +128,7 @@ class CameraRepositoryImpl(
            values.put(MediaStore.Images.Media.IS_PENDING, 0)
            resolver.update(uri, values, null, null)
        }

        } catch (e: Exception) {
            resolver.delete(uri, null, null)
            throw e
@@ -0,0 +1,77 @@
package com.example.livingai.domain.ml

import android.content.Context
import android.graphics.Rect
import android.hardware.camera2.CameraCharacteristics
import android.hardware.camera2.CameraManager
import android.util.Size
import android.util.SizeF

/**
 * Utility to read camera intrinsics from Camera2 and compute focal length (pixels).
 *
 * Usage:
 *   val (fPx, imgW, imgH) = CameraIntrinsicsFetcher.fetch(context, cameraId, imageSize)
 *   CameraInfoProvider.init(CameraInfoData(fPx, imgW, imgH, px, py, ...))
 *
 * imageSize = the resolution you will actually receive from the ImageReader / CameraX output (width, height)
 *
 * Formula:
 *   f_px = f_mm / sensorWidth_mm * imageWidth_px
 *
 * More accurate: use activeArray size mapping to sensor physical size if needed.
 */
object CameraIntrinsicsFetcher {

    data class Result(
        val focalLengthPixels: Float,
        val imageWidthPx: Int,
        val imageHeightPx: Int,
        val principalPointX: Float,
        val principalPointY: Float,
        val sensorPhysicalSizeMm: SizeF?
    )

    /**
     * cameraId = device camera id (get from CameraManager)
     * imageSize = the actual output image size you will capture (e.g., 1920x1080)
     */
    fun fetch(context: Context, cameraId: String, imageSize: Size): Result {
        val mgr = context.getSystemService(Context.CAMERA_SERVICE) as CameraManager
        val characteristics = mgr.getCameraCharacteristics(cameraId)

        val focalLengths = characteristics.get(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
        val fMm = when {
            focalLengths != null && focalLengths.isNotEmpty() -> focalLengths[0] // mm
            else -> 4.0f
        }

        val sensorSize = characteristics.get(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE) // in mm
        val sensorSizeMm = sensorSize

        // active array size gives pixel array cropping of sensor -> map principal point
        val activeRect = characteristics.get(CameraCharacteristics.SENSOR_INFO_ACTIVE_ARRAY_SIZE) // Rect
        val activeRectW = activeRect?.width() ?: imageSize.width
        val activeRectH = activeRect?.height() ?: imageSize.height

        // Compute focal in pixels: ratio f_mm / sensorWidth_mm * imageWidth_px
        val fPx = if (sensorSizeMm != null && sensorSizeMm.width > 0f) {
            (fMm / sensorSizeMm.width) * imageSize.width
        } else {
            // fallback: estimate based on sensor pixel array
            (fMm / 4.0f) * imageSize.width
        }

        val principalX = (activeRect?.centerX() ?: imageSize.width / 2).toFloat()
        val principalY = (activeRect?.centerY() ?: imageSize.height / 2).toFloat()

        return Result(
            focalLengthPixels = fPx,
            imageWidthPx = imageSize.width,
            imageHeightPx = imageSize.height,
            principalPointX = principalX,
            principalPointY = principalY,
            sensorPhysicalSizeMm = sensorSizeMm
        )
    }
}
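To get a sense of scale for f_px = f_mm / sensorWidth_mm * imageWidth_px, with illustrative values rather than a specific device:

    val fMm = 4.38f            // LENS_INFO_AVAILABLE_FOCAL_LENGTHS[0]
    val sensorWidthMm = 5.64f  // SENSOR_INFO_PHYSICAL_SIZE.width
    val imageWidthPx = 1920
    val fPx = (fMm / sensorWidthMm) * imageWidthPx   // ≈ 1491 px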
@@ -2,7 +2,6 @@ package com.example.livingai.domain.ml

import android.graphics.Bitmap
import android.graphics.Rect
import kotlin.math.abs

/**
 * Interface for all distance estimators.

@@ -14,28 +13,7 @@ interface DistanceEstimator {
    ): DistanceState
}

/**
 * Frame-specific data for one inference cycle.
 */
data class FrameData(
    val imageBitmap: Bitmap?,
    val segmentationBox: Rect?,
    val segmentationMaskBitmap: Bitmap?,

    // Optional ARCore depth inputs
    val depthMapMeters: FloatArray?, // row-major R x C
    val depthWidth: Int = 0,
    val depthHeight: Int = 0,
    val depthConfidence: FloatArray? = null,

    // IMU orientation
    val imuPitchDegrees: Float = 0f,
    val imuRollDegrees: Float = 0f,
    val imuYawDegrees: Float = 0f,

    val cameraRotationDegrees: Int = 0,
    val timestampMs: Long = System.currentTimeMillis()
)
// FrameData is defined in FrameData.kt

/**
 * Singleton-provided camera intrinsics for metric calculations.
@@ -0,0 +1,32 @@
package com.example.livingai.domain.ml

import android.graphics.Bitmap
import android.graphics.Rect

/**
 * Frame-specific data for one inference cycle.
 */
data class FrameData(
    val imageBitmap: Bitmap?,
    val segmentationBox: Rect?,
    val segmentationMaskBitmap: Bitmap?,

    // Optional ARCore depth inputs
    val depthMapMeters: FloatArray?, // row-major R x C
    val depthWidth: Int = 0,
    val depthHeight: Int = 0,
    val depthConfidence: FloatArray? = null,

    // IMU orientation
    val imuPitchDegrees: Float = 0f,
    val imuRollDegrees: Float = 0f,
    val imuYawDegrees: Float = 0f,

    val cameraRotationDegrees: Int = 0,
    val timestampMs: Long = System.currentTimeMillis(),

    // Relative-depth support: frame size and the MiDaS relative depth
    val imageWidth: Int = 0,
    val imageHeight: Int = 0,
    val medianDepth: Float? = null
)
@@ -15,14 +15,15 @@ object FrameMetadataProvider {

    suspend fun getSegmentation(bitmap: Bitmap): SegmentationResult? {
        return try {
            val (_, booleanMask, bbox) = aiModel.segmentImage(bitmap) ?: return null
            SegmentationResult(booleanMask, bbox)
            val (maskBitmap, booleanMask, bbox) = aiModel.segmentImage(bitmap) ?: return null
            SegmentationResult(maskBitmap, booleanMask, bbox)
        } catch (_: Exception) {
            null
        }
    }

    data class SegmentationResult(
        val maskBitmap: Bitmap?,
        val mask: BooleanArray,
        val boundingBox: Rect
    ) {

@@ -32,6 +33,7 @@ object FrameMetadataProvider {

        other as SegmentationResult

        if (maskBitmap != other.maskBitmap) return false
        if (!mask.contentEquals(other.mask)) return false
        if (boundingBox != other.boundingBox) return false

@@ -39,7 +41,8 @@ object FrameMetadataProvider {
        }

        override fun hashCode(): Int {
            var result = mask.contentHashCode()
            var result = maskBitmap?.hashCode() ?: 0
            result = 31 * result + mask.contentHashCode()
            result = 31 * result + boundingBox.hashCode()
            return result
        }

@@ -96,6 +99,7 @@ object FrameMetadataProvider {
    }

    data class FrameCollectedMetadata(
        val segmentationMaskBitmap: Bitmap?,
        val segmentationBox: Rect?,
        val depthMeters: FloatArray?,
        val depthWidth: Int,

@@ -112,6 +116,7 @@ object FrameMetadataProvider {

        other as FrameCollectedMetadata

        if (segmentationMaskBitmap != other.segmentationMaskBitmap) return false
        if (segmentationBox != other.segmentationBox) return false
        if (depthMeters != null) {
            if (other.depthMeters == null) return false

@@ -132,7 +137,8 @@ object FrameMetadataProvider {
        }

        override fun hashCode(): Int {
            var result = segmentationBox?.hashCode() ?: 0
            var result = segmentationMaskBitmap?.hashCode() ?: 0
            result = 31 * result + (segmentationBox?.hashCode() ?: 0)
            result = 31 * result + (depthMeters?.contentHashCode() ?: 0)
            result = 31 * result + depthWidth
            result = 31 * result + depthHeight

@@ -152,6 +158,7 @@ object FrameMetadataProvider {
        val rot = getRotation()

        return FrameCollectedMetadata(
            segmentationMaskBitmap = seg?.maskBitmap,
            segmentationBox = seg?.boundingBox,
            depthMeters = depth.depthMeters,
            depthWidth = depth.width,

@@ -168,7 +175,7 @@ object FrameMetadataProvider {
        return FrameData(
            imageBitmap = bitmap,
            segmentationBox = segmentationBox,
            segmentationMaskBitmap = null,
            segmentationMaskBitmap = segmentationMaskBitmap,
            depthMapMeters = depthMeters,
            depthWidth = depthWidth,
            depthHeight = depthHeight,

@@ -176,7 +183,12 @@ object FrameMetadataProvider {
            imuPitchDegrees = pitch,
            imuRollDegrees = roll,
            imuYawDegrees = yaw,
            cameraRotationDegrees = rotationDegrees
            cameraRotationDegrees = rotationDegrees,

            // New fields populated from bitmap if available or passed down
            imageWidth = bitmap.width,
            imageHeight = bitmap.height,
            medianDepth = null // Can calculate median from depthMeters if needed
        )
    }
}
@@ -0,0 +1,182 @@
package com.example.livingai.domain.ml

import android.graphics.Bitmap
import android.graphics.Rect
import kotlin.math.max
import kotlin.math.min

class OrientationPixelEstimator(
    private val iouThreshold: Float = 0.60f
) {

    /**
     * Main function:
     * - segmentationMaskBitmap: MLKit's alpha mask (animal foreground)
     * - silhouetteBitmap: template mask for the EXPECTED orientation (e.g., LEFT)
     * - bbox: detected bounding box from segmentation
     */
    fun analyze(
        segmentationMaskBitmap: Bitmap,
        silhouetteBitmap: Bitmap,
        bbox: Rect,
        frameWidth: Int,
        frameHeight: Int,
        medianDepthMeters: Float? = null
    ): OrientationPixelResult {

        // 1) Convert both masks → boolean
        val segFullMask = bitmapToBooleanMask(segmentationMaskBitmap)
        val silhouetteMask = bitmapToBooleanMask(silhouetteBitmap)

        // 2) Crop segmentation mask to bbox
        val croppedMask = cropMaskToBBox(segFullMask, frameWidth, frameHeight, bbox)

        // 3) Scale silhouette mask to bbox size
        val scaledSilhouette = scaleMask(
            silhouetteMask,
            silhouetteBitmap.width,
            silhouetteBitmap.height,
            bbox.width(),
            bbox.height()
        )

        // 4) Compute IoU
        val iou = computeIoU(croppedMask, scaledSilhouette)
        val orientationMatched = iou >= iouThreshold

        // 5) Pixel metrics extraction
        val metrics = computePixelMetrics(croppedMask, bbox, medianDepthMeters)

        return OrientationPixelResult(
            orientationMatched = orientationMatched,
            matchedOrientation = null,
            iouScore = iou,
            iouBestOther = 0f,
            pixelMetrics = metrics
        )
    }

    // -----------------------------
    // MASK HELPERS
    // -----------------------------

    private fun bitmapToBooleanMask(bitmap: Bitmap): BooleanArray {
        val w = bitmap.width
        val h = bitmap.height
        val pixels = IntArray(w * h)
        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)

        val out = BooleanArray(w * h)
        for (i in pixels.indices) {
            val alpha = (pixels[i] ushr 24) and 0xFF
            out[i] = alpha > 0
        }
        return out
    }

    private fun cropMaskToBBox(
        fullMask: BooleanArray,
        frameW: Int,
        frameH: Int,
        bbox: Rect
    ): BooleanArray {

        // Treat right/bottom as exclusive so the crop is bbox.width() x bbox.height(),
        // matching the scaled silhouette in computeIoU and the loops in computePixelMetrics.
        val left = max(0, bbox.left)
        val top = max(0, bbox.top)
        val right = min(frameW, bbox.right)
        val bottom = min(frameH, bbox.bottom)

        val width = max(0, right - left)
        val height = max(0, bottom - top)

        val out = BooleanArray(width * height)
        var idx = 0

        for (y in top until bottom) {
            for (x in left until right) {
                out[idx++] = fullMask[y * frameW + x]
            }
        }

        return out
    }

    private fun scaleMask(
        src: BooleanArray,
        srcW: Int,
        srcH: Int,
        dstW: Int,
        dstH: Int
    ): BooleanArray {

        val out = BooleanArray(dstW * dstH)

        for (y in 0 until dstH) {
            val sy = ((y.toFloat() / dstH) * srcH).toInt().coerceIn(0, srcH - 1)
            for (x in 0 until dstW) {
                val sx = ((x.toFloat() / dstW) * srcW).toInt().coerceIn(0, srcW - 1)
                out[y * dstW + x] = src[sy * srcW + sx]
            }
        }

        return out
    }

    private fun computeIoU(a: BooleanArray, b: BooleanArray): Float {
        if (a.size != b.size) return 0f

        var inter = 0
        var union = 0

        for (i in a.indices) {
            val ai = a[i]
            val bi = b[i]
            if (ai || bi) union++
            if (ai && bi) inter++
        }

        return if (union == 0) 0f else inter.toFloat() / union
    }

    // -----------------------------
    // PIXEL METRICS
    // -----------------------------

    private fun computePixelMetrics(
        croppedMask: BooleanArray,
        bbox: Rect,
        medianDepthMeters: Float?
    ): PixelMetrics {

        val w = bbox.width()
        val h = bbox.height()

        var count = 0
        var sumX = 0L
        var sumY = 0L

        for (y in 0 until h) {
            for (x in 0 until w) {
                if (croppedMask[y * w + x]) {
                    count++
                    sumX += x
                    sumY += y
                }
            }
        }

        val centroidX = bbox.left + (sumX.toFloat() / max(1, count))
        val centroidY = bbox.top + (sumY.toFloat() / max(1, count))

        return PixelMetrics(
            widthPx = w,
            heightPx = h,
            areaPx = count,
            centroidX = centroidX,
            centroidY = centroidY,
            distanceProxyInvHeight = if (h > 0) 1f / h.toFloat() else Float.POSITIVE_INFINITY,
            heightPxFloat = h.toFloat(),
            medianDepthMeters = medianDepthMeters
        )
    }
}
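A tiny worked example of the IoU used above, with two 2x2 masks (illustrative only):

    val a = booleanArrayOf(true, true, false, false)
    val b = booleanArrayOf(true, false, true, false)
    // intersection = 1 (index 0), union = 3 (indices 0, 1, 2) -> IoU = 1/3 ≈ 0.33,
    // below the default iouThreshold of 0.60f, so this orientation would not match.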
@@ -0,0 +1,24 @@
package com.example.livingai.domain.ml

data class OrientationPixelResult(
    val orientationMatched: Boolean,      // true only if requested orientation is confidently matched
    val matchedOrientation: Orientation?, // which orientation matched (if any)
    val iouScore: Float,                  // IoU score for matched orientation (0..1)
    val iouBestOther: Float,              // best IoU among other orientations
    val pixelMetrics: PixelMetrics?       // null if orientation not matched
)

enum class Orientation {
    LEFT, RIGHT, FRONT, BACK, LEFT_45, RIGHT_45, TOP, BOTTOM
}

data class PixelMetrics(
    val widthPx: Int,
    val heightPx: Int,
    val areaPx: Int,
    val centroidX: Float,
    val centroidY: Float,
    val distanceProxyInvHeight: Float, // 1 / heightPx (relative distance proxy)
    val heightPxFloat: Float,          // convenience
    val medianDepthMeters: Float?      // if depth map available (null otherwise)
)
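distanceProxyInvHeight acts as a relative distance cue: as the subject moves away its apparent height shrinks, so the proxy grows (illustrative numbers only):

    val proxyNear = 1f / 400f   // bbox 400 px tall -> 0.0025
    val proxyFar  = 1f / 200f   // bbox 200 px tall -> 0.005  (larger value = farther away)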
@@ -0,0 +1,11 @@
package com.example.livingai.domain.ml

data class OrientationState(
    val success: Boolean,
    val reason: String,
    val pixelMetrics: PixelMetrics?,
    val orientationMatched: Boolean,
    val iouScore: Float? = null,
    val relativeDepth: Float? = null,
    val absoluteDistanceMeters: Float? = null
)
@@ -0,0 +1,8 @@
package com.example.livingai.domain.ml

data class OrientationTemplate(
    val orientation: Orientation,
    val mask: BooleanArray,
    val templateWidth: Int,
    val templateHeight: Int
)
@@ -2,10 +2,17 @@ package com.example.livingai.domain.repository

import android.graphics.Bitmap
import androidx.camera.core.ImageProxy
import com.example.livingai.domain.ml.DistanceState
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState

interface CameraRepository {
    suspend fun captureImage(imageProxy: ImageProxy): Bitmap
    suspend fun processFrame(bitmap: Bitmap): DistanceState
    suspend fun processFrame(
        bitmap: Bitmap,
        requestedOrientation: Orientation,
        silhouetteBitmap: Bitmap,
        realObjectHeightMeters: Float?,
        focalLengthPixels: Float
    ): OrientationState
    suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
}
@@ -129,8 +129,8 @@ fun CameraScreen(
        CameraPreview(
            modifier = Modifier.fillMaxSize(),
            controller = controller,
            onFrame = { bitmap, rotation, _ ->
                viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation))
            onFrame = { bitmap, rotation, fxPixels ->
                viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
            }
        )

@@ -157,7 +157,7 @@ fun CameraScreen(
        }

        // Debug Overlay
        state.distanceState?.let { dist ->
        state.orientationState?.let { orient ->
            Box(
                modifier = Modifier
                    .align(Alignment.TopEnd)

@@ -166,13 +166,25 @@ fun CameraScreen(
                    .padding(8.dp)
            ) {
                Column {
                    Text("Dist: ${dist.distanceMeters ?: "N/A"}", color = Color.White)
                    Text("Rec: ${dist.recommendation}", color = Color.White)
                    Text("Tilted: ${dist.isCameraTilted}", color = Color.White)
                    Text("Rotated: ${dist.isCameraRotated}", color = Color.White)
                    Text("Centered: ${dist.isObjectCentered}", color = Color.White)
                    Text("Ready: ${dist.readyToCapture}", color = Color.White)
                    Text("Conf: ${dist.confidenceScore}", color = Color.White)
                    Text("Success: ${orient.success}", color = Color.White)
                    Text("Reason: ${orient.reason}", color = Color.White)

                    orient.pixelMetrics?.let { pm ->
                        Text("Width (px): ${pm.widthPx}", color = Color.White)
                        Text("Height (px): ${pm.heightPx}", color = Color.White)
                    }

                    // Display depth metrics from OrientationState
                    orient.relativeDepth?.let { rel ->
                        Text("Rel Depth: %.4f".format(rel), color = Color.White)
                    }

                    orient.absoluteDistanceMeters?.let { abs ->
                        Text("Dist (m): %.2f".format(abs), color = Color.White)
                    }

                    Text("IOU: ${orient.iouScore}", color = Color.White)
                    Text("Matched: ${orient.orientationMatched}", color = Color.White)
                }
            }
        }
@@ -7,7 +7,8 @@ import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.DistanceState
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository
import com.example.livingai.domain.usecases.AppDataUseCases
import com.example.livingai.utils.ScreenDimensions

@@ -47,7 +48,7 @@ class CameraViewModel(
    fun onEvent(event: CameraEvent) {
        when (event) {
            is CameraEvent.ImageCaptured -> handleImageProxy(event.imageProxy)
            is CameraEvent.FrameReceived -> handleFrame(event.bitmap, event.rotationDegrees)
            is CameraEvent.FrameReceived -> handleFrame(event.bitmap, event.rotationDegrees, event.focalLengthPixels)
            is CameraEvent.ClearCapturedImage -> clearCaptured()
            is CameraEvent.SetContext -> setContext(event.animalId, event.orientation)
            is CameraEvent.AutoCaptureTriggered -> {

@@ -87,7 +88,7 @@ class CameraViewModel(
        }
    }

    private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int) {
    private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
        if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
            return
        }

@@ -95,8 +96,22 @@ class CameraViewModel(
        if (isProcessingFrame.compareAndSet(false, true)) {
            viewModelScope.launch {
                try {
                    // Process the frame for distance and metadata
                    val distanceState = cameraRepository.processFrame(bitmap)
                    val currentOrientationStr = _state.value.orientation
                    val silhouette = _state.value.savedMaskBitmap

                    val orientationState = if (currentOrientationStr != null && silhouette != null) {
                        val orientationEnum = mapStringToOrientation(currentOrientationStr)

                        cameraRepository.processFrame(
                            bitmap,
                            orientationEnum,
                            silhouette,
                            1.55f, // realObjectHeightMeters
                            focalLengthPixels
                        )
                    } else {
                        null
                    }

                    val result = aiModel.segmentImage(bitmap)
                    if (result != null) {

@@ -124,7 +139,7 @@ class CameraViewModel(

                        _state.value = _state.value.copy(
                            segmentationMask = output,
                            distanceState = distanceState
                            orientationState = orientationState
                        )

                        if (_state.value.isAutoCaptureEnabled &&

@@ -145,7 +160,7 @@ class CameraViewModel(
                    } else {
                        _state.value = _state.value.copy(
                            segmentationMask = null,
                            distanceState = distanceState
                            orientationState = orientationState
                        )
                    }
                } finally {

@@ -154,6 +169,18 @@ class CameraViewModel(
            }
        }
    }

    private fun mapStringToOrientation(orientation: String): Orientation {
        return when (orientation.lowercase()) {
            "front" -> Orientation.FRONT
            "back" -> Orientation.BACK
            "left" -> Orientation.LEFT
            "right" -> Orientation.RIGHT
            "leftangle" -> Orientation.LEFT_45
            "rightangle" -> Orientation.RIGHT_45
            else -> Orientation.FRONT
        }
    }
}

data class CameraUiState(

@@ -168,12 +195,12 @@ data class CameraUiState(
    val matchThreshold: Int = 50,
    val distanceMethod: String = "Jaccard",
    val shouldAutoCapture: Boolean = false,
    val distanceState: DistanceState? = null
    val orientationState: OrientationState? = null
)

sealed class CameraEvent {
    data class ImageCaptured(val imageProxy: ImageProxy) : CameraEvent()
    data class FrameReceived(val bitmap: Bitmap, val rotationDegrees: Int) : CameraEvent()
    data class FrameReceived(val bitmap: Bitmap, val rotationDegrees: Int, val focalLengthPixels: Float) : CameraEvent()
    object ClearCapturedImage : CameraEvent()
    data class SetContext(val animalId: String, val orientation: String?) : CameraEvent()
    object AutoCaptureTriggered : CameraEvent()
@@ -3,6 +3,7 @@ package com.example.livingai.pages.components
import android.annotation.SuppressLint
import android.graphics.Bitmap
import android.hardware.camera2.CameraCharacteristics
import android.util.SizeF
import android.view.ViewGroup
import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo

@@ -34,9 +35,8 @@ fun CameraPreview(

    val cameraController = controller ?: remember { LifecycleCameraController(context) }

    // State to hold the focal length.
    // Updated on the Main thread, read by the analysis background thread.
    val focalLengthState = remember { mutableStateOf(0f) }
    // State to hold the focal length in PIXELS.
    val focalLengthPxState = remember { mutableStateOf(0f) }

    // Periodically check/update focal length on the Main thread
    LaunchedEffect(cameraController) {

@@ -45,14 +45,48 @@ fun CameraPreview(
            val info = cameraController.cameraInfo
            if (info != null) {
                val camera2Info = Camera2CameraInfo.from(info)

                // 1. Get Focal Length (mm)
                val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
                val fl = focalLengths?.firstOrNull() ?: 0f
                focalLengthState.value = fl
                val focalLengthMm = focalLengths?.firstOrNull() ?: 0f

                // 2. Get Sensor Size (mm)
                val sensorSize: SizeF? = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
                val sensorWidthMm = sensorSize?.width ?: 0f

                // 3. The focal length in pixels needs the analysis image width:
                //    F_px = (F_mm / SensorWidth_mm) * ImageWidth_px
                // That width is only known per frame inside the analyzer (and the analysis
                // resolution can differ from the preview), so F_px cannot be computed here.
                // Camera2CameraInfo is thread safe, so the conversion is done inside the
                // analyzer block below instead.
            }
        } catch (e: Exception) {
            // Ignore errors, e.g. if camera is closing or not ready
            // Ignore errors
        }
        // Check periodically in case the active camera changes
        delay(2000)
    }
}

@@ -62,9 +96,29 @@ fun CameraPreview(
    cameraController.setImageAnalysisAnalyzer(cameraExecutor) { imageProxy ->
        val bitmap = imageProxy.toBitmap()
        val rotationDegrees = imageProxy.imageInfo.rotationDegrees
        val currentFocalLength = focalLengthState.value

        onFrame(bitmap, rotationDegrees, currentFocalLength)
        // Calculate Focal Length in Pixels
        var fxPixels = 0f
        try {
            val info = cameraController.cameraInfo
            if (info != null) {
                val camera2Info = Camera2CameraInfo.from(info)
                val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
                val focalLengthMm = focalLengths?.firstOrNull() ?: 0f

                val sensorSize = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
                val sensorWidthMm = sensorSize?.width ?: 0f

                if (sensorWidthMm > 0) {
                    val imageWidth = bitmap.width.toFloat()
                    fxPixels = (focalLengthMm / sensorWidthMm) * imageWidth
                }
            }
        } catch (e: Exception) {
            e.printStackTrace()
        }

        onFrame(bitmap, rotationDegrees, fxPixels)
        imageProxy.close()
    }
}

@@ -94,7 +148,7 @@ fun CameraPreview(
            }
        },
        onRelease = {
            // Cleanup if needed
            // Cleanup
        }
    )
}