relative est

This commit is contained in:
SaiD 2025-12-10 09:48:54 +05:30
parent c2f3bdd089
commit c445068773
19 changed files with 733 additions and 111 deletions

View File

@ -0,0 +1,4 @@
kotlin version: 2.0.21
error message: The daemon has terminated unexpectedly on startup attempt #1 with error code: 0. The daemon process output:
1. Kotlin compile daemon is ready

Binary file not shown.

View File

@ -32,10 +32,23 @@ class AIModelImpl : AIModel {
.addOnSuccessListener { result ->
val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)
// The foreground bitmap from ML Kit is the object cut out on a transparent
// background, so it is passed on as-is rather than coloured here.
val booleanMask = createBooleanMask(fg)
// The UI overlays 'colorMask'; DistanceEstimator consumes the segmentation mask,
// and the boolean mask carries the binary info needed for IoU/overlap calculations.
val colorMask = createColorizedMask(fg)
val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)
// colorMask is returned first because the UI expects a visual overlay; its alpha
// channel is preserved, so it still works as a mask downstream.
cont.resume(Triple(colorMask, booleanMask, bbox))
}
.addOnFailureListener { e ->
@ -52,6 +65,7 @@ class AIModelImpl : AIModel {
maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)
for (i in pixels.indices) {
// ML Kit foreground bitmap: non-transparent pixels belong to the object.
if (Color.alpha(pixels[i]) > 0) {
pixels[i] = MASK_COLOR
}

View File

@ -1,49 +1,53 @@
package com.example.livingai.data.ml
import android.graphics.Bitmap
import com.example.livingai.domain.ml.DistanceState
import com.example.livingai.domain.ml.FrameData
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationPixelEstimator
import com.example.livingai.domain.ml.OrientationState
class DistanceEstimatorImpl {
private val orientationEstimator = OrientationPixelEstimator(iouThreshold = 0.60f)
fun processFrame(
frameData: FrameData,
requestedOrientation: Orientation,
silhouetteBitmap: Bitmap
): OrientationState {
val segMaskBitmap = frameData.segmentationMaskBitmap
?: return OrientationState(
success = false,
reason = "No segmentation mask",
pixelMetrics = null,
orientationMatched = false
)
val bbox = frameData.segmentationBox
?: return OrientationState(
success = false,
reason = "No bounding box",
pixelMetrics = null,
orientationMatched = false
)
val result = orientationEstimator.analyze(
segmentationMaskBitmap = segMaskBitmap,
silhouetteBitmap = silhouetteBitmap,
bbox = bbox,
frameWidth = frameData.imageWidth,
frameHeight = frameData.imageHeight,
medianDepthMeters = frameData.medianDepth
)
return OrientationState(
success = result.orientationMatched,
reason = if (result.orientationMatched) "OK" else "Orientation mismatch",
pixelMetrics = result.pixelMetrics,
orientationMatched = result.orientationMatched,
iouScore = result.iouScore
)
}
}

View File

@ -0,0 +1,133 @@
package com.example.livingai.data.ml
import android.content.Context
import android.graphics.Bitmap
import android.graphics.Rect
import org.tensorflow.lite.Interpreter
import org.tensorflow.lite.support.common.FileUtil
import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import java.nio.ByteBuffer
import java.nio.ByteOrder
data class MidasDepthResult(
val relativeDepth: Float,
val absoluteDistanceMeters: Float?
)
class MidasDepthEstimator(private val context: Context) {
private var interpreter: Interpreter? = null
companion object {
private const val MODEL_NAME = "midas_v2_1_small.tflite"
private const val INPUT_SIZE = 256
private val NORM_MEAN = floatArrayOf(123.675f, 116.28f, 103.53f)
private val NORM_STD = floatArrayOf(58.395f, 57.12f, 57.375f)
}
init {
setupInterpreter()
}
private fun setupInterpreter() {
try {
val files = context.assets.list("") ?: emptyArray()
if (!files.contains(MODEL_NAME)) return
val model = FileUtil.loadMappedFile(context, MODEL_NAME)
interpreter = Interpreter(model, Interpreter.Options().apply { setNumThreads(4) })
} catch (e: Exception) {
e.printStackTrace()
}
}
fun analyzeObject(
bitmap: Bitmap,
bbox: Rect,
realObjectHeightMeters: Float?,
focalLengthPixels: Float?
): MidasDepthResult? {
val interp = interpreter ?: return null
try {
// 1. Preprocess
var tensorImage = TensorImage(org.tensorflow.lite.DataType.FLOAT32)
tensorImage.load(bitmap)
val processor = ImageProcessor.Builder()
.add(ResizeOp(INPUT_SIZE, INPUT_SIZE, ResizeOp.ResizeMethod.BILINEAR))
.add(NormalizeOp(NORM_MEAN, NORM_STD))
.build()
tensorImage = processor.process(tensorImage)
// 2. Output Buffer
val outShape = interp.getOutputTensor(0).shape()
val size = outShape[1] * outShape[2]
val output = ByteBuffer.allocateDirect(size * 4).order(ByteOrder.nativeOrder())
// 3. Run MiDaS
interp.run(tensorImage.buffer, output)
output.rewind()
val depthArray = FloatArray(size)
output.asFloatBuffer().get(depthArray)
// MiDaS produces a relative (inverse-depth) map for the whole frame. Rather than
// taking the median of the full frame, sample the depth map at the centre of the
// object's bounding box so the value reflects the object itself.
// Map BBox center to 256x256
val cx = bbox.centerX()
val cy = bbox.centerY()
val mapX = (cx * INPUT_SIZE) / bitmap.width
val mapY = (cy * INPUT_SIZE) / bitmap.height
// Clamp
val safeX = mapX.coerceIn(0, INPUT_SIZE - 1)
val safeY = mapY.coerceIn(0, INPUT_SIZE - 1)
val depthIndex = safeY * INPUT_SIZE + safeX
val objectRelativeDepth = depthArray[depthIndex]
// Note: MiDaS output is inverse depth (disparity).
// Higher value = Closer.
// 4. Absolute Distance (Pinhole)
val hPx = bbox.height().toFloat()
val absDistance = if (realObjectHeightMeters != null && focalLengthPixels != null && hPx > 0) {
(focalLengthPixels * realObjectHeightMeters) / hPx
} else {
null
}
return MidasDepthResult(
relativeDepth = objectRelativeDepth,
absoluteDistanceMeters = absDistance
)
} catch (e: Exception) {
e.printStackTrace()
return null
}
}
// Kept for compatibility; analyzeObject is the main entry point.
fun estimateDepth(bitmap: Bitmap): Float? {
// Simpler fallback: relative depth sampled over the whole frame
return analyzeObject(bitmap, Rect(0, 0, bitmap.width, bitmap.height), null, null)?.relativeDepth
}
}
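The absolute-distance branch above is plain pinhole geometry: with the focal length expressed in pixels and a known real-world object height, distance = (focalLengthPixels * realObjectHeightMeters) / bboxHeightPx. A minimal standalone sketch of that arithmetic follows; the focal length, object height, and box height are illustrative assumptions, not values taken from this commit.

// Pinhole-distance sketch: distance = (focalLengthPx * realHeightMeters) / objectHeightPx.
// All inputs below are illustrative assumptions, not values from the app.
fun pinholeDistanceMeters(focalLengthPx: Float, realHeightMeters: Float, objectHeightPx: Float): Float? =
    if (focalLengthPx > 0f && objectHeightPx > 0f) (focalLengthPx * realHeightMeters) / objectHeightPx
    else null

fun main() {
    // e.g. 1500 px focal length, 1.55 m tall object, 500 px bounding-box height
    println(pinholeDistanceMeters(1500f, 1.55f, 500f)) // ≈ 4.65 m
}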

View File

@ -7,10 +7,12 @@ import android.graphics.Matrix
import android.provider.MediaStore
import androidx.camera.core.ImageProxy
import com.example.livingai.data.ml.DistanceEstimatorImpl
import com.example.livingai.data.ml.MidasDepthEstimator
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.FrameMetadataProvider
import com.example.livingai.domain.ml.FrameMetadataProvider.toFrameData
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository
import com.example.livingai.utils.TiltSensorManager
import kotlinx.coroutines.Dispatchers
@ -23,9 +25,9 @@ class CameraRepositoryImpl(
) : CameraRepository {
private val distanceEstimator = DistanceEstimatorImpl()
private val midasEstimator = MidasDepthEstimator(context)
init {
FrameMetadataProvider.aiModel = aiModel
FrameMetadataProvider.tiltSensorManager = tiltSensorManager
}
@ -37,22 +39,61 @@ class CameraRepositoryImpl(
imageProxy.close()
if (rotation != 0) {
val matrix = Matrix().apply { postRotate(rotation.toFloat()) }
Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
} else bitmap
}
override suspend fun processFrame(
bitmap: Bitmap,
requestedOrientation: Orientation,
silhouetteBitmap: Bitmap,
realObjectHeightMeters: Float?, // ★ NEW PARAM
focalLengthPixels: Float // from camera intrinsics
): OrientationState = withContext(Dispatchers.Default) {
// 1. Collect segmentation
val meta = FrameMetadataProvider.collectMetadata(bitmap)
val bbox = meta.segmentationBox
val mask = meta.segmentationMaskBitmap
if (bbox == null || mask == null) {
return@withContext OrientationState(
success = false,
reason = "Segmentation missing",
pixelMetrics = null,
orientationMatched = false,
iouScore = null,
relativeDepth = null,
absoluteDistanceMeters = null
)
}
// 2. MiDaS (relative + absolute if reference height provided)
val midasResult = midasEstimator.analyzeObject(
bitmap = bitmap,
bbox = bbox,
realObjectHeightMeters = realObjectHeightMeters,
focalLengthPixels = focalLengthPixels
)
// 3. Build FrameData with relative depth only
val frameData = meta.toFrameData(bitmap).copy(
medianDepth = midasResult?.relativeDepth
)
// 4. Orientation detection
val orientationState = distanceEstimator.processFrame(
frameData = frameData,
requestedOrientation = requestedOrientation,
silhouetteBitmap = silhouetteBitmap
)
// 5. Inject relative + absolute values into final result
orientationState.copy(
relativeDepth = midasResult?.relativeDepth,
absoluteDistanceMeters = midasResult?.absoluteDistanceMeters
)
}
override suspend fun saveImage(
@ -60,6 +101,7 @@ class CameraRepositoryImpl(
animalId: String,
orientation: String?
): String = withContext(Dispatchers.IO) {
val suffix = orientation?.let { "_$it" } ?: ""
val fileName = "$animalId$suffix.jpg"
@ -74,7 +116,7 @@ class CameraRepositoryImpl(
val resolver = context.contentResolver
val uri = resolver.insert(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, values)
?: throw RuntimeException("Failed to insert image")
try {
resolver.openOutputStream(uri)?.use { out ->
@ -86,6 +128,7 @@ class CameraRepositoryImpl(
values.put(MediaStore.Images.Media.IS_PENDING, 0)
resolver.update(uri, values, null, null)
}
} catch (e: Exception) {
resolver.delete(uri, null, null)
throw e

View File

@ -0,0 +1,77 @@
package com.example.livingai.domain.ml
import android.content.Context
import android.graphics.Rect
import android.hardware.camera2.CameraCharacteristics
import android.hardware.camera2.CameraManager
import android.util.Size
import android.util.SizeF
/**
* Utility to read camera intrinsics from Camera2 and compute focal length (pixels).
*
* Usage:
* val (fPx, imgW, imgH) = CameraIntrinsicsFetcher.fetch(context, cameraId, imageSize)
* CameraInfoProvider.init(CameraInfoData(fPx, imgW, imgH, px, py, ...))
*
* imageSize = the resolution you will actually receive from the ImageReader / CameraX output (width,height)
*
* Formula:
* f_px = f_mm / sensorWidth_mm * imageWidth_px
*
* For more accuracy, map the active array size to the sensor's physical size.
*/
object CameraIntrinsicsFetcher {
data class Result(
val focalLengthPixels: Float,
val imageWidthPx: Int,
val imageHeightPx: Int,
val principalPointX: Float,
val principalPointY: Float,
val sensorPhysicalSizeMm: SizeF?
)
/**
* cameraId = device camera id (get from CameraManager)
* imageSize = the actual output image size you will capture (e.g., 1920x1080)
*/
fun fetch(context: Context, cameraId: String, imageSize: Size): Result {
val mgr = context.getSystemService(Context.CAMERA_SERVICE) as CameraManager
val characteristics = mgr.getCameraCharacteristics(cameraId)
val focalLengths = characteristics.get(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
val fMm = when {
focalLengths != null && focalLengths.isNotEmpty() -> focalLengths[0] // mm
else -> 4.0f
}
val sensorSize = characteristics.get(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE) // in mm
val sensorSizeMm = sensorSize
// active array size gives pixel array cropping of sensor -> map principal point
val activeRect = characteristics.get(CameraCharacteristics.SENSOR_INFO_ACTIVE_ARRAY_SIZE) // Rect
val activeRectW = activeRect?.width() ?: imageSize.width
val activeRectH = activeRect?.height() ?: imageSize.height
// Compute focal in pixels: ratio f_mm / sensorWidth_mm * imageWidth_px
val fPx = if (sensorSizeMm != null && sensorSizeMm.width > 0f) {
(fMm / sensorSizeMm.width) * imageSize.width
} else {
// fallback: assume a typical ~4 mm sensor width
(fMm / 4.0f) * imageSize.width
}
val principalX = (activeRect?.centerX() ?: imageSize.width / 2).toFloat()
val principalY = (activeRect?.centerY() ?: imageSize.height / 2).toFloat()
return Result(
focalLengthPixels = fPx,
imageWidthPx = imageSize.width,
imageHeightPx = imageSize.height,
principalPointX = principalX,
principalPointY = principalY,
sensorPhysicalSizeMm = sensorSizeMm
)
}
}
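As a sanity check of the formula in the header comment, a hypothetical 4.0 mm lens on a 5.6 mm-wide sensor delivering a 1920 px-wide image gives f_px = (4.0 / 5.6) * 1920 ≈ 1371 px. A self-contained sketch of the same conversion; all numbers are assumptions, not real device data.

// Focal-length conversion used by CameraIntrinsicsFetcher: f_px = (f_mm / sensorWidth_mm) * imageWidth_px.
// The inputs are illustrative assumptions, not real device characteristics.
fun focalLengthPixels(focalLengthMm: Float, sensorWidthMm: Float, imageWidthPx: Int): Float =
    (focalLengthMm / sensorWidthMm) * imageWidthPx

fun main() {
    println(focalLengthPixels(focalLengthMm = 4.0f, sensorWidthMm = 5.6f, imageWidthPx = 1920)) // ≈ 1371.4 px
}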

View File

@ -2,7 +2,6 @@ package com.example.livingai.domain.ml
import android.graphics.Bitmap
import android.graphics.Rect
/**
 * Interface for all distance estimators.
@ -14,28 +13,7 @@ interface DistanceEstimator {
): DistanceState
}
// FrameData is defined in FrameData.kt
/**
 * Singleton-provided camera intrinsics for metric calculations.

View File

@ -0,0 +1,32 @@
package com.example.livingai.domain.ml
import android.graphics.Bitmap
import android.graphics.Rect
/**
* Frame-specific data for one inference cycle.
*/
data class FrameData(
val imageBitmap: Bitmap?,
val segmentationBox: Rect?,
val segmentationMaskBitmap: Bitmap?,
// Optional ARCore depth inputs
val depthMapMeters: FloatArray?, // row-major, rows x cols
val depthWidth: Int = 0,
val depthHeight: Int = 0,
val depthConfidence: FloatArray? = null,
// IMU orientation
val imuPitchDegrees: Float = 0f,
val imuRollDegrees: Float = 0f,
val imuYawDegrees: Float = 0f,
val cameraRotationDegrees: Int = 0,
val timestampMs: Long = System.currentTimeMillis(),
// Fields added for relative-depth estimation
val imageWidth: Int = 0,
val imageHeight: Int = 0,
val medianDepth: Float? = null
)

View File

@ -15,14 +15,15 @@ object FrameMetadataProvider {
suspend fun getSegmentation(bitmap: Bitmap): SegmentationResult? {
return try {
val (maskBitmap, booleanMask, bbox) = aiModel.segmentImage(bitmap) ?: return null
SegmentationResult(maskBitmap, booleanMask, bbox)
} catch (_: Exception) {
null
}
}
data class SegmentationResult(
val maskBitmap: Bitmap?,
val mask: BooleanArray,
val boundingBox: Rect
) {
@ -32,6 +33,7 @@ object FrameMetadataProvider {
other as SegmentationResult
if (maskBitmap != other.maskBitmap) return false
if (!mask.contentEquals(other.mask)) return false
if (boundingBox != other.boundingBox) return false
@ -39,7 +41,8 @@ object FrameMetadataProvider {
}
override fun hashCode(): Int {
var result = maskBitmap?.hashCode() ?: 0
result = 31 * result + mask.contentHashCode()
result = 31 * result + boundingBox.hashCode()
return result
}
@ -96,6 +99,7 @@ object FrameMetadataProvider {
}
data class FrameCollectedMetadata(
val segmentationMaskBitmap: Bitmap?,
val segmentationBox: Rect?,
val depthMeters: FloatArray?,
val depthWidth: Int,
@ -112,6 +116,7 @@ object FrameMetadataProvider {
other as FrameCollectedMetadata
if (segmentationMaskBitmap != other.segmentationMaskBitmap) return false
if (segmentationBox != other.segmentationBox) return false
if (depthMeters != null) {
if (other.depthMeters == null) return false
@ -132,7 +137,8 @@ object FrameMetadataProvider {
}
override fun hashCode(): Int {
var result = segmentationMaskBitmap?.hashCode() ?: 0
result = 31 * result + (segmentationBox?.hashCode() ?: 0)
result = 31 * result + (depthMeters?.contentHashCode() ?: 0)
result = 31 * result + depthWidth
result = 31 * result + depthHeight
@ -152,6 +158,7 @@ object FrameMetadataProvider {
val rot = getRotation()
return FrameCollectedMetadata(
segmentationMaskBitmap = seg?.maskBitmap,
segmentationBox = seg?.boundingBox,
depthMeters = depth.depthMeters,
depthWidth = depth.width,
@ -168,7 +175,7 @@ object FrameMetadataProvider {
return FrameData(
imageBitmap = bitmap,
segmentationBox = segmentationBox,
segmentationMaskBitmap = segmentationMaskBitmap,
depthMapMeters = depthMeters,
depthWidth = depthWidth,
depthHeight = depthHeight,
@ -176,7 +183,12 @@ object FrameMetadataProvider {
imuPitchDegrees = pitch,
imuRollDegrees = roll,
imuYawDegrees = yaw,
cameraRotationDegrees = rotationDegrees,
// New fields populated from bitmap if available or passed down
imageWidth = bitmap.width,
imageHeight = bitmap.height,
medianDepth = null // Can calculate median from depthMeters if needed
)
}
}

View File

@ -0,0 +1,182 @@
package com.example.livingai.domain.ml
import android.graphics.Bitmap
import android.graphics.Rect
import kotlin.math.max
import kotlin.math.min
class OrientationPixelEstimator(
private val iouThreshold: Float = 0.60f
) {
/**
* Main function:
* - segmentationMaskBitmap: ML Kit's alpha mask (animal foreground)
* - silhouetteBitmap: template mask for EXPECTED orientation (e.g., LEFT)
* - bbox: detected bounding box from segmentation
*/
fun analyze(
segmentationMaskBitmap: Bitmap,
silhouetteBitmap: Bitmap,
bbox: Rect,
frameWidth: Int,
frameHeight: Int,
medianDepthMeters: Float? = null
): OrientationPixelResult {
// 1) Convert both masks → boolean
val segFullMask = bitmapToBooleanMask(segmentationMaskBitmap)
val silhouetteMask = bitmapToBooleanMask(silhouetteBitmap)
// 2) Crop segmentation mask to bbox
val croppedMask = cropMaskToBBox(segFullMask, frameWidth, frameHeight, bbox)
// 3) Scale silhouette mask to bbox size
val scaledSilhouette = scaleMask(
silhouetteMask,
silhouetteBitmap.width,
silhouetteBitmap.height,
bbox.width(),
bbox.height()
)
// 4) Compute IoU
val iou = computeIoU(croppedMask, scaledSilhouette)
val orientationMatched = iou >= iouThreshold
// 5) Pixel metrics extraction
val metrics = computePixelMetrics(croppedMask, bbox, medianDepthMeters)
return OrientationPixelResult(
orientationMatched = orientationMatched,
matchedOrientation = null,
iouScore = iou,
iouBestOther = 0f,
pixelMetrics = metrics
)
}
// -----------------------------
// MASK HELPERS
// -----------------------------
private fun bitmapToBooleanMask(bitmap: Bitmap): BooleanArray {
val w = bitmap.width
val h = bitmap.height
val pixels = IntArray(w * h)
bitmap.getPixels(pixels, 0, w, 0, 0, w, h)
val out = BooleanArray(w * h)
for (i in pixels.indices) {
val alpha = (pixels[i] ushr 24) and 0xFF
out[i] = alpha > 0
}
return out
}
private fun cropMaskToBBox(
fullMask: BooleanArray,
frameW: Int,
frameH: Int,
bbox: Rect
): BooleanArray {
val left = max(0, bbox.left)
val top = max(0, bbox.top)
val right = min(frameW - 1, bbox.right)
val bottom = min(frameH - 1, bbox.bottom)
val width = right - left + 1
val height = bottom - top + 1
val out = BooleanArray(width * height)
var idx = 0
for (y in top..bottom) {
for (x in left..right) {
out[idx++] = fullMask[y * frameW + x]
}
}
return out
}
private fun scaleMask(
src: BooleanArray,
srcW: Int,
srcH: Int,
dstW: Int,
dstH: Int
): BooleanArray {
val out = BooleanArray(dstW * dstH)
for (y in 0 until dstH) {
val sy = ((y.toFloat() / dstH) * srcH).toInt().coerceIn(0, srcH - 1)
for (x in 0 until dstW) {
val sx = ((x.toFloat() / dstW) * srcW).toInt().coerceIn(0, srcW - 1)
out[y * dstW + x] = src[sy * srcW + sx]
}
}
return out
}
private fun computeIoU(a: BooleanArray, b: BooleanArray): Float {
if (a.size != b.size) return 0f
var inter = 0
var union = 0
for (i in a.indices) {
val ai = a[i]
val bi = b[i]
if (ai || bi) union++
if (ai && bi) inter++
}
return if (union == 0) 0f else inter.toFloat() / union
}
// -----------------------------
// PIXEL METRICS
// -----------------------------
private fun computePixelMetrics(
croppedMask: BooleanArray,
bbox: Rect,
medianDepthMeters: Float?
): PixelMetrics {
val w = bbox.width()
val h = bbox.height()
var count = 0
var sumX = 0L
var sumY = 0L
for (y in 0 until h) {
for (x in 0 until w) {
if (croppedMask[y * w + x]) {
count++
sumX += x
sumY += y
}
}
}
val centroidX = bbox.left + (sumX.toFloat() / max(1, count))
val centroidY = bbox.top + (sumY.toFloat() / max(1, count))
return PixelMetrics(
widthPx = w,
heightPx = h,
areaPx = count,
centroidX = centroidX,
centroidY = centroidY,
distanceProxyInvHeight = if (h > 0) 1f / h.toFloat() else Float.POSITIVE_INFINITY,
heightPxFloat = h.toFloat(),
medianDepthMeters = medianDepthMeters
)
}
}
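The pass/fail decision reduces to computeIoU: intersection over union of the cropped segmentation mask and the scaled silhouette, compared against iouThreshold. A self-contained toy run of that calculation (the tiny masks below are made up purely for illustration):

// Toy IoU check mirroring OrientationPixelEstimator.computeIoU; the masks are illustrative.
fun iou(a: BooleanArray, b: BooleanArray): Float {
    var inter = 0
    var union = 0
    for (i in a.indices) {
        if (a[i] || b[i]) union++
        if (a[i] && b[i]) inter++
    }
    return if (union == 0) 0f else inter.toFloat() / union
}

fun main() {
    val segMask = booleanArrayOf(true, true, false, false)     // cropped segmentation mask
    val silhouette = booleanArrayOf(true, false, false, false) // scaled template mask
    val score = iou(segMask, silhouette)                       // 1 overlapping / 2 in union = 0.5
    println("IoU = $score, matched = ${score >= 0.60f}")       // 0.5 < 0.60 -> not matched
}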

View File

@ -0,0 +1,24 @@
package com.example.livingai.domain.ml
data class OrientationPixelResult(
val orientationMatched: Boolean, // true only if requested orientation is confidently matched
val matchedOrientation: Orientation?, // which orientation matched (if any)
val iouScore: Float, // IoU score for matched orientation (0..1)
val iouBestOther: Float, // best IoU among other orientations
val pixelMetrics: PixelMetrics? // null if orientation not matched
)
enum class Orientation {
LEFT, RIGHT, FRONT, BACK, LEFT_45, RIGHT_45, TOP, BOTTOM
}
data class PixelMetrics(
val widthPx: Int,
val heightPx: Int,
val areaPx: Int,
val centroidX: Float,
val centroidY: Float,
val distanceProxyInvHeight: Float, // 1 / heightPx (relative distance proxy)
val heightPxFloat: Float, // convenience
val medianDepthMeters: Float? // if depth map available (null otherwise)
)

View File

@ -0,0 +1,11 @@
package com.example.livingai.domain.ml
data class OrientationState(
val success: Boolean,
val reason: String,
val pixelMetrics: PixelMetrics?,
val orientationMatched: Boolean,
val iouScore: Float? = null,
val relativeDepth: Float? = null,
val absoluteDistanceMeters: Float? = null
)

View File

@ -0,0 +1,8 @@
package com.example.livingai.domain.ml
data class OrientationTemplate(
val orientation: Orientation,
val mask: BooleanArray,
val templateWidth: Int,
val templateHeight: Int
)

View File

@ -2,10 +2,17 @@ package com.example.livingai.domain.repository
import android.graphics.Bitmap
import androidx.camera.core.ImageProxy
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
interface CameraRepository {
suspend fun captureImage(imageProxy: ImageProxy): Bitmap
suspend fun processFrame(
bitmap: Bitmap,
requestedOrientation: Orientation,
silhouetteBitmap: Bitmap,
realObjectHeightMeters: Float?,
focalLengthPixels: Float
): OrientationState
suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
}

View File

@ -129,8 +129,8 @@ fun CameraScreen(
CameraPreview(
modifier = Modifier.fillMaxSize(),
controller = controller,
onFrame = { bitmap, rotation, fxPixels ->
viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
}
)
@ -157,7 +157,7 @@ fun CameraScreen(
}
// Debug Overlay
state.orientationState?.let { orient ->
Box(
modifier = Modifier
.align(Alignment.TopEnd)
@ -166,13 +166,25 @@ fun CameraScreen(
.padding(8.dp)
) {
Column {
Text("Success: ${orient.success}", color = Color.White)
Text("Reason: ${orient.reason}", color = Color.White)
orient.pixelMetrics?.let { pm ->
Text("Width (px): ${pm.widthPx}", color = Color.White)
Text("Height (px): ${pm.heightPx}", color = Color.White)
}
// Display depth metrics from OrientationState
orient.relativeDepth?.let { rel ->
Text("Rel Depth: %.4f".format(rel), color = Color.White)
}
orient.absoluteDistanceMeters?.let { abs ->
Text("Dist (m): %.2f".format(abs), color = Color.White)
}
Text("IOU: ${orient.iouScore}", color = Color.White)
Text("Matched: ${orient.orientationMatched}", color = Color.White)
}
}
}

View File

@ -7,7 +7,8 @@ import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.example.livingai.domain.ml.AIModel
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository
import com.example.livingai.domain.usecases.AppDataUseCases
import com.example.livingai.utils.ScreenDimensions
@ -47,7 +48,7 @@ class CameraViewModel(
fun onEvent(event: CameraEvent) {
when (event) {
is CameraEvent.ImageCaptured -> handleImageProxy(event.imageProxy)
is CameraEvent.FrameReceived -> handleFrame(event.bitmap, event.rotationDegrees, event.focalLengthPixels)
is CameraEvent.ClearCapturedImage -> clearCaptured()
is CameraEvent.SetContext -> setContext(event.animalId, event.orientation)
is CameraEvent.AutoCaptureTriggered -> {
@ -87,7 +88,7 @@ class CameraViewModel(
}
}
private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
return
}
@ -95,8 +96,22 @@ class CameraViewModel(
if (isProcessingFrame.compareAndSet(false, true)) {
viewModelScope.launch {
try {
val currentOrientationStr = _state.value.orientation
val silhouette = _state.value.savedMaskBitmap
val orientationState = if (currentOrientationStr != null && silhouette != null) {
val orientationEnum = mapStringToOrientation(currentOrientationStr)
cameraRepository.processFrame(
bitmap,
orientationEnum,
silhouette,
1.55f, // assumed real-world object height in metres
focalLengthPixels
)
} else {
null
}
val result = aiModel.segmentImage(bitmap)
if (result != null) {
@ -124,7 +139,7 @@ class CameraViewModel(
_state.value = _state.value.copy(
segmentationMask = output,
orientationState = orientationState
)
if (_state.value.isAutoCaptureEnabled &&
@ -145,7 +160,7 @@ class CameraViewModel(
} else {
_state.value = _state.value.copy(
segmentationMask = null,
orientationState = orientationState
)
}
} finally {
@ -154,6 +169,18 @@ class CameraViewModel(
}
}
}
private fun mapStringToOrientation(orientation: String): Orientation {
return when (orientation.lowercase()) {
"front" -> Orientation.FRONT
"back" -> Orientation.BACK
"left" -> Orientation.LEFT
"right" -> Orientation.RIGHT
"leftangle" -> Orientation.LEFT_45
"rightangle" -> Orientation.RIGHT_45
else -> Orientation.FRONT
}
}
}
data class CameraUiState(
@ -168,12 +195,12 @@ data class CameraUiState(
val matchThreshold: Int = 50,
val distanceMethod: String = "Jaccard",
val shouldAutoCapture: Boolean = false,
val orientationState: OrientationState? = null
)
sealed class CameraEvent {
data class ImageCaptured(val imageProxy: ImageProxy) : CameraEvent()
data class FrameReceived(val bitmap: Bitmap, val rotationDegrees: Int, val focalLengthPixels: Float) : CameraEvent()
object ClearCapturedImage : CameraEvent()
data class SetContext(val animalId: String, val orientation: String?) : CameraEvent()
object AutoCaptureTriggered : CameraEvent()

View File

@ -3,6 +3,7 @@ package com.example.livingai.pages.components
import android.annotation.SuppressLint
import android.graphics.Bitmap
import android.hardware.camera2.CameraCharacteristics
import android.util.SizeF
import android.view.ViewGroup
import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo
@ -34,9 +35,8 @@ fun CameraPreview(
val cameraController = controller ?: remember { LifecycleCameraController(context) }
// State to hold the focal length in PIXELS.
val focalLengthPxState = remember { mutableStateOf(0f) }
// Periodically check/update focal length on the Main thread
LaunchedEffect(cameraController) {
@ -45,14 +45,48 @@ fun CameraPreview(
val info = cameraController.cameraInfo
if (info != null) {
val camera2Info = Camera2CameraInfo.from(info)
// 1. Get Focal Length (mm)
val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
val focalLengthMm = focalLengths?.firstOrNull() ?: 0f
// 2. Get Sensor Size (mm)
val sensorSize: SizeF? = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
val sensorWidthMm = sensorSize?.width ?: 0f
// 3. The focal length in pixels depends on the output resolution:
//    F_px = (F_mm / SensorWidth_mm) * ImageWidth_px.
// The frame width is only known per-frame inside the analyzer, and
// Camera2CameraInfo is safe to query from its background thread, so the
// conversion to pixels is done there rather than here.
}
} catch (e: Exception) {
// Ignore errors
}
// Check periodically in case the active camera changes
delay(2000)
}
}
@ -62,9 +96,29 @@ fun CameraPreview(
cameraController.setImageAnalysisAnalyzer(cameraExecutor) { imageProxy ->
val bitmap = imageProxy.toBitmap()
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
// Calculate Focal Length in Pixels
var fxPixels = 0f
try {
val info = cameraController.cameraInfo
if (info != null) {
val camera2Info = Camera2CameraInfo.from(info)
val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
val focalLengthMm = focalLengths?.firstOrNull() ?: 0f
val sensorSize = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
val sensorWidthMm = sensorSize?.width ?: 0f
if (sensorWidthMm > 0) {
val imageWidth = bitmap.width.toFloat()
fxPixels = (focalLengthMm / sensorWidthMm) * imageWidth
}
}
} catch (e: Exception) {
e.printStackTrace()
}
onFrame(bitmap, rotationDegrees, fxPixels)
imageProxy.close()
}
}
@ -94,7 +148,7 @@ fun CameraPreview(
}
},
onRelease = {
// Cleanup
}
)
}