diff --git a/.kotlin/errors/errors-1765333795718.log b/.kotlin/errors/errors-1765333795718.log
new file mode 100644
index 0000000..1219b50
--- /dev/null
+++ b/.kotlin/errors/errors-1765333795718.log
@@ -0,0 +1,4 @@
+kotlin version: 2.0.21
+error message: The daemon has terminated unexpectedly on startup attempt #1 with error code: 0. The daemon process output:
+ 1. Kotlin compile daemon is ready
+
diff --git a/.kotlin/sessions/kotlin-compiler-9759521387444280191.salive b/.kotlin/sessions/kotlin-compiler-1168907591040633302.salive
similarity index 100%
rename from .kotlin/sessions/kotlin-compiler-9759521387444280191.salive
rename to .kotlin/sessions/kotlin-compiler-1168907591040633302.salive
diff --git a/app/src/main/assets/midas_v2_1_small.tflite b/app/src/main/assets/midas_v2_1_small.tflite
new file mode 100644
index 0000000..f1384b2
Binary files /dev/null and b/app/src/main/assets/midas_v2_1_small.tflite differ
diff --git a/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt b/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt
index 703b115..9cdd94d 100644
--- a/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt
+++ b/app/src/main/java/com/example/livingai/data/ml/AIModelImpl.kt
@@ -32,10 +32,23 @@ class AIModelImpl : AIModel {
            .addOnSuccessListener { result ->
                val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)
+                // ML Kit's foreground bitmap is the object cut out on a transparent background.
+                // Derive both a binary mask (for IoU/overlap calculations) and a colorized overlay from it.
+                val booleanMask = createBooleanMask(fg)
+                // The UI overlays 'colorMask'; DistanceEstimator consumes the same bitmap as 'segMaskBitmap',
+                // which is fine as long as the alpha channel is preserved.
+                val colorMask = createColorizedMask(fg)
                val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)
+                // colorMask goes first in the Triple because the UI expects a visual overlay.
                cont.resume(Triple(colorMask, booleanMask, bbox))
            }
            .addOnFailureListener { e ->
@@ -52,6 +65,7 @@ class AIModelImpl : AIModel {
        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)
        for (i in pixels.indices) {
+            // ML Kit foreground bitmap: non-transparent pixels belong to the object.
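+            // Illustrative: a pixel value of 0x80FF0000 has alpha 0x80 > 0, so it is treated as
+            // foreground and recolored to MASK_COLOR; fully transparent pixels stay as background.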
if (Color.alpha(pixels[i]) > 0) { pixels[i] = MASK_COLOR } diff --git a/app/src/main/java/com/example/livingai/data/ml/DistanceEstimatorImpl.kt b/app/src/main/java/com/example/livingai/data/ml/DistanceEstimatorImpl.kt index 3993a42..75a519f 100644 --- a/app/src/main/java/com/example/livingai/data/ml/DistanceEstimatorImpl.kt +++ b/app/src/main/java/com/example/livingai/data/ml/DistanceEstimatorImpl.kt @@ -1,49 +1,53 @@ package com.example.livingai.data.ml -import com.example.livingai.domain.ml.ArcoreDepthEstimator -import com.example.livingai.domain.ml.CameraInfoData -import com.example.livingai.domain.ml.CameraInfoProvider -import com.example.livingai.domain.ml.DistanceEstimator -import com.example.livingai.domain.ml.DistanceRecommendation +import android.graphics.Bitmap import com.example.livingai.domain.ml.DistanceState import com.example.livingai.domain.ml.FrameData -import com.example.livingai.domain.ml.KnownDimensionEstimator -import com.example.livingai.utils.Constants +import com.example.livingai.domain.ml.Orientation +import com.example.livingai.domain.ml.OrientationPixelEstimator +import com.example.livingai.domain.ml.OrientationState -class DistanceEstimatorImpl( - private val mainEstimator: DistanceEstimator = ArcoreDepthEstimator(), - private val fallbackEstimator: DistanceEstimator = KnownDimensionEstimator() -) { +class DistanceEstimatorImpl { - fun processFrame(frame: FrameData): DistanceState { - // Fallback or retrieve camera info - val camInfo = CameraInfoProvider.tryGet() - ?: createFallbackCameraInfo(frame) + private val orientationEstimator = OrientationPixelEstimator(iouThreshold = 0.60f) - val main = mainEstimator.analyze(frame, camInfo) - return main.distanceMeters?.let { main } - ?: fallbackEstimator.analyze(frame, camInfo) - } + fun processFrame( + frameData: FrameData, + requestedOrientation: Orientation, + silhouetteBitmap: Bitmap + ): OrientationState { - private fun createFallbackCameraInfo(frame: FrameData): CameraInfoData { - // Estimate focal length based on FOV if available, or a reasonable default - // For a typical phone: - // H-FOV ~ 60-70 degrees - // fx = (W/2) / tan(FOV/2) - val w = frame.imageBitmap?.width ?: 1080 - val h = frame.imageBitmap?.height ?: 1920 + val segMaskBitmap = frameData.segmentationMaskBitmap + ?: return OrientationState( + success = false, + reason = "No segmentation mask", + pixelMetrics = null, + orientationMatched = false + ) - // Assume approx 60 degrees horizontal FOV as a fallback - val fovDegrees = 60.0 - val fovRadians = Math.toRadians(fovDegrees) - val focalLengthPx = (w / 2.0) / Math.tan(fovRadians / 2.0) + val bbox = frameData.segmentationBox + ?: return OrientationState( + success = false, + reason = "No bounding box", + pixelMetrics = null, + orientationMatched = false + ) - return CameraInfoData( - focalLengthPixels = focalLengthPx.toFloat(), - sensorWidthPx = w, - sensorHeightPx = h, - principalPointX = w / 2f, - principalPointY = h / 2f + val result = orientationEstimator.analyze( + segmentationMaskBitmap = segMaskBitmap, + silhouetteBitmap = silhouetteBitmap, + bbox = bbox, + frameWidth = frameData.imageWidth, + frameHeight = frameData.imageHeight, + medianDepthMeters = frameData.medianDepth + ) + + return OrientationState( + success = result.orientationMatched, + reason = if (result.orientationMatched) "OK" else "Orientation mismatch", + pixelMetrics = result.pixelMetrics, + orientationMatched = result.orientationMatched, + iouScore = result.iouScore ) } } diff --git 
a/app/src/main/java/com/example/livingai/data/ml/MidasDepthEstimator.kt b/app/src/main/java/com/example/livingai/data/ml/MidasDepthEstimator.kt
new file mode 100644
index 0000000..25a277f
--- /dev/null
+++ b/app/src/main/java/com/example/livingai/data/ml/MidasDepthEstimator.kt
@@ -0,0 +1,133 @@
+package com.example.livingai.data.ml
+
+import android.content.Context
+import android.graphics.Bitmap
+import android.graphics.Rect
+import org.tensorflow.lite.Interpreter
+import org.tensorflow.lite.support.common.FileUtil
+import org.tensorflow.lite.support.common.ops.NormalizeOp
+import org.tensorflow.lite.support.image.ImageProcessor
+import org.tensorflow.lite.support.image.TensorImage
+import org.tensorflow.lite.support.image.ops.ResizeOp
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+
+data class MidasDepthResult(
+    val relativeDepth: Float,
+    val absoluteDistanceMeters: Float?
+)
+
+class MidasDepthEstimator(private val context: Context) {
+
+    private var interpreter: Interpreter? = null
+
+    companion object {
+        private const val MODEL_NAME = "midas_v2_1_small.tflite"
+        private const val INPUT_SIZE = 256
+
+        private val NORM_MEAN = floatArrayOf(123.675f, 116.28f, 103.53f)
+        private val NORM_STD = floatArrayOf(58.395f, 57.12f, 57.375f)
+    }
+
+    init {
+        setupInterpreter()
+    }
+
+    private fun setupInterpreter() {
+        try {
+            val files = context.assets.list("") ?: emptyArray()
+            if (!files.contains(MODEL_NAME)) return
+
+            val model = FileUtil.loadMappedFile(context, MODEL_NAME)
+            interpreter = Interpreter(model, Interpreter.Options().apply { setNumThreads(4) })
+        } catch (e: Exception) {
+            e.printStackTrace()
+        }
+    }
+
+    fun analyzeObject(
+        bitmap: Bitmap,
+        bbox: Rect,
+        realObjectHeightMeters: Float?,
+        focalLengthPixels: Float?
+    ): MidasDepthResult? {
+        val interp = interpreter ?: return null
+
+        try {
+            // 1. Preprocess
+            var tensorImage = TensorImage(org.tensorflow.lite.DataType.FLOAT32)
+            tensorImage.load(bitmap)
+
+            val processor = ImageProcessor.Builder()
+                .add(ResizeOp(INPUT_SIZE, INPUT_SIZE, ResizeOp.ResizeMethod.BILINEAR))
+                .add(NormalizeOp(NORM_MEAN, NORM_STD))
+                .build()
+
+            tensorImage = processor.process(tensorImage)
+
+            // 2. Output Buffer
+            val outShape = interp.getOutputTensor(0).shape()
+            val size = outShape[1] * outShape[2]
+            val output = ByteBuffer.allocateDirect(size * 4).order(ByteOrder.nativeOrder())
+
+            // 3. Run MiDaS
+            interp.run(tensorImage.buffer, output)
+
+            output.rewind()
+            val depthArray = FloatArray(size)
+            output.asFloatBuffer().get(depthArray)
+
+            // MiDaS runs on the full frame, so the 256x256 output covers the whole scene.
+            // A whole-frame median only gives scene-level relative depth; for the object's
+            // depth, sample the inverse-depth map at the bounding-box centre instead.
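+            // Illustrative mapping (assuming a 1080x1920 frame): a bbox centre of (540, 960)
+            // maps to (540*256/1080, 960*256/1920) = (128, 128) in the 256x256 map,
+            // i.e. depthIndex = 128 * 256 + 128.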
+ + // Map BBox center to 256x256 + val cx = bbox.centerX() + val cy = bbox.centerY() + val mapX = (cx * INPUT_SIZE) / bitmap.width + val mapY = (cy * INPUT_SIZE) / bitmap.height + + // Clamp + val safeX = mapX.coerceIn(0, INPUT_SIZE - 1) + val safeY = mapY.coerceIn(0, INPUT_SIZE - 1) + + val depthIndex = safeY * INPUT_SIZE + safeX + val objectRelativeDepth = depthArray[depthIndex] + // Note: MiDaS output is inverse depth (disparity). + // Higher value = Closer. + + // 4. Absolute Distance (Pinhole) + val hPx = bbox.height().toFloat() + val absDistance = if (realObjectHeightMeters != null && focalLengthPixels != null && hPx > 0) { + (focalLengthPixels * realObjectHeightMeters) / hPx + } else { + null + } + + return MidasDepthResult( + relativeDepth = objectRelativeDepth, + absoluteDistanceMeters = absDistance + ) + + } catch (e: Exception) { + e.printStackTrace() + return null + } + } + + // Kept for compatibility if needed, but analyzeObject is the new main entry + fun estimateDepth(bitmap: Bitmap): Float? { + // Fallback or simpler version + return analyzeObject(bitmap, Rect(0,0,bitmap.width, bitmap.height), null, null)?.relativeDepth + } +} diff --git a/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt b/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt index 70b0845..3d47777 100644 --- a/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt +++ b/app/src/main/java/com/example/livingai/data/repository/media/CameraRepositoryImpl.kt @@ -7,10 +7,12 @@ import android.graphics.Matrix import android.provider.MediaStore import androidx.camera.core.ImageProxy import com.example.livingai.data.ml.DistanceEstimatorImpl +import com.example.livingai.data.ml.MidasDepthEstimator import com.example.livingai.domain.ml.AIModel -import com.example.livingai.domain.ml.DistanceState import com.example.livingai.domain.ml.FrameMetadataProvider import com.example.livingai.domain.ml.FrameMetadataProvider.toFrameData +import com.example.livingai.domain.ml.Orientation +import com.example.livingai.domain.ml.OrientationState import com.example.livingai.domain.repository.CameraRepository import com.example.livingai.utils.TiltSensorManager import kotlinx.coroutines.Dispatchers @@ -23,9 +25,9 @@ class CameraRepositoryImpl( ) : CameraRepository { private val distanceEstimator = DistanceEstimatorImpl() + private val midasEstimator = MidasDepthEstimator(context) init { - // inject dependencies into metadata provider FrameMetadataProvider.aiModel = aiModel FrameMetadataProvider.tiltSensorManager = tiltSensorManager } @@ -37,29 +39,69 @@ class CameraRepositoryImpl( imageProxy.close() if (rotation != 0) { - val m = Matrix().apply { postRotate(rotation.toFloat()) } - Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, m, true) + val matrix = Matrix().apply { postRotate(rotation.toFloat()) } + Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true) } else bitmap } - override suspend fun processFrame(bitmap: Bitmap): DistanceState = - withContext(Dispatchers.Default) { + override suspend fun processFrame( + bitmap: Bitmap, + requestedOrientation: Orientation, + silhouetteBitmap: Bitmap, + realObjectHeightMeters: Float?, // ★ NEW PARAM + focalLengthPixels: Float // from camera intrinsics + ): OrientationState = withContext(Dispatchers.Default) { - // 1. Collect metadata - val meta = FrameMetadataProvider.collectMetadata(bitmap) + // 1. 
Collect segmentation + val meta = FrameMetadataProvider.collectMetadata(bitmap) + val bbox = meta.segmentationBox + val mask = meta.segmentationMaskBitmap - // 2. Convert to FrameData - val frameData = meta.toFrameData(bitmap) - - // 3. Run distance estimator - distanceEstimator.processFrame(frameData) + if (bbox == null || mask == null) { + return@withContext OrientationState( + success = false, + reason = "Segmentation missing", + pixelMetrics = null, + orientationMatched = false, + iouScore = null, + relativeDepth = null, + absoluteDistanceMeters = null + ) } + // 2. MiDaS (relative + absolute if reference height provided) + val midasResult = midasEstimator.analyzeObject( + bitmap = bitmap, + bbox = bbox, + realObjectHeightMeters = realObjectHeightMeters, + focalLengthPixels = focalLengthPixels + ) + + // 3. Build FrameData with relative depth only + val frameData = meta.toFrameData(bitmap).copy( + medianDepth = midasResult?.relativeDepth + ) + + // 4. Orientation detection + val orientationState = distanceEstimator.processFrame( + frameData = frameData, + requestedOrientation = requestedOrientation, + silhouetteBitmap = silhouetteBitmap + ) + + // 5. Inject relative + absolute values into final result + orientationState.copy( + relativeDepth = midasResult?.relativeDepth, + absoluteDistanceMeters = midasResult?.absoluteDistanceMeters + ) + } + override suspend fun saveImage( bitmap: Bitmap, animalId: String, orientation: String? ): String = withContext(Dispatchers.IO) { + val suffix = orientation?.let { "_$it" } ?: "" val fileName = "$animalId$suffix.jpg" @@ -74,7 +116,7 @@ class CameraRepositoryImpl( val resolver = context.contentResolver val uri = resolver.insert(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, values) - ?: throw RuntimeException("Image insert failed") + ?: throw RuntimeException("Failed to insert image") try { resolver.openOutputStream(uri)?.use { out -> @@ -86,6 +128,7 @@ class CameraRepositoryImpl( values.put(MediaStore.Images.Media.IS_PENDING, 0) resolver.update(uri, values, null, null) } + } catch (e: Exception) { resolver.delete(uri, null, null) throw e @@ -93,4 +136,4 @@ class CameraRepositoryImpl( uri.toString() } -} +} \ No newline at end of file diff --git a/app/src/main/java/com/example/livingai/domain/ml/CameraIntrinsicsFetcher.kt b/app/src/main/java/com/example/livingai/domain/ml/CameraIntrinsicsFetcher.kt new file mode 100644 index 0000000..12005cd --- /dev/null +++ b/app/src/main/java/com/example/livingai/domain/ml/CameraIntrinsicsFetcher.kt @@ -0,0 +1,77 @@ +package com.example.livingai.domain.ml + +import android.content.Context +import android.graphics.Rect +import android.hardware.camera2.CameraCharacteristics +import android.hardware.camera2.CameraManager +import android.util.Size +import android.util.SizeF + +/** + * Utility to read camera intrinsics from Camera2 and compute focal length (pixels). + * + * Usage: + * val (fPx, imgW, imgH) = CameraIntrinsicsFetcher.fetch(context, cameraId, imageSize) + * CameraInfoProvider.init(CameraInfoData(fPx, imgW, imgH, px, py, ...)) + * + * imageSize = the resolution you will actually receive from the ImageReader / CameraX output (width,height) + * + * Formula: + * f_px = f_mm / sensorWidth_mm * imageWidth_px + * + * More accurate: use activeArray size mapping to sensor physical size if needed. 
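+ *
+ * Worked example (illustrative numbers, not taken from a specific device):
+ *   f_mm = 4.38 mm, sensorWidth_mm = 5.64 mm, imageWidth_px = 1920
+ *   f_px = 4.38 / 5.64 * 1920 ≈ 1491 px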
+ */ +object CameraIntrinsicsFetcher { + + data class Result( + val focalLengthPixels: Float, + val imageWidthPx: Int, + val imageHeightPx: Int, + val principalPointX: Float, + val principalPointY: Float, + val sensorPhysicalSizeMm: SizeF? + ) + + /** + * cameraId = device camera id (get from CameraManager) + * imageSize = the actual output image size you will capture (e.g., 1920x1080) + */ + fun fetch(context: Context, cameraId: String, imageSize: Size): Result { + val mgr = context.getSystemService(Context.CAMERA_SERVICE) as CameraManager + val characteristics = mgr.getCameraCharacteristics(cameraId) + + val focalLengths = characteristics.get(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS) + val fMm = when { + focalLengths != null && focalLengths.isNotEmpty() -> focalLengths[0] // mm + else -> 4.0f + } + + val sensorSize = characteristics.get(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE) // in mm + val sensorSizeMm = sensorSize + + // active array size gives pixel array cropping of sensor -> map principal point + val activeRect = characteristics.get(CameraCharacteristics.SENSOR_INFO_ACTIVE_ARRAY_SIZE) // Rect + val activeRectW = activeRect?.width() ?: imageSize.width + val activeRectH = activeRect?.height() ?: imageSize.height + + // Compute focal in pixels: ratio f_mm / sensorWidth_mm * imageWidth_px + val fPx = if (sensorSizeMm != null && sensorSizeMm.width > 0f) { + (fMm / sensorSizeMm.width) * imageSize.width + } else { + // fallback: estimate based on sensor pixel array + (fMm / 4.0f) * imageSize.width + } + + val principalX = (activeRect?.centerX() ?: imageSize.width / 2).toFloat() + val principalY = (activeRect?.centerY() ?: imageSize.height / 2).toFloat() + + return Result( + focalLengthPixels = fPx, + imageWidthPx = imageSize.width, + imageHeightPx = imageSize.height, + principalPointX = principalX, + principalPointY = principalY, + sensorPhysicalSizeMm = sensorSizeMm + ) + } +} diff --git a/app/src/main/java/com/example/livingai/domain/ml/DistanceEstimator.kt b/app/src/main/java/com/example/livingai/domain/ml/DistanceEstimator.kt index af35413..2369f74 100644 --- a/app/src/main/java/com/example/livingai/domain/ml/DistanceEstimator.kt +++ b/app/src/main/java/com/example/livingai/domain/ml/DistanceEstimator.kt @@ -2,7 +2,6 @@ package com.example.livingai.domain.ml import android.graphics.Bitmap import android.graphics.Rect -import kotlin.math.abs /** * Interface for all distance estimators. @@ -14,28 +13,7 @@ interface DistanceEstimator { ): DistanceState } -/** - * Frame-specific data for one inference cycle. - */ -data class FrameData( - val imageBitmap: Bitmap?, - val segmentationBox: Rect?, - val segmentationMaskBitmap: Bitmap?, - - // Optional ARCore depth inputs - val depthMapMeters: FloatArray?, // row-major R* C - val depthWidth: Int = 0, - val depthHeight: Int = 0, - val depthConfidence: FloatArray? = null, - - // IMU orientation - val imuPitchDegrees: Float = 0f, - val imuRollDegrees: Float = 0f, - val imuYawDegrees: Float = 0f, - - val cameraRotationDegrees: Int = 0, - val timestampMs: Long = System.currentTimeMillis() -) +// FrameData is defined in FrameData.kt /** * Singleton-provided camera intrinsics for metric calculations. 
diff --git a/app/src/main/java/com/example/livingai/domain/ml/FrameData.kt b/app/src/main/java/com/example/livingai/domain/ml/FrameData.kt new file mode 100644 index 0000000..4f93308 --- /dev/null +++ b/app/src/main/java/com/example/livingai/domain/ml/FrameData.kt @@ -0,0 +1,32 @@ +package com.example.livingai.domain.ml + +import android.graphics.Bitmap +import android.graphics.Rect + +/** + * Frame-specific data for one inference cycle. + */ +data class FrameData( + val imageBitmap: Bitmap?, + val segmentationBox: Rect?, + val segmentationMaskBitmap: Bitmap?, + + // Optional ARCore depth inputs + val depthMapMeters: FloatArray?, // row-major R* C + val depthWidth: Int = 0, + val depthHeight: Int = 0, + val depthConfidence: FloatArray? = null, + + // IMU orientation + val imuPitchDegrees: Float = 0f, + val imuRollDegrees: Float = 0f, + val imuYawDegrees: Float = 0f, + + val cameraRotationDegrees: Int = 0, + val timestampMs: Long = System.currentTimeMillis(), + + //relative + val imageWidth: Int = 0, + val imageHeight: Int = 0, + val medianDepth: Float? = null +) diff --git a/app/src/main/java/com/example/livingai/domain/ml/FrameMetadataProvider.kt b/app/src/main/java/com/example/livingai/domain/ml/FrameMetadataProvider.kt index 315c31e..d61d526 100644 --- a/app/src/main/java/com/example/livingai/domain/ml/FrameMetadataProvider.kt +++ b/app/src/main/java/com/example/livingai/domain/ml/FrameMetadataProvider.kt @@ -15,14 +15,15 @@ object FrameMetadataProvider { suspend fun getSegmentation(bitmap: Bitmap): SegmentationResult? { return try { - val (_, booleanMask, bbox) = aiModel.segmentImage(bitmap) ?: return null - SegmentationResult(booleanMask, bbox) + val (maskBitmap, booleanMask, bbox) = aiModel.segmentImage(bitmap) ?: return null + SegmentationResult(maskBitmap, booleanMask, bbox) } catch (_: Exception) { null } } data class SegmentationResult( + val maskBitmap: Bitmap?, val mask: BooleanArray, val boundingBox: Rect ) { @@ -32,6 +33,7 @@ object FrameMetadataProvider { other as SegmentationResult + if (maskBitmap != other.maskBitmap) return false if (!mask.contentEquals(other.mask)) return false if (boundingBox != other.boundingBox) return false @@ -39,7 +41,8 @@ object FrameMetadataProvider { } override fun hashCode(): Int { - var result = mask.contentHashCode() + var result = maskBitmap?.hashCode() ?: 0 + result = 31 * result + mask.contentHashCode() result = 31 * result + boundingBox.hashCode() return result } @@ -96,6 +99,7 @@ object FrameMetadataProvider { } data class FrameCollectedMetadata( + val segmentationMaskBitmap: Bitmap?, val segmentationBox: Rect?, val depthMeters: FloatArray?, val depthWidth: Int, @@ -112,6 +116,7 @@ object FrameMetadataProvider { other as FrameCollectedMetadata + if (segmentationMaskBitmap != other.segmentationMaskBitmap) return false if (segmentationBox != other.segmentationBox) return false if (depthMeters != null) { if (other.depthMeters == null) return false @@ -132,7 +137,8 @@ object FrameMetadataProvider { } override fun hashCode(): Int { - var result = segmentationBox?.hashCode() ?: 0 + var result = segmentationMaskBitmap?.hashCode() ?: 0 + result = 31 * result + (segmentationBox?.hashCode() ?: 0) result = 31 * result + (depthMeters?.contentHashCode() ?: 0) result = 31 * result + depthWidth result = 31 * result + depthHeight @@ -152,6 +158,7 @@ object FrameMetadataProvider { val rot = getRotation() return FrameCollectedMetadata( + segmentationMaskBitmap = seg?.maskBitmap, segmentationBox = seg?.boundingBox, depthMeters = depth.depthMeters, 
depthWidth = depth.width, @@ -168,7 +175,7 @@ object FrameMetadataProvider { return FrameData( imageBitmap = bitmap, segmentationBox = segmentationBox, - segmentationMaskBitmap = null, + segmentationMaskBitmap = segmentationMaskBitmap, depthMapMeters = depthMeters, depthWidth = depthWidth, depthHeight = depthHeight, @@ -176,7 +183,12 @@ object FrameMetadataProvider { imuPitchDegrees = pitch, imuRollDegrees = roll, imuYawDegrees = yaw, - cameraRotationDegrees = rotationDegrees + cameraRotationDegrees = rotationDegrees, + + // New fields populated from bitmap if available or passed down + imageWidth = bitmap.width, + imageHeight = bitmap.height, + medianDepth = null // Can calculate median from depthMeters if needed ) } } diff --git a/app/src/main/java/com/example/livingai/domain/ml/OrientationAndPixelEstimator.kt b/app/src/main/java/com/example/livingai/domain/ml/OrientationAndPixelEstimator.kt new file mode 100644 index 0000000..c39adbd --- /dev/null +++ b/app/src/main/java/com/example/livingai/domain/ml/OrientationAndPixelEstimator.kt @@ -0,0 +1,182 @@ +package com.example.livingai.domain.ml + +import android.graphics.Bitmap +import android.graphics.Rect +import kotlin.math.max +import kotlin.math.min + +class OrientationPixelEstimator( + private val iouThreshold: Float = 0.60f +) { + + /** + * Main function: + * - segmentationMaskBitmap: MLKit’s alpha mask (animal foreground) + * - silhouetteBitmap: template mask for EXPECTED orientation (e.g., LEFT) + * - bbox: detected bounding box from segmentation + */ + fun analyze( + segmentationMaskBitmap: Bitmap, + silhouetteBitmap: Bitmap, + bbox: Rect, + frameWidth: Int, + frameHeight: Int, + medianDepthMeters: Float? = null + ): OrientationPixelResult { + + // 1) Convert both masks → boolean + val segFullMask = bitmapToBooleanMask(segmentationMaskBitmap) + val silhouetteMask = bitmapToBooleanMask(silhouetteBitmap) + + // 2) Crop segmentation mask to bbox + val croppedMask = cropMaskToBBox(segFullMask, frameWidth, frameHeight, bbox) + + // 3) Scale silhouette mask to bbox size + val scaledSilhouette = scaleMask( + silhouetteMask, + silhouetteBitmap.width, + silhouetteBitmap.height, + bbox.width(), + bbox.height() + ) + + // 4) Compute IoU + val iou = computeIoU(croppedMask, scaledSilhouette) + val orientationMatched = iou >= iouThreshold + + // 5) Pixel metrics extraction + val metrics = computePixelMetrics(croppedMask, bbox, medianDepthMeters) + + return OrientationPixelResult( + orientationMatched = orientationMatched, + matchedOrientation = null, + iouScore = iou, + iouBestOther = 0f, + pixelMetrics = metrics + ) + } + + // ----------------------------- + // MASK HELPERS + // ----------------------------- + + private fun bitmapToBooleanMask(bitmap: Bitmap): BooleanArray { + val w = bitmap.width + val h = bitmap.height + val pixels = IntArray(w * h) + bitmap.getPixels(pixels, 0, w, 0, 0, w, h) + + val out = BooleanArray(w * h) + for (i in pixels.indices) { + val alpha = (pixels[i] ushr 24) and 0xFF + out[i] = alpha > 0 + } + return out + } + + private fun cropMaskToBBox( + fullMask: BooleanArray, + frameW: Int, + frameH: Int, + bbox: Rect + ): BooleanArray { + + val left = max(0, bbox.left) + val top = max(0, bbox.top) + val right = min(frameW - 1, bbox.right) + val bottom = min(frameH - 1, bbox.bottom) + + val width = right - left + 1 + val height = bottom - top + 1 + + val out = BooleanArray(width * height) + var idx = 0 + + for (y in top..bottom) { + for (x in left..right) { + out[idx++] = fullMask[y * frameW + x] + } + } + + return out + 
} + + private fun scaleMask( + src: BooleanArray, + srcW: Int, + srcH: Int, + dstW: Int, + dstH: Int + ): BooleanArray { + + val out = BooleanArray(dstW * dstH) + + for (y in 0 until dstH) { + val sy = ((y.toFloat() / dstH) * srcH).toInt().coerceIn(0, srcH - 1) + for (x in 0 until dstW) { + val sx = ((x.toFloat() / dstW) * srcW).toInt().coerceIn(0, srcW - 1) + out[y * dstW + x] = src[sy * srcW + sx] + } + } + + return out + } + + private fun computeIoU(a: BooleanArray, b: BooleanArray): Float { + if (a.size != b.size) return 0f + + var inter = 0 + var union = 0 + + for (i in a.indices) { + val ai = a[i] + val bi = b[i] + if (ai || bi) union++ + if (ai && bi) inter++ + } + + return if (union == 0) 0f else inter.toFloat() / union + } + + // ----------------------------- + // PIXEL METRICS + // ----------------------------- + + private fun computePixelMetrics( + croppedMask: BooleanArray, + bbox: Rect, + medianDepthMeters: Float? + ): PixelMetrics { + + val w = bbox.width() + val h = bbox.height() + + var count = 0 + var sumX = 0L + var sumY = 0L + + for (y in 0 until h) { + for (x in 0 until w) { + if (croppedMask[y * w + x]) { + count++ + sumX += x + sumY += y + } + } + } + + val centroidX = bbox.left + (sumX.toFloat() / max(1, count)) + val centroidY = bbox.top + (sumY.toFloat() / max(1, count)) + + return PixelMetrics( + widthPx = w, + heightPx = h, + areaPx = count, + centroidX = centroidX, + centroidY = centroidY, + distanceProxyInvHeight = if (h > 0) 1f / h.toFloat() else Float.POSITIVE_INFINITY, + heightPxFloat = h.toFloat(), + medianDepthMeters = medianDepthMeters + ) + } +} diff --git a/app/src/main/java/com/example/livingai/domain/ml/OrientationPixelResult.kt b/app/src/main/java/com/example/livingai/domain/ml/OrientationPixelResult.kt new file mode 100644 index 0000000..ba1d225 --- /dev/null +++ b/app/src/main/java/com/example/livingai/domain/ml/OrientationPixelResult.kt @@ -0,0 +1,24 @@ +package com.example.livingai.domain.ml + +data class OrientationPixelResult( + val orientationMatched: Boolean, // true only if requested orientation is confidently matched + val matchedOrientation: Orientation?,// which orientation matched (if any) + val iouScore: Float, // IoU score for matched orientation (0..1) + val iouBestOther: Float, // best IoU among other orientations + val pixelMetrics: PixelMetrics? // null if orientation not matched +) + +enum class Orientation { + LEFT, RIGHT, FRONT, BACK, LEFT_45, RIGHT_45, TOP, BOTTOM +} + +data class PixelMetrics( + val widthPx: Int, + val heightPx: Int, + val areaPx: Int, + val centroidX: Float, + val centroidY: Float, + val distanceProxyInvHeight: Float, // 1 / heightPx (relative distance proxy) + val heightPxFloat: Float, // convenience + val medianDepthMeters: Float? // if depth map available (null otherwise) +) diff --git a/app/src/main/java/com/example/livingai/domain/ml/OrientationState.kt b/app/src/main/java/com/example/livingai/domain/ml/OrientationState.kt new file mode 100644 index 0000000..ef1dda7 --- /dev/null +++ b/app/src/main/java/com/example/livingai/domain/ml/OrientationState.kt @@ -0,0 +1,11 @@ +package com.example.livingai.domain.ml + +data class OrientationState( + val success: Boolean, + val reason: String, + val pixelMetrics: PixelMetrics?, + val orientationMatched: Boolean, + val iouScore: Float? = null, + val relativeDepth: Float? = null, + val absoluteDistanceMeters: Float? 
= null +) \ No newline at end of file diff --git a/app/src/main/java/com/example/livingai/domain/ml/OrientationTemplate.kt b/app/src/main/java/com/example/livingai/domain/ml/OrientationTemplate.kt new file mode 100644 index 0000000..d33ed11 --- /dev/null +++ b/app/src/main/java/com/example/livingai/domain/ml/OrientationTemplate.kt @@ -0,0 +1,8 @@ +package com.example.livingai.domain.ml + +data class OrientationTemplate( + val orientation: Orientation, + val mask: BooleanArray, + val templateWidth: Int, + val templateHeight: Int +) \ No newline at end of file diff --git a/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt b/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt index 1e852ea..14dc736 100644 --- a/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt +++ b/app/src/main/java/com/example/livingai/domain/repository/CameraRepository.kt @@ -2,10 +2,17 @@ package com.example.livingai.domain.repository import android.graphics.Bitmap import androidx.camera.core.ImageProxy -import com.example.livingai.domain.ml.DistanceState +import com.example.livingai.domain.ml.Orientation +import com.example.livingai.domain.ml.OrientationState interface CameraRepository { suspend fun captureImage(imageProxy: ImageProxy): Bitmap - suspend fun processFrame(bitmap: Bitmap): DistanceState + suspend fun processFrame( + bitmap: Bitmap, + requestedOrientation: Orientation, + silhouetteBitmap: Bitmap, + realObjectHeightMeters: Float?, + focalLengthPixels: Float + ): OrientationState suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String } diff --git a/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt b/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt index ec5491d..ec915e9 100644 --- a/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt +++ b/app/src/main/java/com/example/livingai/pages/camera/CameraScreen.kt @@ -129,8 +129,8 @@ fun CameraScreen( CameraPreview( modifier = Modifier.fillMaxSize(), controller = controller, - onFrame = { bitmap, rotation, _ -> - viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation)) + onFrame = { bitmap, rotation, fxPixels -> + viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels)) } ) @@ -157,7 +157,7 @@ fun CameraScreen( } // Debug Overlay - state.distanceState?.let { dist -> + state.orientationState?.let { orient -> Box( modifier = Modifier .align(Alignment.TopEnd) @@ -166,13 +166,25 @@ fun CameraScreen( .padding(8.dp) ) { Column { - Text("Dist: ${dist.distanceMeters ?: "N/A"}", color = Color.White) - Text("Rec: ${dist.recommendation}", color = Color.White) - Text("Tilted: ${dist.isCameraTilted}", color = Color.White) - Text("Rotated: ${dist.isCameraRotated}", color = Color.White) - Text("Centered: ${dist.isObjectCentered}", color = Color.White) - Text("Ready: ${dist.readyToCapture}", color = Color.White) - Text("Conf: ${dist.confidenceScore}", color = Color.White) + Text("Success: ${orient.success}", color = Color.White) + Text("Reason: ${orient.reason}", color = Color.White) + + orient.pixelMetrics?.let { pm -> + Text("Width (px): ${pm.widthPx}", color = Color.White) + Text("Height (px): ${pm.heightPx}", color = Color.White) + } + + // Display depth metrics from OrientationState + orient.relativeDepth?.let { rel -> + Text("Rel Depth: %.4f".format(rel), color = Color.White) + } + + orient.absoluteDistanceMeters?.let { abs -> + Text("Dist (m): %.2f".format(abs), color = Color.White) + } + + 
Text("IOU: ${orient.iouScore}", color = Color.White) + Text("Matched: ${orient.orientationMatched}", color = Color.White) } } } diff --git a/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt b/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt index abafb82..41212d0 100644 --- a/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt +++ b/app/src/main/java/com/example/livingai/pages/camera/CameraViewModel.kt @@ -7,7 +7,8 @@ import androidx.camera.core.ImageProxy import androidx.lifecycle.ViewModel import androidx.lifecycle.viewModelScope import com.example.livingai.domain.ml.AIModel -import com.example.livingai.domain.ml.DistanceState +import com.example.livingai.domain.ml.Orientation +import com.example.livingai.domain.ml.OrientationState import com.example.livingai.domain.repository.CameraRepository import com.example.livingai.domain.usecases.AppDataUseCases import com.example.livingai.utils.ScreenDimensions @@ -47,7 +48,7 @@ class CameraViewModel( fun onEvent(event: CameraEvent) { when (event) { is CameraEvent.ImageCaptured -> handleImageProxy(event.imageProxy) - is CameraEvent.FrameReceived -> handleFrame(event.bitmap, event.rotationDegrees) + is CameraEvent.FrameReceived -> handleFrame(event.bitmap, event.rotationDegrees, event.focalLengthPixels) is CameraEvent.ClearCapturedImage -> clearCaptured() is CameraEvent.SetContext -> setContext(event.animalId, event.orientation) is CameraEvent.AutoCaptureTriggered -> { @@ -87,7 +88,7 @@ class CameraViewModel( } } - private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int) { + private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) { if (_state.value.isCapturing || _state.value.shouldAutoCapture) { return } @@ -95,8 +96,22 @@ class CameraViewModel( if (isProcessingFrame.compareAndSet(false, true)) { viewModelScope.launch { try { - // Process the frame for distance and metadata - val distanceState = cameraRepository.processFrame(bitmap) + val currentOrientationStr = _state.value.orientation + val silhouette = _state.value.savedMaskBitmap + + val orientationState = if (currentOrientationStr != null && silhouette != null) { + val orientationEnum = mapStringToOrientation(currentOrientationStr) + + cameraRepository.processFrame( + bitmap, + orientationEnum, + silhouette, + 1.55f, + focalLengthPixels + ) + } else { + null + } val result = aiModel.segmentImage(bitmap) if (result != null) { @@ -124,7 +139,7 @@ class CameraViewModel( _state.value = _state.value.copy( segmentationMask = output, - distanceState = distanceState + orientationState = orientationState ) if (_state.value.isAutoCaptureEnabled && @@ -145,7 +160,7 @@ class CameraViewModel( } else { _state.value = _state.value.copy( segmentationMask = null, - distanceState = distanceState + orientationState = orientationState ) } } finally { @@ -154,6 +169,18 @@ class CameraViewModel( } } } + + private fun mapStringToOrientation(orientation: String): Orientation { + return when (orientation.lowercase()) { + "front" -> Orientation.FRONT + "back" -> Orientation.BACK + "left" -> Orientation.LEFT + "right" -> Orientation.RIGHT + "leftangle" -> Orientation.LEFT_45 + "rightangle" -> Orientation.RIGHT_45 + else -> Orientation.FRONT + } + } } data class CameraUiState( @@ -168,12 +195,12 @@ data class CameraUiState( val matchThreshold: Int = 50, val distanceMethod: String = "Jaccard", val shouldAutoCapture: Boolean = false, - val distanceState: DistanceState? = null + val orientationState: OrientationState? 
= null
)

sealed class CameraEvent {
    data class ImageCaptured(val imageProxy: ImageProxy) : CameraEvent()
-    data class FrameReceived(val bitmap: Bitmap, val rotationDegrees: Int) : CameraEvent()
+    data class FrameReceived(val bitmap: Bitmap, val rotationDegrees: Int, val focalLengthPixels: Float) : CameraEvent()
    object ClearCapturedImage : CameraEvent()
    data class SetContext(val animalId: String, val orientation: String?) : CameraEvent()
    object AutoCaptureTriggered : CameraEvent()
diff --git a/app/src/main/java/com/example/livingai/pages/components/CameraPreview.kt b/app/src/main/java/com/example/livingai/pages/components/CameraPreview.kt
index 214e3e6..181ee1b 100644
--- a/app/src/main/java/com/example/livingai/pages/components/CameraPreview.kt
+++ b/app/src/main/java/com/example/livingai/pages/components/CameraPreview.kt
@@ -3,6 +3,7 @@ package com.example.livingai.pages.components
 import android.annotation.SuppressLint
 import android.graphics.Bitmap
 import android.hardware.camera2.CameraCharacteristics
+import android.util.SizeF
 import android.view.ViewGroup
 import androidx.annotation.OptIn
 import androidx.camera.camera2.interop.Camera2CameraInfo
@@ -34,9 +35,8 @@ fun CameraPreview(
    val cameraController = controller ?: remember { LifecycleCameraController(context) }
-    // State to hold the focal length.
-    // Updated on the Main thread, read by the analysis background thread.
-    val focalLengthState = remember { mutableStateOf(0f) }
+    // State to hold the focal length in PIXELS.
+    val focalLengthPxState = remember { mutableStateOf(0f) }
    // Periodically check/update focal length on the Main thread
    LaunchedEffect(cameraController) {
@@ -45,14 +45,48 @@
                val info = cameraController.cameraInfo
                if (info != null) {
                    val camera2Info = Camera2CameraInfo.from(info)
+
+                    // 1. Get Focal Length (mm)
                    val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
-                    val fl = focalLengths?.firstOrNull() ?: 0f
-                    focalLengthState.value = fl
+                    val focalLengthMm = focalLengths?.firstOrNull() ?: 0f
+
+                    // 2. Get Sensor Size (mm)
+                    val sensorSize: SizeF? = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
+                    val sensorWidthMm = sensorSize?.width ?: 0f
+
+                    // 3. Image width (px) is not known here: the analysis resolution is only
+                    //    available per frame, inside the analyzer. Since
+                    //    F_px = (F_mm / SensorWidth_mm) * ImageWidth_px,
+                    //    the conversion to pixels is done in the analyzer below, where bitmap.width
+                    //    is available. Camera2CameraInfo is safe to read from that background thread.
                }
            } catch (e: Exception) {
-                // Ignore errors, e.g. if camera is closing or not ready
+                // Ignore errors
            }
-            // Check periodically in case the active camera changes
            delay(2000)
        }
    }
@@ -62,9 +96,29 @@
        cameraController.setImageAnalysisAnalyzer(cameraExecutor) { imageProxy ->
            val bitmap = imageProxy.toBitmap()
            val rotationDegrees = imageProxy.imageInfo.rotationDegrees
-            val currentFocalLength = focalLengthState.value
+
+            // Calculate Focal Length in Pixels
+            var fxPixels = 0f
+            try {
+                val info = cameraController.cameraInfo
+                if (info != null) {
+                    val camera2Info = Camera2CameraInfo.from(info)
+                    val focalLengths = camera2Info.getCameraCharacteristic(CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS)
+                    val focalLengthMm = focalLengths?.firstOrNull() ?: 0f
+
+                    val sensorSize = camera2Info.getCameraCharacteristic(CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE)
+                    val sensorWidthMm = sensorSize?.width ?: 0f
+
+                    if (sensorWidthMm > 0) {
+                        val imageWidth = bitmap.width.toFloat()
+                        fxPixels = (focalLengthMm / sensorWidthMm) * imageWidth
+                    }
+                }
+            } catch (e: Exception) {
+                e.printStackTrace()
+            }
-            onFrame(bitmap, rotationDegrees, currentFocalLength)
+            onFrame(bitmap, rotationDegrees, fxPixels)
            imageProxy.close()
        }
    }
@@ -94,7 +148,7 @@
            }
        },
        onRelease = {
-            // Cleanup if needed
+            // Cleanup
        }
    )
}
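A minimal sketch of how the pieces added above are meant to compose. The CameraIntrinsicsFetcher.fetch and CameraRepository.processFrame signatures are taken from this diff; the camera id "0", the Orientation.LEFT request, and the 1.55 f reference height (the value hard-coded in CameraViewModel) are assumptions for illustration only.

// Sketch only — not part of the diff.
import android.content.Context
import android.graphics.Bitmap
import android.util.Size
import com.example.livingai.domain.ml.CameraIntrinsicsFetcher
import com.example.livingai.domain.ml.Orientation
import com.example.livingai.domain.ml.OrientationState
import com.example.livingai.domain.repository.CameraRepository

suspend fun estimateForFrame(
    context: Context,
    repository: CameraRepository,
    frame: Bitmap,
    silhouette: Bitmap
): OrientationState {
    // Focal length in pixels for the resolution that is actually analyzed.
    val intrinsics = CameraIntrinsicsFetcher.fetch(
        context = context,
        cameraId = "0", // assumption: primary back camera
        imageSize = Size(frame.width, frame.height)
    )

    // Segmentation, MiDaS relative depth, the pinhole absolute distance and the
    // orientation/IoU check all run inside processFrame.
    return repository.processFrame(
        bitmap = frame,
        requestedOrientation = Orientation.LEFT,
        silhouetteBitmap = silhouette,
        realObjectHeightMeters = 1.55f,
        focalLengthPixels = intrinsics.focalLengthPixels
    )
}

The returned OrientationState carries relativeDepth (MiDaS inverse depth), absoluteDistanceMeters (pinhole estimate), and the IoU score from the silhouette comparison.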