detection: replace ML Kit subject segmentation with on-device TFLite object detection (EfficientDet-Lite0)

SaiD 2025-12-10 11:25:53 +05:30
parent c445068773
commit f647a509d5
10 changed files with 368 additions and 256 deletions

View File — labels.txt (object-detector label list, new file)

@ -0,0 +1,81 @@
Unknown
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

View File — AIModelImpl.kt

@ -1,116 +1,94 @@
 package com.example.livingai.data.ml
+import android.content.Context
 import android.graphics.Bitmap
-import android.graphics.Color
 import android.graphics.Rect
 import com.example.livingai.domain.ml.AIModel
-import com.google.mlkit.vision.common.InputImage
-import com.google.mlkit.vision.segmentation.subject.SubjectSegmentation
-import com.google.mlkit.vision.segmentation.subject.SubjectSegmenterOptions
-import kotlinx.coroutines.suspendCancellableCoroutine
-import kotlin.coroutines.resume
-import kotlin.coroutines.resumeWithException
+import org.tensorflow.lite.Interpreter
+import org.tensorflow.lite.support.common.FileUtil
+import org.tensorflow.lite.support.image.ImageProcessor
+import org.tensorflow.lite.support.image.TensorImage
+import org.tensorflow.lite.support.image.ops.ResizeOp
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
-private const val MASK_COLOR = 0x5500FF00 // semi-transparent green overlay
-class AIModelImpl : AIModel {
-    private val segmenter by lazy {
-        val options = SubjectSegmenterOptions.Builder()
-            .enableForegroundBitmap()
-            .build()
-        SubjectSegmentation.getClient(options)
-    }
-    override fun deriveInference(bitmap: Bitmap): String = "Inference Result"
-    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
-        return suspendCancellableCoroutine { cont ->
-            val image = InputImage.fromBitmap(bitmap, 0)
-            segmenter.process(image)
-                .addOnSuccessListener { result ->
-                    val fg = result.foregroundBitmap ?: return@addOnSuccessListener cont.resume(null)
-                    // Instead of coloring it here, just pass the original mask bitmap
-                    // or ensure it's suitable for further processing.
-                    // The foreground bitmap from MLKit is usually the object cut out with transparent background.
-                    val booleanMask = createBooleanMask(fg)
-                    // We return the raw foreground bitmap as the 'maskBitmap' for now,
-                    // or a colorized version if that's what UI expects.
-                    // But for IOU/Overlap calculation, we might want the binary info.
-                    // The UI seems to overlay 'colorMask'.
-                    // DistanceEstimator uses 'segMaskBitmap'.
-                    val colorMask = createColorizedMask(fg)
-                    val bbox = computeBoundingBox(booleanMask, fg.width, fg.height)
-                    // Returning colorMask as the first element because UI expects a visual overlay.
-                    // But note: DistanceEstimator might need the binary mask or the foreground.
-                    // If DistanceEstimator treats this bitmap as a mask, colorized is fine as long as alpha is preserved.
-                    cont.resume(Triple(colorMask, booleanMask, bbox))
-                }
-                .addOnFailureListener { e ->
-                    cont.resumeWithException(e)
-                }
-        }
-    }
-    private fun createColorizedMask(maskBitmap: Bitmap): Bitmap {
-        val w = maskBitmap.width
-        val h = maskBitmap.height
-        val pixels = IntArray(w * h)
-        maskBitmap.getPixels(pixels, 0, w, 0, 0, w, h)
-        for (i in pixels.indices) {
-            // ML Kit Foreground Bitmap: Non-transparent pixels are the object.
-            if (Color.alpha(pixels[i]) > 0) {
-                pixels[i] = MASK_COLOR
-            }
-        }
-        return Bitmap.createBitmap(pixels, w, h, Bitmap.Config.ARGB_8888)
-    }
-    private fun createBooleanMask(bitmap: Bitmap): BooleanArray {
-        val w = bitmap.width
-        val h = bitmap.height
-        val mask = BooleanArray(w * h)
-        val pixels = IntArray(w * h)
-        bitmap.getPixels(pixels, 0, w, 0, 0, w, h)
-        for (i in pixels.indices) {
-            mask[i] = Color.alpha(pixels[i]) > 0
-        }
-        return mask
-    }
-    private fun computeBoundingBox(mask: BooleanArray, w: Int, h: Int): Rect {
-        var minX = Int.MAX_VALUE
-        var minY = Int.MAX_VALUE
-        var maxX = Int.MIN_VALUE
-        var maxY = Int.MIN_VALUE
-        for (y in 0 until h) {
-            for (x in 0 until w) {
-                val idx = y * w + x
-                if (mask[idx]) {
-                    if (x < minX) minX = x
-                    if (y < minY) minY = y
-                    if (x > maxX) maxX = x
-                    if (y > maxY) maxY = y
-                }
-            }
-        }
-        return if (minX == Int.MAX_VALUE) {
-            Rect(0, 0, 0, 0)
-        } else {
-            Rect(minX, minY, maxX, maxY)
-        }
-    }
+class AIModelImpl(private val context: Context) : AIModel {
+    private val objectDetector: Interpreter
+    private val labels: List<String>
+    init {
+        // Load the TFLite model from assets
+        val modelBuffer = FileUtil.loadMappedFile(context, "efficientdet-lite0.tflite")
+        val options = Interpreter.Options().apply { numThreads = 4 }
+        objectDetector = Interpreter(modelBuffer, options)
+        // Load labels from assets
+        labels = try {
+            FileUtil.loadLabels(context, "labels.txt")
+        } catch (e: Exception) {
+            e.printStackTrace()
+            emptyList()
+        }
+    }
+    override suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult? {
+        // Preprocess the image
+        val imageProcessor = ImageProcessor.Builder()
+            .add(ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
+            .build()
+        var tensorImage = TensorImage.fromBitmap(bitmap)
+        tensorImage = imageProcessor.process(tensorImage)
+        // Prepare model outputs.
+        // Based on crash: [1, 25, 4] vs [1, 10, 4]. The model outputs 25 detections, not 10.
+        val locations = Array(1) { Array(25) { FloatArray(4) } }
+        val classes = Array(1) { FloatArray(25) }
+        val scores = Array(1) { FloatArray(25) }
+        val numDetections = FloatArray(1)
+        val outputs = mapOf(
+            0 to locations,
+            1 to classes,
+            2 to scores,
+            3 to numDetections
+        )
+        // Run inference
+        objectDetector.runForMultipleInputsOutputs(arrayOf(tensorImage.buffer), outputs)
+        // Post-process the results
+        val bestDetection = scores[0].withIndex()
+            .maxByOrNull { it.value }
+            ?.takeIf { it.value > 0.5f } // Confidence threshold
+        if (bestDetection != null) {
+            val index = bestDetection.index
+            val score = bestDetection.value
+            val location = locations[0][index] // [ymin, xmin, ymax, xmax]
+            val labelIndex = classes[0][index].toInt()
+            val label = labels.getOrElse(labelIndex) { "Unknown" }
+            // Convert normalized coordinates to absolute pixel values
+            val ymin = location[0] * bitmap.height
+            val xmin = location[1] * bitmap.width
+            val ymax = location[2] * bitmap.height
+            val xmax = location[3] * bitmap.width
+            val boundingBox = Rect(xmin.toInt(), ymin.toInt(), xmax.toInt(), ymax.toInt())
+            return ObjectDetectionResult(boundingBox, label, score)
+        }
+        return null
+    }
+    // This is no longer the primary function, but kept for interface compliance
+    override suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>? {
+        // Returning null as we are focusing on object detection now
+        return null
+    }
+    override fun deriveInference(bitmap: Bitmap): String = "Object Detection"
 }
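Editor's aside (not part of this commit): the hard-coded 25 in the output buffers above matches this particular efficientdet-lite0.tflite export, but the same shape-mismatch crash reappears if the model is swapped for one with a different detection count. Below is a minimal sketch, assuming the same org.tensorflow.lite Interpreter instance and the same boxes/classes/scores/count output layout; the function name is illustrative only.

import org.tensorflow.lite.Interpreter

fun buildDetectionOutputs(interpreter: Interpreter): Map<Int, Any> {
    // Output tensor 0 is assumed to have shape [1, N, 4]; read N at runtime so a
    // model swap (10 vs 25 detections) no longer breaks the buffer copy.
    val numDetections = interpreter.getOutputTensor(0).shape()[1]
    return mapOf(
        0 to Array(1) { Array(numDetections) { FloatArray(4) } }, // boxes [ymin, xmin, ymax, xmax]
        1 to Array(1) { FloatArray(numDetections) },              // class indices
        2 to Array(1) { FloatArray(numDetections) },              // confidence scores
        3 to FloatArray(1)                                        // valid detection count
    )
}

The returned map can be passed to runForMultipleInputsOutputs in place of the hand-built one above.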

View File — ObjectDetectionResult.kt

@ -0,0 +1,9 @@
package com.example.livingai.data.ml
import android.graphics.Rect
data class ObjectDetectionResult(
    val boundingBox: Rect,
    val label: String,
    val confidence: Float
)
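Editor's aside (not part of this commit): the debug overlay later in this change reports an IoU score (orient.iouScore), and the removed AIModelImpl comments mention IoU/overlap calculations. For reference, a minimal sketch of rectangle IoU computed directly on android.graphics.Rect; the function name is illustrative.

import android.graphics.Rect

fun iou(a: Rect, b: Rect): Float {
    // Intersection box; width/height clamp to zero when the rects don't overlap.
    val iw = (minOf(a.right, b.right) - maxOf(a.left, b.left)).coerceAtLeast(0)
    val ih = (minOf(a.bottom, b.bottom) - maxOf(a.top, b.top)).coerceAtLeast(0)
    val inter = iw.toLong() * ih
    val union = a.width().toLong() * a.height() + b.width().toLong() * b.height() - inter
    return if (union > 0) inter.toFloat() / union else 0f
}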

View File — CameraRepositoryImpl.kt

@ -4,6 +4,7 @@ import android.content.ContentValues
 import android.content.Context
 import android.graphics.Bitmap
 import android.graphics.Matrix
+import android.graphics.Rect
 import android.provider.MediaStore
 import androidx.camera.core.ImageProxy
 import com.example.livingai.data.ml.DistanceEstimatorImpl
@ -48,16 +49,69 @@ class CameraRepositoryImpl(
         bitmap: Bitmap,
         requestedOrientation: Orientation,
         silhouetteBitmap: Bitmap,
-        realObjectHeightMeters: Float?, // ★ NEW PARAM
-        focalLengthPixels: Float // from camera intrinsics
+        realObjectHeightMeters: Float?,
+        focalLengthPixels: Float,
+        boundingBox: Rect?
     ): OrientationState = withContext(Dispatchers.Default) {
         // 1. Collect segmentation
-        val meta = FrameMetadataProvider.collectMetadata(bitmap)
-        val bbox = meta.segmentationBox
-        val mask = meta.segmentationMaskBitmap
-        if (bbox == null || mask == null) {
+        // ISSUE: FrameMetadataProvider.collectMetadata runs segmentation internally
+        // (getSegmentation -> aiModel.segmentImage), which now returns null, so processFrame
+        // would always bail out with "Segmentation missing".
+        // FIX: reuse the detection result already obtained in CameraViewModel. The detection
+        // bounding box stands in for the segmentation box, and a synthetic box-shaped mask
+        // stands in for the segmentation mask that DistanceEstimator expects. When no box is
+        // passed, fall back to collectMetadata as before.
+        val syntheticMeta = if (boundingBox != null) {
+            // Build a mask bitmap whose pixels inside the bounding box are white.
+            // This runs on the Default dispatcher; IMU, rotation and depth are read directly
+            // because collectMetadata is bypassed here.
+            val maskBitmap = Bitmap.createBitmap(bitmap.width, bitmap.height, Bitmap.Config.ARGB_8888)
+            val canvas = android.graphics.Canvas(maskBitmap)
+            val paint = android.graphics.Paint().apply { color = android.graphics.Color.WHITE }
+            canvas.drawRect(boundingBox, paint)
+            val imu = FrameMetadataProvider.getIMU()
+            val rot = FrameMetadataProvider.getRotation()
+            val depth = FrameMetadataProvider.getDepthData()
+            FrameMetadataProvider.FrameCollectedMetadata(
+                segmentationMaskBitmap = maskBitmap,
+                segmentationBox = boundingBox,
+                depthMeters = depth.depthMeters,
+                depthWidth = depth.width,
+                depthHeight = depth.height,
+                depthConfidence = depth.confidence,
+                pitch = imu.pitch,
+                roll = imu.roll,
+                yaw = imu.yaw,
+                rotationDegrees = rot
+            )
+        } else {
+            FrameMetadataProvider.collectMetadata(bitmap)
+        }
+        val bbox = syntheticMeta.segmentationBox
+        // val mask = syntheticMeta.segmentationMaskBitmap // Mask is used inside distanceEstimator
+        if (bbox == null) {
             return@withContext OrientationState(
                 success = false,
                 reason = "Segmentation missing",
@ -78,7 +132,7 @@ class CameraRepositoryImpl(
         )
         // 3. Build FrameData with relative depth only
-        val frameData = meta.toFrameData(bitmap).copy(
+        val frameData = syntheticMeta.toFrameData(bitmap).copy(
             medianDepth = midasResult?.relativeDepth
         )
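Editor's aside on the synthetic mask above (not part of this commit): every pixel inside the detection box is treated as foreground, which over-counts the true object area — fine for rough depth sampling, less suitable for tight IoU matching. A minimal sketch of the same idea as a standalone helper, with an illustrative name:

import android.graphics.Bitmap
import android.graphics.Canvas
import android.graphics.Color
import android.graphics.Paint
import android.graphics.Rect

fun boxMask(width: Int, height: Int, box: Rect): Bitmap {
    // Pixels inside the detection box become opaque white; everything else stays transparent,
    // mimicking the "segmentation mask" shape DistanceEstimator expects.
    val mask = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888)
    Canvas(mask).drawRect(box, Paint().apply { color = Color.WHITE })
    return mask
}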

View File — Koin DI module (appModule)

@ -111,7 +111,7 @@ val appModule = module {
     }
     // ML Model
-    single<AIModel> { AIModelImpl() }
+    single<AIModel> { AIModelImpl(androidContext()) }
     single<ObjectDetector> {
         ObjectDetectorImpl(
             context = androidContext(),

View File — AIModel.kt

@ -2,8 +2,10 @@ package com.example.livingai.domain.ml
 import android.graphics.Bitmap
 import android.graphics.Rect
+import com.example.livingai.data.ml.ObjectDetectionResult
 interface AIModel {
     fun deriveInference(bitmap: Bitmap): String
     suspend fun segmentImage(bitmap: Bitmap): Triple<Bitmap, BooleanArray, Rect>?
+    suspend fun detectObject(bitmap: Bitmap): ObjectDetectionResult?
 }

View File — CameraRepository.kt

@ -1,6 +1,7 @@
 package com.example.livingai.domain.repository
 import android.graphics.Bitmap
+import android.graphics.Rect
 import androidx.camera.core.ImageProxy
 import com.example.livingai.domain.ml.Orientation
 import com.example.livingai.domain.ml.OrientationState
@ -12,7 +13,8 @@ interface CameraRepository {
         requestedOrientation: Orientation,
         silhouetteBitmap: Bitmap,
         realObjectHeightMeters: Float?,
-        focalLengthPixels: Float
+        focalLengthPixels: Float,
+        boundingBox: Rect? = null
     ): OrientationState
     suspend fun saveImage(bitmap: Bitmap, animalId: String, orientation: String?): String
 }

View File — CameraScreen.kt

@ -7,10 +7,14 @@ import androidx.camera.core.ImageProxy
 import androidx.camera.view.LifecycleCameraController
 import androidx.compose.foundation.Image
 import androidx.compose.foundation.background
+import androidx.compose.foundation.border
 import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.BoxWithConstraints
 import androidx.compose.foundation.layout.Column
 import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.offset
 import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
 import androidx.compose.foundation.shape.RoundedCornerShape
 import androidx.compose.material.icons.Icons
 import androidx.compose.material.icons.filled.Camera
@ -18,7 +22,6 @@ import androidx.compose.material3.CircularProgressIndicator
 import androidx.compose.material3.FabPosition
 import androidx.compose.material3.FloatingActionButton
 import androidx.compose.material3.Icon
-import androidx.compose.material3.MaterialTheme
 import androidx.compose.material3.Scaffold
 import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
@ -32,6 +35,7 @@ import androidx.compose.ui.graphics.Color
 import androidx.compose.ui.graphics.asImageBitmap
 import androidx.compose.ui.layout.ContentScale
 import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.platform.LocalDensity
 import androidx.compose.ui.unit.dp
 import androidx.navigation.NavController
 import androidx.core.content.ContextCompat
@ -67,6 +71,7 @@ fun CameraScreen(
     PermissionWrapper {
         val state by viewModel.state.collectAsState()
         val context = LocalContext.current
+        val density = LocalDensity.current
         val controller = remember {
             LifecycleCameraController(context).apply {
@ -83,31 +88,14 @@ fun CameraScreen(
                         viewModel.onEvent(CameraEvent.ImageCaptured(image))
                     }
-                    override fun onError(exception: ImageCaptureException) {
-                        // Handle error, e.g., log it or show a message
-                    }
+                    override fun onError(exception: ImageCaptureException) {}
                 }
             )
         }
-        LaunchedEffect(state.shouldAutoCapture) {
-            if (state.shouldAutoCapture) {
-                takePhoto()
-                viewModel.onEvent(CameraEvent.AutoCaptureTriggered)
-            }
-        }
         LaunchedEffect(state.capturedImageUri) {
             state.capturedImageUri?.let {
-                navController.navigate(
-                    Route.ViewImageScreen(
-                        imageUri = it.toString(),
-                        shouldAllowRetake = true,
-                        showAccept = true,
-                        orientation = orientation,
-                        animalId = animalId
-                    )
-                )
+                navController.navigate(Route.ViewImageScreen(it.toString(), true, orientation, true, false, animalId))
                 viewModel.onEvent(CameraEvent.ClearCapturedImage)
             }
         }
@ -120,71 +108,95 @@ fun CameraScreen(
         },
         floatingActionButtonPosition = FabPosition.Center
     ) { paddingValues ->
-        Box(
-            modifier = Modifier.fillMaxSize(),
-        ) {
-            Box(
-                modifier = Modifier.fillMaxSize()
-            ) {
-                CameraPreview(
-                    modifier = Modifier.fillMaxSize(),
-                    controller = controller,
-                    onFrame = { bitmap, rotation, fxPixels ->
-                        viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
-                    }
-                )
-                // The ML segmentation mask
-                state.segmentationMask?.let { mask ->
-                    Image(
-                        bitmap = mask.asImageBitmap(),
-                        contentDescription = "Segmentation Overlay",
-                        modifier = Modifier.fillMaxSize(),
-                        contentScale = ContentScale.FillBounds,
-                        alpha = 0.5f
-                    )
-                }
-                state.silhouetteMask?.let {
-                    Image(
-                        bitmap = it.asImageBitmap(),
-                        contentDescription = "Silhouette Overlay",
-                        modifier = Modifier.fillMaxSize(),
-                        contentScale = ContentScale.Fit,
-                        alpha = 0.4f
-                    )
-                }
-            }
-            // Debug Overlay
-            state.orientationState?.let { orient ->
-                Box(
-                    modifier = Modifier
-                        .align(Alignment.TopEnd)
-                        .padding(16.dp)
-                        .background(Color.Black.copy(alpha = 0.5f), RoundedCornerShape(8.dp))
-                        .padding(8.dp)
-                ) {
-                    Column {
-                        Text("Success: ${orient.success}", color = Color.White)
-                        Text("Reason: ${orient.reason}", color = Color.White)
-                        orient.pixelMetrics?.let { pm ->
-                            Text("Width (px): ${pm.widthPx}", color = Color.White)
-                            Text("Height (px): ${pm.heightPx}", color = Color.White)
-                        }
-                        // Display depth metrics from OrientationState
-                        orient.relativeDepth?.let { rel ->
-                            Text("Rel Depth: %.4f".format(rel), color = Color.White)
-                        }
-                        orient.absoluteDistanceMeters?.let { abs ->
-                            Text("Dist (m): %.2f".format(abs), color = Color.White)
-                        }
-                        Text("IOU: ${orient.iouScore}", color = Color.White)
-                        Text("Matched: ${orient.orientationMatched}", color = Color.White)
-                    }
-                }
-            }
+        BoxWithConstraints(modifier = Modifier.fillMaxSize().padding(paddingValues)) {
+            val screenWidth = maxWidth
+            val screenHeight = maxHeight
+            CameraPreview(
+                modifier = Modifier.fillMaxSize(),
+                controller = controller,
+                onFrame = { bitmap, rotation, fxPixels ->
+                    viewModel.onEvent(CameraEvent.FrameReceived(bitmap, rotation, fxPixels))
+                }
+            )
+            state.detectionResult?.let { detection ->
+                val imageWidth = state.imageWidth.toFloat()
+                val imageHeight = state.imageHeight.toFloat()
+                if (imageWidth == 0f || imageHeight == 0f) return@let
+                val screenW = with(density) { screenWidth.toPx() }
+                val screenH = with(density) { screenHeight.toPx() }
+                val scaleX = screenW / imageWidth
+                val scaleY = screenH / imageHeight
+                val scale = maxOf(scaleX, scaleY) // For FILL_CENTER behavior
+                val offsetX = (screenW - imageWidth * scale) / 2f
+                val offsetY = (screenH - imageHeight * scale) / 2f
+                val bbox = detection.boundingBox
+                val left = bbox.left * scale + offsetX
+                val top = bbox.top * scale + offsetY
+                val leftDp = with(density) { left.toDp() }
+                val topDp = with(density) { top.toDp() }
+                val widthDp = with(density) { (bbox.width() * scale).toDp() }
+                val heightDp = with(density) { (bbox.height() * scale).toDp() }
+                Box(
+                    modifier = Modifier
+                        .offset(x = leftDp, y = topDp)
+                        .size(width = widthDp, height = heightDp)
+                        .border(2.dp, Color.Yellow)
+                )
+                // Overlay 1: Object Label & Confidence (Above the box)
+                Column(
+                    modifier = Modifier
+                        .offset(x = leftDp, y = topDp - 25.dp)
+                        .background(Color.Black.copy(alpha = 0.7f))
+                        .padding(4.dp)
+                ) {
+                    Text(
+                        text = "${detection.label} (${(detection.confidence * 100).toInt()}%)",
+                        color = Color.White
+                    )
+                }
+            }
+            // Overlay 2: Fixed top-right corner info
+            state.orientationState?.let { orient ->
+                Column(
+                    modifier = Modifier
+                        .align(Alignment.TopEnd)
+                        .padding(16.dp)
+                        .background(Color.Black.copy(alpha = 0.7f), shape = RoundedCornerShape(8.dp))
+                        .padding(8.dp)
+                ) {
+                    if (orient.relativeDepth != null) {
+                        Text(
+                            text = "Rel Depth: %.2f".format(orient.relativeDepth),
+                            color = Color.Cyan
+                        )
+                    }
+                    if (orient.absoluteDistanceMeters != null) {
+                        Text(
+                            text = "Dist: %.2fm".format(orient.absoluteDistanceMeters),
+                            color = Color.Green
+                        )
+                    }
+                    if (orient.iouScore != null) {
+                        Text(
+                            text = "IoU: %.2f".format(orient.iouScore),
+                            color = Color.Yellow
+                        )
+                    }
+                    orient.pixelMetrics?.let { metrics ->
+                        Text(
+                            text = "W: ${metrics.widthPx}px H: ${metrics.heightPx}px",
+                            color = Color.White
+                        )
+                    }
+                }
+            }
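Editor's aside on the overlay math above (not part of this commit): the FILL_CENTER-style scaling scales the frame by the larger of the two screen/image ratios and centres the overflow, so detection boxes near the frame edges can land partly off-screen. A minimal sketch of the same mapping as a pure, unit-testable function; the names are illustrative.

import android.graphics.Rect
import android.graphics.RectF

fun mapBoxToScreen(box: Rect, imageW: Float, imageH: Float, screenW: Float, screenH: Float): RectF {
    // Scale uniformly by the larger ratio so the image covers the screen, then centre it;
    // the overflow is split evenly between the two sides (offsets can be negative).
    val scale = maxOf(screenW / imageW, screenH / imageH)
    val offsetX = (screenW - imageW * scale) / 2f
    val offsetY = (screenH - imageH * scale) / 2f
    return RectF(
        box.left * scale + offsetX,
        box.top * scale + offsetY,
        box.right * scale + offsetX,
        box.bottom * scale + offsetY
    )
}

For example, a 640x480 frame on a 1080x2280 px surface gives scale = max(1.69, 4.75) = 4.75 and offsetX = (1080 - 640 * 4.75) / 2 = -980 px, so roughly 200 image columns on each side fall outside the visible preview.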

View File — CameraViewModel.kt

@ -6,6 +6,7 @@ import android.net.Uri
 import androidx.camera.core.ImageProxy
 import androidx.lifecycle.ViewModel
 import androidx.lifecycle.viewModelScope
+import com.example.livingai.data.ml.ObjectDetectionResult
 import com.example.livingai.domain.ml.AIModel
 import com.example.livingai.domain.ml.Orientation
 import com.example.livingai.domain.ml.OrientationState
@ -72,7 +73,8 @@ class CameraViewModel(
     private fun clearCaptured() {
         _state.value = _state.value.copy(
             capturedImageUri = null,
-            segmentationMask = null
+            segmentationMask = null,
+            detectionResult = null // Clear detection result as well
         )
     }
@ -89,80 +91,49 @@ class CameraViewModel(
     }
     private fun handleFrame(bitmap: Bitmap, rotationDegrees: Int, focalLengthPixels: Float) {
-        if (_state.value.isCapturing || _state.value.shouldAutoCapture) {
-            return
-        }
         if (isProcessingFrame.compareAndSet(false, true)) {
             viewModelScope.launch {
                 try {
-                    val currentOrientationStr = _state.value.orientation
-                    val silhouette = _state.value.savedMaskBitmap
-                    val orientationState = if (currentOrientationStr != null && silhouette != null) {
-                        val orientationEnum = mapStringToOrientation(currentOrientationStr)
-                        cameraRepository.processFrame(
-                            bitmap,
-                            orientationEnum,
-                            silhouette,
-                            1.55f,
-                            focalLengthPixels
-                        )
-                    } else {
-                        null
-                    }
-                    val result = aiModel.segmentImage(bitmap)
-                    if (result != null) {
-                        val (maskBitmap, _) = result
-                        val rotatedMask = if (rotationDegrees != 0) {
-                            val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
-                            Bitmap.createBitmap(
-                                maskBitmap,
-                                0,
-                                0,
-                                maskBitmap.width,
-                                maskBitmap.height,
-                                matrix,
-                                true
-                            )
-                        } else {
-                            maskBitmap
-                        }
-                        val output = if(_state.value.orientation == "front" || _state.value.orientation == "back")
-                            fitImageToCrop(rotatedMask, screenDims.screenWidth, screenDims.screenHeight)
-                        else
-                            fitImageToCrop(rotatedMask, screenDims.screenHeight, screenDims.screenWidth)
-                        _state.value = _state.value.copy(
-                            segmentationMask = output,
-                            orientationState = orientationState
-                        )
-                        if (_state.value.isAutoCaptureEnabled &&
-                            _state.value.savedMaskBitmap != null &&
-                            output != null
-                        ) {
-                            val isValidCapture = calculateDistance(
-                                _state.value.distanceMethod,
-                                _state.value.savedMaskBitmap!!,
-                                output,
-                                _state.value.matchThreshold
-                            )
-                            if (isValidCapture) {
-                                _state.value = _state.value.copy(shouldAutoCapture = true)
-                            }
-                        }
-                    } else {
-                        _state.value = _state.value.copy(
-                            segmentationMask = null,
-                            orientationState = orientationState
-                        )
-                    }
+                    // Rotate bitmap to be upright before processing
+                    val rotatedBitmap = if (rotationDegrees != 0) {
+                        val matrix = Matrix().apply { postRotate(rotationDegrees.toFloat()) }
+                        Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true)
+                    } else {
+                        bitmap
+                    }
+                    // Perform Object Detection
+                    val detectionResult = aiModel.detectObject(rotatedBitmap)
+                    var orientationState: OrientationState? = null
+                    val requestedOrientationStr = _state.value.orientation
+                    if (requestedOrientationStr != null && detectionResult != null) {
+                        // processFrame needs a silhouette bitmap; use the one loaded in setContext
+                        val silhouette = _state.value.silhouetteMask
+                        if (silhouette != null) {
+                            orientationState = cameraRepository.processFrame(
+                                bitmap = rotatedBitmap,
+                                requestedOrientation = mapStringToOrientation(requestedOrientationStr),
+                                silhouetteBitmap = silhouette,
+                                realObjectHeightMeters = null, // Or some default
+                                focalLengthPixels = focalLengthPixels,
+                                boundingBox = detectionResult.boundingBox // Pass the bbox we just found
+                            )
+                        }
+                    }
+                    _state.value = _state.value.copy(
+                        detectionResult = detectionResult,
+                        orientationState = orientationState,
+                        imageWidth = rotatedBitmap.width,
+                        imageHeight = rotatedBitmap.height
+                    )
+                } catch (e: Exception) {
+                    e.printStackTrace()
                 } finally {
                     isProcessingFrame.set(false)
                 }
@ -195,7 +166,10 @@ data class CameraUiState(
     val matchThreshold: Int = 50,
     val distanceMethod: String = "Jaccard",
     val shouldAutoCapture: Boolean = false,
-    val orientationState: OrientationState? = null
+    val orientationState: OrientationState? = null,
+    val detectionResult: ObjectDetectionResult? = null,
+    val imageWidth: Int = 0,
+    val imageHeight: Int = 0
 )
 sealed class CameraEvent {
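Closing editor's aside (not part of this commit): handleFrame's AtomicBoolean guard is a single-flight gate — frames arriving while one is still being analysed are dropped rather than queued, keeping the analyzer from falling behind the camera. A minimal sketch of that pattern in isolation, with illustrative names:

import java.util.concurrent.atomic.AtomicBoolean
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch

class FrameGate(private val scope: CoroutineScope) {
    private val busy = AtomicBoolean(false)

    fun submit(work: suspend () -> Unit) {
        // compareAndSet wins for exactly one caller; all other callers return immediately.
        if (busy.compareAndSet(false, true)) {
            scope.launch {
                try {
                    work()
                } finally {
                    busy.set(false) // always release, even if work() throws
                }
            }
        }
    }
}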