|
6 | 6 | import time |
7 | 7 | import logging |
8 | 8 |
|
9 | | -from models.detection import DetectionResponse, DetectionResult, ImageResponse |
| 9 | +from models.detection import DetectionResponse, DetectionResult, ImageResponse, DescribeResponse, QueryResponse |
10 | 10 | from models.zone import ZoneConfiguration |
11 | 11 | from backends.factory import get_backend, BACKEND_REGISTRY |
12 | 12 | from utils.image import validate_image, preprocess_image, image_to_bytes |
@@ -108,6 +108,11 @@ async def detect_objects( |
108 | 108 | for det in detections: |
109 | 109 | class_counts[det.label] = class_counts.get(det.label, 0) + 1 |
110 | 110 | logger.info(f"Detected objects: {dict(class_counts)}") |
| 111 | + for det in detections: |
| 112 | + b = det.bounding_box |
| 113 | + logger.info(f" → {det.label} ({det.confidence:.2f}): [{b.x_min:.3f},{b.y_min:.3f},{b.x_max:.3f},{b.y_max:.3f}]") |
| 114 | + else: |
| 115 | + logger.info("No objects detected") |
111 | 116 | except Exception as e: |
112 | 117 | logger.error(f"Detection failed: {str(e)}", exc_info=True) |
113 | 118 | raise HTTPException(status_code=500, detail=f"Detection error: {str(e)}") |
@@ -183,6 +188,133 @@ async def detect_objects( |
183 | 188 | return response |
184 | 189 |
|
185 | 190 |
|
@router.post("/describe", response_model=DescribeResponse)
async def describe_image(
    file: UploadFile = File(...),
    backend: str = Query("moondream", description="Backend to use for description"),
    length: str = Query("normal", description="Caption length: 'short', 'normal', or 'long'"),
    return_image: bool = Query(False, description="Ignored for describe endpoint (accepted for compatibility)"),
):
    """
    Generate a natural language description of an uploaded image.

    - **file**: Image file to describe
    - **backend**: Backend to use (must support description, e.g. moondream)
    - **length**: Caption length - 'short', 'normal', or 'long'

    Responds with 400 when the backend is not available, the length is not
    one of the documented values, the image cannot be read or validated, or
    the backend does not support description. Responds with 500 when backend
    initialization or the description call itself fails.
    """
    logger.info(f"Describe request: backend={backend}, length={length}, filename={file.filename}")

    if backend not in settings.AVAILABLE_BACKENDS:
        raise HTTPException(
            status_code=400,
            detail=f"Backend '{backend}' not available. Available backends: {settings.AVAILABLE_BACKENDS}"
        )

    # Reject unsupported lengths up front so a client mistake is reported as a
    # 400 instead of being forwarded to the backend, and before any backend
    # is initialized for a request that cannot succeed.
    allowed_lengths = ("short", "normal", "long")
    if length not in allowed_lengths:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid length '{length}'. Must be one of: {', '.join(allowed_lengths)}"
        )

    try:
        detector = get_backend(backend)
    except Exception as e:
        logger.error(f"Failed to initialize backend {backend}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Backend initialization error: {str(e)}") from e

    # Read and validate image
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents))
        validate_image(image, file.filename)
        processed_image = preprocess_image(image)
    except Exception as e:
        logger.error(f"Image validation/processing failed: {str(e)}")
        raise HTTPException(status_code=400, detail=f"Invalid image: {str(e)}") from e

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted while the backend is running.
    start_time = time.perf_counter()
    try:
        description = detector.describe(processed_image, length=length)
        process_time = time.perf_counter() - start_time
        logger.info(f"Describe completed in {process_time*1000:.1f}ms")
        logger.info(f"Description result: {description}")
    except NotImplementedError as e:
        # The backend exists but has no describe capability: a client-side
        # choice problem, hence 400 rather than 500.
        raise HTTPException(
            status_code=400,
            detail=f"Backend '{backend}' does not support image description"
        ) from e
    except Exception as e:
        logger.error(f"Describe failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Description error: {str(e)}") from e

    # Dimensions are reported from the image as opened, not from the
    # preprocessed copy handed to the backend.
    return DescribeResponse(
        backend=backend,
        filename=file.filename,
        description=description,
        process_time_ms=int(process_time * 1000),
        image_width=image.width,
        image_height=image.height,
    )


| 252 | + |
| 253 | + |
@router.post("/query", response_model=QueryResponse)
async def query_image(
    file: UploadFile = File(...),
    question: str = Query(..., description="Question to ask about the image"),
    backend: str = Query("moondream", description="Backend to use for visual Q&A"),
    return_image: bool = Query(False, description="Ignored for query endpoint (accepted for compatibility)"),
):
    """
    Ask a natural language question about an uploaded image.

    - **file**: Image file to query
    - **question**: Natural language question about the image
    - **backend**: Backend to use (must support visual Q&A, e.g. moondream)

    Responds with 400 when the backend is not available, the question is
    blank, the image cannot be read or validated, or the backend does not
    support visual Q&A. Responds with 500 when backend initialization or the
    query call itself fails.
    """
    logger.info(f"Query request: backend={backend}, question={question!r}, filename={file.filename}")

    if backend not in settings.AVAILABLE_BACKENDS:
        raise HTTPException(
            status_code=400,
            detail=f"Backend '{backend}' not available. Available backends: {settings.AVAILABLE_BACKENDS}"
        )

    # The parameter is required, but an empty or whitespace-only value still
    # passes that check; reject it here instead of spending a backend call on
    # a question with no content.
    if not question.strip():
        raise HTTPException(status_code=400, detail="Question must not be empty")

    try:
        detector = get_backend(backend)
    except Exception as e:
        logger.error(f"Failed to initialize backend {backend}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Backend initialization error: {str(e)}") from e

    # Read and validate image
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents))
        validate_image(image, file.filename)
        processed_image = preprocess_image(image)
    except Exception as e:
        logger.error(f"Image validation/processing failed: {str(e)}")
        raise HTTPException(status_code=400, detail=f"Invalid image: {str(e)}") from e

    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump if the system clock is adjusted while the backend is running.
    start_time = time.perf_counter()
    try:
        answer = detector.query(processed_image, question=question)
        process_time = time.perf_counter() - start_time
        logger.info(f"Query completed in {process_time*1000:.1f}ms")
        logger.info(f"Query answer: {answer}")
    except NotImplementedError as e:
        # The backend exists but has no visual Q&A capability: a client-side
        # choice problem, hence 400 rather than 500.
        raise HTTPException(
            status_code=400,
            detail=f"Backend '{backend}' does not support visual Q&A"
        ) from e
    except Exception as e:
        logger.error(f"Query failed: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Query error: {str(e)}") from e

    # Dimensions are reported from the image as opened, not from the
    # preprocessed copy handed to the backend.
    return QueryResponse(
        backend=backend,
        filename=file.filename,
        question=question,
        answer=answer,
        process_time_ms=int(process_time * 1000),
        image_width=image.width,
        image_height=image.height,
    )


| 316 | + |
| 317 | + |
186 | 318 | @router.get("/backends", response_model=Dict[str, Any]) |
187 | 319 | async def list_backends(): |
188 | 320 | """ |
|
0 commit comments