diff --git a/module/webui/viewport.html b/module/webui/viewport.html index 85e0b5694..6f6bb0926 100644 --- a/module/webui/viewport.html +++ b/module/webui/viewport.html @@ -391,8 +391,8 @@
- - 30 + + 65
@@ -528,13 +528,22 @@ let clientCount = 0; // Number of connected clients let zoomLevel = 100; // Zoom percentage (50-150) + // H.264 / WebCodecs state + const supportsWebCodecs = typeof VideoDecoder !== 'undefined'; + let videoDecoder = null; + let isH264Mode = false; + let h264Timestamp = 0; + // Cached image object for frame rendering (prevents flickering) const frameImage = new Image(); let pendingFrame = null; // Frame image onload handler - set once globally, reused for all frames frameImage.onload = () => { - ctx.drawImage(frameImage, 0, 0, canvas.width, canvas.height); + // Only draw JPEG frames if NOT in H.264 mode + if (!isH264Mode) { + ctx.drawImage(frameImage, 0, 0, canvas.width, canvas.height); + } if (pendingFrame) { URL.revokeObjectURL(pendingFrame); pendingFrame = null; @@ -666,14 +675,35 @@ ws.onmessage = (event) => { if (event.data instanceof ArrayBuffer) { - // Binary frame data (JPEG) - // Use cached image object to prevent flickering - if (pendingFrame) { - URL.revokeObjectURL(pendingFrame); + // Binary frame data + if (isH264Mode && videoDecoder && videoDecoder.state !== 'closed') { + // H.264 mode: first byte is type header, rest is NAL data + const buffer = event.data; + if (buffer.byteLength < 2) return; + const view = new Uint8Array(buffer); + const isKey = view[0] === 0x01; + const nalData = buffer.slice(1); + + try { + const chunk = new EncodedVideoChunk({ + type: isKey ? 'key' : 'delta', + timestamp: h264Timestamp, + data: nalData, + }); + h264Timestamp += 33333; // ~30fps in microseconds + videoDecoder.decode(chunk); + } catch (e) { + console.error('H.264 decode error:', e); + } + } else { + // JPEG fallback mode + if (pendingFrame) { + URL.revokeObjectURL(pendingFrame); + } + pendingFrame = URL.createObjectURL(new Blob([event.data], { type: 'image/jpeg' })); + frameImage.src = pendingFrame; } - pendingFrame = URL.createObjectURL(new Blob([event.data], { type: 'image/jpeg' })); - frameImage.src = pendingFrame; - } else { + } else if (typeof event.data === 'string') { // JSON message try { const data = JSON.parse(event.data); @@ -700,10 +730,54 @@ console.error('WebSocket error:', err); showError('Connection error', 'connection_error'); }; + + // Request JPEG fallback if browser doesn't support WebCodecs + ws.addEventListener('open', () => { + if (!supportsWebCodecs) { + console.log('WebCodecs not supported, requesting JPEG fallback'); + sendAction({ action: 'set_encoding', encoding: 'jpeg' }); + } + }); } function handleMessage(data) { - if (data.type === 'status') { + if (data.type === 'codec_init') { + // H.264 encoder initialized on backend, set up VideoDecoder + if (supportsWebCodecs) { + if (videoDecoder && videoDecoder.state !== 'closed') { + videoDecoder.close(); + } + h264Timestamp = 0; + videoDecoder = new VideoDecoder({ + output: (frame) => { + ctx.drawImage(frame, 0, 0, canvas.width, canvas.height); + frame.close(); + // Update FPS counter + frameCount++; + const now = Date.now(); + if (now - lastFpsUpdate >= 1000) { + currentFps = frameCount; + frameCount = 0; + lastFpsUpdate = now; + fpsInfo.textContent = `${currentFps} FPS`; + } + }, + error: (e) => { + console.error('VideoDecoder error:', e); + // Fall back to JPEG on decoder error + isH264Mode = false; + sendAction({ action: 'set_encoding', encoding: 'jpeg' }); + } + }); + videoDecoder.configure({ + codec: data.codec, + codedWidth: data.width, + codedHeight: data.height, + }); + isH264Mode = true; + console.log(`H.264 decoder configured: ${data.codec} ${data.width}x${data.height}`); + } + } else if (data.type === 'status') { if (data.connected) { connectingOverlay.classList.add('hidden'); errorOverlay.classList.add('hidden'); diff --git a/module/webui/viewport.py b/module/webui/viewport.py index 7ae9a399a..5b6df571b 100644 --- a/module/webui/viewport.py +++ b/module/webui/viewport.py @@ -1,4 +1,5 @@ import asyncio +import base64 import json import threading import time @@ -6,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor from pathlib import Path from typing import Dict, Optional +import av import cv2 import numpy as np from starlette.applications import Starlette @@ -33,6 +35,95 @@ def get_screenshot_executor() -> ThreadPoolExecutor: return _screenshot_executor +class H264Encoder: + """Per-session H.264 encoder using PyAV/libx264. + + Each WebSocket client gets its own encoder instance to avoid + thread-safety issues when multiple clients are connected. + """ + + def __init__(self): + self._encoder = None + self._pts = 0 + self._resolution = None + self._crf = None + + def _init(self, width: int, height: int, crf: int = 23): + if self._encoder is not None: + try: + self._encoder.encode(None) + self._encoder.close() + except Exception: + pass + + codec = av.CodecContext.create('libx264', 'w') + codec.width = width + codec.height = height + codec.pix_fmt = 'yuv420p' + codec.time_base = '1/30' + codec.options = { + 'preset': 'ultrafast', + 'tune': 'zerolatency', + 'profile': 'baseline', + 'level': '3.0', + 'crf': str(crf), + 'colorprim': 'bt709', + 'transfer': 'bt709', + 'colormatrix': 'bt709', + } + codec.open() + + self._encoder = codec + self._pts = 0 + self._resolution = (width, height) + logger.info(f'[Viewport] H.264 encoder initialized: {width}x{height}, crf={crf}') + + def encode(self, img_bgr, crf: int = 23): + """Encode a BGR numpy array to H.264. + + Note: Uses format='rgb24' with BGR data intentionally to compensate for + WebCodecs hardware decoder swapping R↔B during YUV→RGB conversion. + + Returns: + tuple: (data_bytes_with_header, is_keyframe, resolution_changed) or (None, False, False) + """ + h, w = img_bgr.shape[:2] + + resolution_changed = False + if self._encoder is None or self._resolution != (w, h) or self._crf != crf: + self._init(w, h, crf) + self._crf = crf + resolution_changed = True + + vframe = av.VideoFrame.from_ndarray(img_bgr, format='rgb24') + vframe.pts = self._pts + self._pts += 1 + + packets = self._encoder.encode(vframe) + data = b'' + is_keyframe = False + for pkt in packets: + data += bytes(pkt) + if pkt.is_keyframe: + is_keyframe = True + + if not data: + return None, False, False + + # Prepend 1-byte header: 0x01=keyframe, 0x00=delta + header = b'\x01' if is_keyframe else b'\x00' + return header + data, is_keyframe, resolution_changed + + def close(self): + if self._encoder is not None: + try: + self._encoder.encode(None) + self._encoder.close() + except Exception: + pass + self._encoder = None + + class DeviceConnection: """ Device connection for viewport streaming. @@ -179,85 +270,55 @@ class DeviceConnection: self._connected = False return None - def screenshot_encode(self, quality: int = 30, scale: float = 1.0, skip_unchanged: bool = False) -> Optional[bytes]: - """Get screenshot as JPEG bytes. + def screenshot_raw(self, scale: float = 1.0): + """Capture screenshot and return as numpy array (resized, BGR order). - Args: - quality: JPEG quality (1-100) - scale: Resolution scale (0.25-1.0), e.g. 0.5 = half resolution - skip_unchanged: If True, skip encoding when frame content is unchanged + Note: Returns BGR intentionally. The H264Encoder uses format='rgb24' + which pre-swaps R↔B to compensate for WebCodecs hardware decoder + doing an additional R↔B swap during YUV→RGB conversion. Returns: - bytes: Encoded frame data, or None if screenshot failed or frame unchanged. + numpy array (BGR) or None if failed. """ - import time - t0 = time.perf_counter() - img = self.screenshot() - t1 = time.perf_counter() - if img is None: return None + try: - # Update resolution if changed h, w = img.shape[:2] if (w, h) != self._resolution: self._resolution = (w, h) logger.info(f'[Viewport] Updated resolution: {self._resolution}') - # Resize if scale < 1.0 if scale < 1.0: - new_w = int(w * scale) - new_h = int(h * scale) - img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR) + w = int(w * scale) + h = int(h * scale) + img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR) - # Frame-skip detection: only check when idle (skip_unchanged=True) - if skip_unchanged: - if hasattr(self, '_last_frame') and self._last_frame is not None: - if self._last_frame.shape == img.shape: - diff = cv2.absdiff(img, self._last_frame) - if np.mean(diff) < 1.0: - # Frame unchanged, skip - return None - self._last_frame = img + return img - # Convert BGR to RGB for correct colors in browser - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - t2 = time.perf_counter() - - # Encode to JPEG - _, encoded = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, quality]) - t3 = time.perf_counter() - - result = encoded.tobytes() - t4 = time.perf_counter() - - # Log timing every 100 frames - if not hasattr(self, '_frame_count'): - self._frame_count = 0 - self._total_times = [0, 0, 0, 0] - self._frame_count += 1 - self._total_times[0] += t1 - t0 # screenshot - self._total_times[1] += t2 - t1 # resize + diff - self._total_times[2] += t3 - t2 # imencode - self._total_times[3] += t4 - t3 # tobytes - - if self._frame_count >= 100: - avg = [t / self._frame_count * 1000 for t in self._total_times] - total = sum(avg) - out_h, out_w = img.shape[:2] - logger.info( - f'[Viewport] Timing (avg ms): screenshot={avg[0]:.1f}, ' - f'resize={avg[1]:.1f}, imencode={avg[2]:.1f}, tobytes={avg[3]:.1f}, ' - f'total={total:.1f}, size={len(result)//1024}KB, res={out_w}x{out_h}' - ) - self._frame_count = 0 - self._total_times = [0, 0, 0, 0] - - return result except Exception as e: if self._error_count == 0: - logger.info(f'[Viewport] Encode error: {e}') + logger.info(f'[Viewport] Screenshot processing error: {e}') + return None + + def screenshot_jpeg(self, quality: int = 30, scale: float = 1.0) -> Optional[bytes]: + """Capture screenshot and encode as JPEG (fallback for browsers without WebCodecs).""" + img = self.screenshot() + if img is None: + return None + try: + h, w = img.shape[:2] + if (w, h) != self._resolution: + self._resolution = (w, h) + + if scale < 1.0: + img = cv2.resize(img, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + _, encoded = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, quality]) + return encoded.tobytes() + except Exception: return None def touch(self, x: int, y: int): @@ -758,10 +819,13 @@ async def websocket_endpoint(websocket: WebSocket): manager.add_client(instance_name) # Track client connection try: - quality = 30 + quality = 65 scale = 0.5 # Resolution scale (1.0 = 720p, 0.5 = 360p, etc.) - default 360p for better performance target_fps = 30 # Default 30 FPS for smooth streaming is_paused = False # Pause state for visibility-based streaming + use_h264 = True # Use H.264 encoding by default, falls back to JPEG if frontend requests + codec_init_sent = False # Track whether codec_init was sent to this client + h264_encoder = H264Encoder() # Per-session H.264 encoder # Stats tracking stats_frame_count = 0 @@ -835,6 +899,9 @@ async def websocket_endpoint(websocket: WebSocket): target_fps = max(1, min(60, int(data['fps']))) elif action == 'set_scale': scale = max(0.25, min(1.0, float(data['scale']))) + elif action == 'set_encoding': + use_h264 = data.get('encoding', 'jpeg') == 'h264' + logger.info(f'[Viewport] Encoding set to {"H.264" if use_h264 else "JPEG"} for {instance_name}') elif action == 'resume_idle': last_interaction_time = time.monotonic() is_paused = False @@ -919,28 +986,62 @@ async def websocket_endpoint(websocket: WebSocket): # Capture and send frame t_cap_start = time.monotonic() idle_seconds = t_cap_start - last_interaction_time - skip_unchanged = idle_seconds >= 5.0 is_idle = idle_seconds >= 300.0 + # Gradual FPS reduction when idle: + # 0-5s: full target_fps + # 5-30s: linearly reduce from target_fps to 1fps + # 30-300s: 1fps + # 300s+: stop entirely + if idle_seconds < 5.0: + effective_fps = target_fps + elif idle_seconds < 30.0: + # Linear interpolation: 5s→target_fps, 30s→1fps + t = (idle_seconds - 5.0) / 25.0 # 0.0 to 1.0 + effective_fps = max(1, int(target_fps * (1 - t) + 1 * t)) + else: + effective_fps = 1 + # When idle for 300s, skip capturing entirely (save CPU) if is_idle: - jpeg_data = None + frame_data = None + elif use_h264: + # Map quality slider (10-99) to CRF (51-18): lower CRF = higher quality + crf = int(51 - (quality - 10) * 33 / 89) + img_rgb = await loop.run_in_executor( + executor, lambda: conn.screenshot_raw(scale) + ) + if img_rgb is not None: + frame_data, is_keyframe, resolution_changed = h264_encoder.encode(img_rgb, crf) + else: + frame_data = None + resolution_changed = False + + # Send codec init on first frame or encoder reset + if frame_data and (resolution_changed or not codec_init_sent): + codec_init_sent = True + await websocket.send_json({ + 'type': 'codec_init', + 'codec': 'avc1.42001e', + 'width': h264_encoder._resolution[0], + 'height': h264_encoder._resolution[1], + }) else: - jpeg_data = await loop.run_in_executor( - executor, lambda: conn.screenshot_encode(quality, scale, skip_unchanged) + frame_data = await loop.run_in_executor( + executor, lambda: conn.screenshot_jpeg(quality, scale) ) t_cap_end = time.monotonic() - if jpeg_data: + if frame_data: t_send_start = time.monotonic() - await websocket.send_bytes(jpeg_data) + await websocket.send_bytes(frame_data) t_send_end = time.monotonic() # Update stats frame_latency = (t_cap_end - t_cap_start) * 1000 # ms stats_frame_count += 1 stats_total_latency += frame_latency - stats_total_bytes += len(jpeg_data) + stats_total_bytes += len(frame_data) # Track WebSocket timing if not hasattr(websocket, '_ws_frame_count'): @@ -961,7 +1062,7 @@ async def websocket_endpoint(websocket: WebSocket): websocket._ws_cap_time = 0 websocket._ws_send_time = 0 - # Calculate stats every second (outside if jpeg_data so stats update during skips) + # Calculate stats every second (outside if frame_data so stats update during skips) stats_elapsed = time.monotonic() - stats_start_time if stats_elapsed >= 1.0: current_latency_ms = stats_total_latency / max(1, stats_frame_count) @@ -990,9 +1091,9 @@ async def websocket_endpoint(websocket: WebSocket): 'idle': is_idle }) - # Frame rate limiting + # Frame rate limiting (use effective_fps for idle throttling) elapsed = time.monotonic() - frame_start - sleep_time = (1.0 / target_fps) - elapsed + sleep_time = (1.0 / effective_fps) - elapsed if sleep_time > 0.001: await asyncio.sleep(sleep_time) @@ -1001,6 +1102,7 @@ async def websocket_endpoint(websocket: WebSocket): except Exception as e: logger.warning(f'[Viewport] WebSocket error for {instance_name}: {e}') finally: + h264_encoder.close() manager.remove_client(instance_name) # Track client disconnection manager.release_connection(instance_name) logger.info(f'[Viewport] Stream ended for {instance_name}, clients remaining: {manager.get_client_count(instance_name)}')