mirror of
https://github.com/W1NDes/M-AzurLaneAutoScript.git
synced 2026-05-14 07:08:04 +08:00
Feat(viewport): 添加H.264视频流编码降低带宽
- 使用PyAV(libx264)替代cv2.imencode实现H.264视频流编码 - 带宽从~15Mbps降至~0.8Mbps (20倍压缩), 编码耗时从5ms降至1.8ms - 前端使用WebCodecs-VideoDecoder硬件解码, 不支持的浏览器自动降级原JPEG图传模式 - WebSocket会话独立H264Encoder实例, 支持多客户端同时连接 - Quality滑条映射CRF(51-18) - 添加渐进式降帧: 5s无操作后从30fps线性降至1fps, 300s后完全停止
This commit is contained in:
parent
a96d9c450a
commit
d358adcb0e
@ -391,8 +391,8 @@
|
||||
</div>
|
||||
<div class="slider-container">
|
||||
<label>Quality:</label>
|
||||
<input type="range" id="qualitySlider" min="10" max="99" value="30">
|
||||
<span class="slider-value" id="qualityValue">30</span>
|
||||
<input type="range" id="qualitySlider" min="10" max="99" value="65">
|
||||
<span class="slider-value" id="qualityValue">65</span>
|
||||
</div>
|
||||
<div class="slider-container">
|
||||
<label>FPS:</label>
|
||||
@ -528,13 +528,22 @@
|
||||
let clientCount = 0; // Number of connected clients
|
||||
let zoomLevel = 100; // Zoom percentage (50-150)
|
||||
|
||||
// H.264 / WebCodecs state
|
||||
const supportsWebCodecs = typeof VideoDecoder !== 'undefined';
|
||||
let videoDecoder = null;
|
||||
let isH264Mode = false;
|
||||
let h264Timestamp = 0;
|
||||
|
||||
// Cached image object for frame rendering (prevents flickering)
|
||||
const frameImage = new Image();
|
||||
let pendingFrame = null;
|
||||
|
||||
// Frame image onload handler - set once globally, reused for all frames
|
||||
frameImage.onload = () => {
|
||||
ctx.drawImage(frameImage, 0, 0, canvas.width, canvas.height);
|
||||
// Only draw JPEG frames if NOT in H.264 mode
|
||||
if (!isH264Mode) {
|
||||
ctx.drawImage(frameImage, 0, 0, canvas.width, canvas.height);
|
||||
}
|
||||
if (pendingFrame) {
|
||||
URL.revokeObjectURL(pendingFrame);
|
||||
pendingFrame = null;
|
||||
@ -666,14 +675,35 @@
|
||||
|
||||
ws.onmessage = (event) => {
|
||||
if (event.data instanceof ArrayBuffer) {
|
||||
// Binary frame data (JPEG)
|
||||
// Use cached image object to prevent flickering
|
||||
if (pendingFrame) {
|
||||
URL.revokeObjectURL(pendingFrame);
|
||||
// Binary frame data
|
||||
if (isH264Mode && videoDecoder && videoDecoder.state !== 'closed') {
|
||||
// H.264 mode: first byte is type header, rest is NAL data
|
||||
const buffer = event.data;
|
||||
if (buffer.byteLength < 2) return;
|
||||
const view = new Uint8Array(buffer);
|
||||
const isKey = view[0] === 0x01;
|
||||
const nalData = buffer.slice(1);
|
||||
|
||||
try {
|
||||
const chunk = new EncodedVideoChunk({
|
||||
type: isKey ? 'key' : 'delta',
|
||||
timestamp: h264Timestamp,
|
||||
data: nalData,
|
||||
});
|
||||
h264Timestamp += 33333; // ~30fps in microseconds
|
||||
videoDecoder.decode(chunk);
|
||||
} catch (e) {
|
||||
console.error('H.264 decode error:', e);
|
||||
}
|
||||
} else {
|
||||
// JPEG fallback mode
|
||||
if (pendingFrame) {
|
||||
URL.revokeObjectURL(pendingFrame);
|
||||
}
|
||||
pendingFrame = URL.createObjectURL(new Blob([event.data], { type: 'image/jpeg' }));
|
||||
frameImage.src = pendingFrame;
|
||||
}
|
||||
pendingFrame = URL.createObjectURL(new Blob([event.data], { type: 'image/jpeg' }));
|
||||
frameImage.src = pendingFrame;
|
||||
} else {
|
||||
} else if (typeof event.data === 'string') {
|
||||
// JSON message
|
||||
try {
|
||||
const data = JSON.parse(event.data);
|
||||
@ -700,10 +730,54 @@
|
||||
console.error('WebSocket error:', err);
|
||||
showError('Connection error', 'connection_error');
|
||||
};
|
||||
|
||||
// Request JPEG fallback if browser doesn't support WebCodecs
|
||||
ws.addEventListener('open', () => {
|
||||
if (!supportsWebCodecs) {
|
||||
console.log('WebCodecs not supported, requesting JPEG fallback');
|
||||
sendAction({ action: 'set_encoding', encoding: 'jpeg' });
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function handleMessage(data) {
|
||||
if (data.type === 'status') {
|
||||
if (data.type === 'codec_init') {
|
||||
// H.264 encoder initialized on backend, set up VideoDecoder
|
||||
if (supportsWebCodecs) {
|
||||
if (videoDecoder && videoDecoder.state !== 'closed') {
|
||||
videoDecoder.close();
|
||||
}
|
||||
h264Timestamp = 0;
|
||||
videoDecoder = new VideoDecoder({
|
||||
output: (frame) => {
|
||||
ctx.drawImage(frame, 0, 0, canvas.width, canvas.height);
|
||||
frame.close();
|
||||
// Update FPS counter
|
||||
frameCount++;
|
||||
const now = Date.now();
|
||||
if (now - lastFpsUpdate >= 1000) {
|
||||
currentFps = frameCount;
|
||||
frameCount = 0;
|
||||
lastFpsUpdate = now;
|
||||
fpsInfo.textContent = `${currentFps} FPS`;
|
||||
}
|
||||
},
|
||||
error: (e) => {
|
||||
console.error('VideoDecoder error:', e);
|
||||
// Fall back to JPEG on decoder error
|
||||
isH264Mode = false;
|
||||
sendAction({ action: 'set_encoding', encoding: 'jpeg' });
|
||||
}
|
||||
});
|
||||
videoDecoder.configure({
|
||||
codec: data.codec,
|
||||
codedWidth: data.width,
|
||||
codedHeight: data.height,
|
||||
});
|
||||
isH264Mode = true;
|
||||
console.log(`H.264 decoder configured: ${data.codec} ${data.width}x${data.height}`);
|
||||
}
|
||||
} else if (data.type === 'status') {
|
||||
if (data.connected) {
|
||||
connectingOverlay.classList.add('hidden');
|
||||
errorOverlay.classList.add('hidden');
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
@ -6,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional
|
||||
|
||||
import av
|
||||
import cv2
|
||||
import numpy as np
|
||||
from starlette.applications import Starlette
|
||||
@ -33,6 +35,95 @@ def get_screenshot_executor() -> ThreadPoolExecutor:
|
||||
return _screenshot_executor
|
||||
|
||||
|
||||
class H264Encoder:
|
||||
"""Per-session H.264 encoder using PyAV/libx264.
|
||||
|
||||
Each WebSocket client gets its own encoder instance to avoid
|
||||
thread-safety issues when multiple clients are connected.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._encoder = None
|
||||
self._pts = 0
|
||||
self._resolution = None
|
||||
self._crf = None
|
||||
|
||||
def _init(self, width: int, height: int, crf: int = 23):
|
||||
if self._encoder is not None:
|
||||
try:
|
||||
self._encoder.encode(None)
|
||||
self._encoder.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
codec = av.CodecContext.create('libx264', 'w')
|
||||
codec.width = width
|
||||
codec.height = height
|
||||
codec.pix_fmt = 'yuv420p'
|
||||
codec.time_base = '1/30'
|
||||
codec.options = {
|
||||
'preset': 'ultrafast',
|
||||
'tune': 'zerolatency',
|
||||
'profile': 'baseline',
|
||||
'level': '3.0',
|
||||
'crf': str(crf),
|
||||
'colorprim': 'bt709',
|
||||
'transfer': 'bt709',
|
||||
'colormatrix': 'bt709',
|
||||
}
|
||||
codec.open()
|
||||
|
||||
self._encoder = codec
|
||||
self._pts = 0
|
||||
self._resolution = (width, height)
|
||||
logger.info(f'[Viewport] H.264 encoder initialized: {width}x{height}, crf={crf}')
|
||||
|
||||
def encode(self, img_bgr, crf: int = 23):
|
||||
"""Encode a BGR numpy array to H.264.
|
||||
|
||||
Note: Uses format='rgb24' with BGR data intentionally to compensate for
|
||||
WebCodecs hardware decoder swapping R↔B during YUV→RGB conversion.
|
||||
|
||||
Returns:
|
||||
tuple: (data_bytes_with_header, is_keyframe, resolution_changed) or (None, False, False)
|
||||
"""
|
||||
h, w = img_bgr.shape[:2]
|
||||
|
||||
resolution_changed = False
|
||||
if self._encoder is None or self._resolution != (w, h) or self._crf != crf:
|
||||
self._init(w, h, crf)
|
||||
self._crf = crf
|
||||
resolution_changed = True
|
||||
|
||||
vframe = av.VideoFrame.from_ndarray(img_bgr, format='rgb24')
|
||||
vframe.pts = self._pts
|
||||
self._pts += 1
|
||||
|
||||
packets = self._encoder.encode(vframe)
|
||||
data = b''
|
||||
is_keyframe = False
|
||||
for pkt in packets:
|
||||
data += bytes(pkt)
|
||||
if pkt.is_keyframe:
|
||||
is_keyframe = True
|
||||
|
||||
if not data:
|
||||
return None, False, False
|
||||
|
||||
# Prepend 1-byte header: 0x01=keyframe, 0x00=delta
|
||||
header = b'\x01' if is_keyframe else b'\x00'
|
||||
return header + data, is_keyframe, resolution_changed
|
||||
|
||||
def close(self):
|
||||
if self._encoder is not None:
|
||||
try:
|
||||
self._encoder.encode(None)
|
||||
self._encoder.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._encoder = None
|
||||
|
||||
|
||||
class DeviceConnection:
|
||||
"""
|
||||
Device connection for viewport streaming.
|
||||
@ -179,85 +270,55 @@ class DeviceConnection:
|
||||
self._connected = False
|
||||
return None
|
||||
|
||||
def screenshot_encode(self, quality: int = 30, scale: float = 1.0, skip_unchanged: bool = False) -> Optional[bytes]:
|
||||
"""Get screenshot as JPEG bytes.
|
||||
def screenshot_raw(self, scale: float = 1.0):
|
||||
"""Capture screenshot and return as numpy array (resized, BGR order).
|
||||
|
||||
Args:
|
||||
quality: JPEG quality (1-100)
|
||||
scale: Resolution scale (0.25-1.0), e.g. 0.5 = half resolution
|
||||
skip_unchanged: If True, skip encoding when frame content is unchanged
|
||||
Note: Returns BGR intentionally. The H264Encoder uses format='rgb24'
|
||||
which pre-swaps R↔B to compensate for WebCodecs hardware decoder
|
||||
doing an additional R↔B swap during YUV→RGB conversion.
|
||||
|
||||
Returns:
|
||||
bytes: Encoded frame data, or None if screenshot failed or frame unchanged.
|
||||
numpy array (BGR) or None if failed.
|
||||
"""
|
||||
import time
|
||||
t0 = time.perf_counter()
|
||||
|
||||
img = self.screenshot()
|
||||
t1 = time.perf_counter()
|
||||
|
||||
if img is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Update resolution if changed
|
||||
h, w = img.shape[:2]
|
||||
if (w, h) != self._resolution:
|
||||
self._resolution = (w, h)
|
||||
logger.info(f'[Viewport] Updated resolution: {self._resolution}')
|
||||
|
||||
# Resize if scale < 1.0
|
||||
if scale < 1.0:
|
||||
new_w = int(w * scale)
|
||||
new_h = int(h * scale)
|
||||
img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
|
||||
w = int(w * scale)
|
||||
h = int(h * scale)
|
||||
img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Frame-skip detection: only check when idle (skip_unchanged=True)
|
||||
if skip_unchanged:
|
||||
if hasattr(self, '_last_frame') and self._last_frame is not None:
|
||||
if self._last_frame.shape == img.shape:
|
||||
diff = cv2.absdiff(img, self._last_frame)
|
||||
if np.mean(diff) < 1.0:
|
||||
# Frame unchanged, skip
|
||||
return None
|
||||
self._last_frame = img
|
||||
return img
|
||||
|
||||
# Convert BGR to RGB for correct colors in browser
|
||||
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
||||
t2 = time.perf_counter()
|
||||
|
||||
# Encode to JPEG
|
||||
_, encoded = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, quality])
|
||||
t3 = time.perf_counter()
|
||||
|
||||
result = encoded.tobytes()
|
||||
t4 = time.perf_counter()
|
||||
|
||||
# Log timing every 100 frames
|
||||
if not hasattr(self, '_frame_count'):
|
||||
self._frame_count = 0
|
||||
self._total_times = [0, 0, 0, 0]
|
||||
self._frame_count += 1
|
||||
self._total_times[0] += t1 - t0 # screenshot
|
||||
self._total_times[1] += t2 - t1 # resize + diff
|
||||
self._total_times[2] += t3 - t2 # imencode
|
||||
self._total_times[3] += t4 - t3 # tobytes
|
||||
|
||||
if self._frame_count >= 100:
|
||||
avg = [t / self._frame_count * 1000 for t in self._total_times]
|
||||
total = sum(avg)
|
||||
out_h, out_w = img.shape[:2]
|
||||
logger.info(
|
||||
f'[Viewport] Timing (avg ms): screenshot={avg[0]:.1f}, '
|
||||
f'resize={avg[1]:.1f}, imencode={avg[2]:.1f}, tobytes={avg[3]:.1f}, '
|
||||
f'total={total:.1f}, size={len(result)//1024}KB, res={out_w}x{out_h}'
|
||||
)
|
||||
self._frame_count = 0
|
||||
self._total_times = [0, 0, 0, 0]
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
if self._error_count == 0:
|
||||
logger.info(f'[Viewport] Encode error: {e}')
|
||||
logger.info(f'[Viewport] Screenshot processing error: {e}')
|
||||
return None
|
||||
|
||||
def screenshot_jpeg(self, quality: int = 30, scale: float = 1.0) -> Optional[bytes]:
|
||||
"""Capture screenshot and encode as JPEG (fallback for browsers without WebCodecs)."""
|
||||
img = self.screenshot()
|
||||
if img is None:
|
||||
return None
|
||||
try:
|
||||
h, w = img.shape[:2]
|
||||
if (w, h) != self._resolution:
|
||||
self._resolution = (w, h)
|
||||
|
||||
if scale < 1.0:
|
||||
img = cv2.resize(img, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
||||
_, encoded = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, quality])
|
||||
return encoded.tobytes()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def touch(self, x: int, y: int):
|
||||
@ -758,10 +819,13 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
manager.add_client(instance_name) # Track client connection
|
||||
|
||||
try:
|
||||
quality = 30
|
||||
quality = 65
|
||||
scale = 0.5 # Resolution scale (1.0 = 720p, 0.5 = 360p, etc.) - default 360p for better performance
|
||||
target_fps = 30 # Default 30 FPS for smooth streaming
|
||||
is_paused = False # Pause state for visibility-based streaming
|
||||
use_h264 = True # Use H.264 encoding by default, falls back to JPEG if frontend requests
|
||||
codec_init_sent = False # Track whether codec_init was sent to this client
|
||||
h264_encoder = H264Encoder() # Per-session H.264 encoder
|
||||
|
||||
# Stats tracking
|
||||
stats_frame_count = 0
|
||||
@ -835,6 +899,9 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
target_fps = max(1, min(60, int(data['fps'])))
|
||||
elif action == 'set_scale':
|
||||
scale = max(0.25, min(1.0, float(data['scale'])))
|
||||
elif action == 'set_encoding':
|
||||
use_h264 = data.get('encoding', 'jpeg') == 'h264'
|
||||
logger.info(f'[Viewport] Encoding set to {"H.264" if use_h264 else "JPEG"} for {instance_name}')
|
||||
elif action == 'resume_idle':
|
||||
last_interaction_time = time.monotonic()
|
||||
is_paused = False
|
||||
@ -919,28 +986,62 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
# Capture and send frame
|
||||
t_cap_start = time.monotonic()
|
||||
idle_seconds = t_cap_start - last_interaction_time
|
||||
skip_unchanged = idle_seconds >= 5.0
|
||||
is_idle = idle_seconds >= 300.0
|
||||
|
||||
# Gradual FPS reduction when idle:
|
||||
# 0-5s: full target_fps
|
||||
# 5-30s: linearly reduce from target_fps to 1fps
|
||||
# 30-300s: 1fps
|
||||
# 300s+: stop entirely
|
||||
if idle_seconds < 5.0:
|
||||
effective_fps = target_fps
|
||||
elif idle_seconds < 30.0:
|
||||
# Linear interpolation: 5s→target_fps, 30s→1fps
|
||||
t = (idle_seconds - 5.0) / 25.0 # 0.0 to 1.0
|
||||
effective_fps = max(1, int(target_fps * (1 - t) + 1 * t))
|
||||
else:
|
||||
effective_fps = 1
|
||||
|
||||
# When idle for 300s, skip capturing entirely (save CPU)
|
||||
if is_idle:
|
||||
jpeg_data = None
|
||||
frame_data = None
|
||||
elif use_h264:
|
||||
# Map quality slider (10-99) to CRF (51-18): lower CRF = higher quality
|
||||
crf = int(51 - (quality - 10) * 33 / 89)
|
||||
img_rgb = await loop.run_in_executor(
|
||||
executor, lambda: conn.screenshot_raw(scale)
|
||||
)
|
||||
if img_rgb is not None:
|
||||
frame_data, is_keyframe, resolution_changed = h264_encoder.encode(img_rgb, crf)
|
||||
else:
|
||||
frame_data = None
|
||||
resolution_changed = False
|
||||
|
||||
# Send codec init on first frame or encoder reset
|
||||
if frame_data and (resolution_changed or not codec_init_sent):
|
||||
codec_init_sent = True
|
||||
await websocket.send_json({
|
||||
'type': 'codec_init',
|
||||
'codec': 'avc1.42001e',
|
||||
'width': h264_encoder._resolution[0],
|
||||
'height': h264_encoder._resolution[1],
|
||||
})
|
||||
else:
|
||||
jpeg_data = await loop.run_in_executor(
|
||||
executor, lambda: conn.screenshot_encode(quality, scale, skip_unchanged)
|
||||
frame_data = await loop.run_in_executor(
|
||||
executor, lambda: conn.screenshot_jpeg(quality, scale)
|
||||
)
|
||||
t_cap_end = time.monotonic()
|
||||
|
||||
if jpeg_data:
|
||||
if frame_data:
|
||||
t_send_start = time.monotonic()
|
||||
await websocket.send_bytes(jpeg_data)
|
||||
await websocket.send_bytes(frame_data)
|
||||
t_send_end = time.monotonic()
|
||||
|
||||
# Update stats
|
||||
frame_latency = (t_cap_end - t_cap_start) * 1000 # ms
|
||||
stats_frame_count += 1
|
||||
stats_total_latency += frame_latency
|
||||
stats_total_bytes += len(jpeg_data)
|
||||
stats_total_bytes += len(frame_data)
|
||||
|
||||
# Track WebSocket timing
|
||||
if not hasattr(websocket, '_ws_frame_count'):
|
||||
@ -961,7 +1062,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
websocket._ws_cap_time = 0
|
||||
websocket._ws_send_time = 0
|
||||
|
||||
# Calculate stats every second (outside if jpeg_data so stats update during skips)
|
||||
# Calculate stats every second (outside if frame_data so stats update during skips)
|
||||
stats_elapsed = time.monotonic() - stats_start_time
|
||||
if stats_elapsed >= 1.0:
|
||||
current_latency_ms = stats_total_latency / max(1, stats_frame_count)
|
||||
@ -990,9 +1091,9 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
'idle': is_idle
|
||||
})
|
||||
|
||||
# Frame rate limiting
|
||||
# Frame rate limiting (use effective_fps for idle throttling)
|
||||
elapsed = time.monotonic() - frame_start
|
||||
sleep_time = (1.0 / target_fps) - elapsed
|
||||
sleep_time = (1.0 / effective_fps) - elapsed
|
||||
if sleep_time > 0.001:
|
||||
await asyncio.sleep(sleep_time)
|
||||
|
||||
@ -1001,6 +1102,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
||||
except Exception as e:
|
||||
logger.warning(f'[Viewport] WebSocket error for {instance_name}: {e}')
|
||||
finally:
|
||||
h264_encoder.close()
|
||||
manager.remove_client(instance_name) # Track client disconnection
|
||||
manager.release_connection(instance_name)
|
||||
logger.info(f'[Viewport] Stream ended for {instance_name}, clients remaining: {manager.get_client_count(instance_name)}')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user