fix: voice chat (addSpeaker to worklet, sendPCMFrame variable scope, lower VAD threshold, initial gain=1), no temperature damage underground + reduced temperature damage rate

This commit is contained in:
Mk 2026-05-26 20:20:26 +00:00
parent 79fc30945a
commit 42f3e59a42
4 changed files with 117 additions and 78 deletions

54
game.js
View File

@ -1479,7 +1479,7 @@ function customConfirm(msg, onYes) {
sampleRate: 16000, // 16kHz — sufficient for voice, saves 33% bandwidth sampleRate: 16000, // 16kHz — sufficient for voice, saves 33% bandwidth
frameMs: 20, // 20ms frames = 320 samples @ 16kHz frameMs: 20, // 20ms frames = 320 samples @ 16kHz
samplesPerFrame: 320, // 16000 * 0.02 samplesPerFrame: 320, // 16000 * 0.02
vadThreshold: 0.008, // RMS threshold for voice detection vadThreshold: 0.0005, // RMS threshold for voice detection
vadHangover: 5, // 100ms hangover after speech ends vadHangover: 5, // 100ms hangover after speech ends
jbufTargetMs: 80, // Target jitter: 80ms (was 200ms) jbufTargetMs: 80, // Target jitter: 80ms (was 200ms)
jbufMinMs: 40, jbufMinMs: 40,
@ -1514,7 +1514,7 @@ class VoiceCaptureProcessor extends AudioWorkletProcessor {
this._speaking = false; this._speaking = false;
this._silenceFrames = 0; this._silenceFrames = 0;
this._hangover = 5; // 100ms this._hangover = 5; // 100ms
this._vadThreshold = 0.008; this._vadThreshold = 0.0005;
this._lastRms = 0; this._lastRms = 0;
} }
process(inputs) { process(inputs) {
@ -1659,7 +1659,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
// ==================== Opus ENCODE/DECODE ==================== // ==================== Opus ENCODE/DECODE ====================
async function initVoiceEncoder() { async function initVoiceEncoder() {
if ('AudioEncoder' in window) { if (false && 'AudioEncoder' in window) {
try { try {
// Check if Opus is supported // Check if Opus is supported
const support = await AudioEncoder.isConfigSupported({ const support = await AudioEncoder.isConfigSupported({
@ -1676,6 +1676,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
const data = new Uint8Array(chunk.byteLength); const data = new Uint8Array(chunk.byteLength);
chunk.copyTo(data); chunk.copyTo(data);
// Send as binary frame via Socket.IO // Send as binary frame via Socket.IO
console.log('[voice] TX Opus chunk, seq:', voiceSeq, 'size:', data.buffer.byteLength);
voiceSocket.emit('voice_data', { voiceSocket.emit('voice_data', {
codec: 'opus', codec: 'opus',
data: data.buffer, data: data.buffer,
@ -1777,12 +1778,13 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
// Connect: lowpass → panner → gain → (connected to mixer later) // Connect: lowpass → panner → gain → (connected to mixer later)
sp.lowpassNode.connect(sp.pannerNode); sp.lowpassNode.connect(sp.pannerNode);
sp.pannerNode.connect(sp.gainNode); sp.pannerNode.connect(sp.gainNode);
sp.gainNode.gain.value = 0; // Start silent sp.gainNode.gain.value = 1.0; // Start with volume (worklet needs non-zero gain)
remoteSpeakers.set(socketId, sp); remoteSpeakers.set(socketId, sp);
// Register speaker in playback worklet // Register speaker in playback worklet
if (playbackNode) { if (playbackNode) {
playbackNode.port.postMessage({ type: 'addSpeaker', id: socketId, gain: sp.gainNode.gain.value, pan: sp.pannerNode.pan.value }); playbackNode.port.postMessage({ type: 'addSpeaker', id: socketId, gain: 1.0, pan: 0 });
console.log('[voice] addSpeaker posted to worklet, id:', socketId?.substring(0,8), 'gain:', sp.gainNode.gain.value);
} }
initDecoderForSpeaker(socketId, codec); initDecoderForSpeaker(socketId, codec);
return sp; return sp;
@ -1841,6 +1843,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
if (playbackNode) { if (playbackNode) {
const currentGain = sp.gainNode.gain.value; const currentGain = sp.gainNode.gain.value;
const currentPan = sp.pannerNode.pan.value; const currentPan = sp.pannerNode.pan.value;
if (currentGain > 0.01) console.log('[voice] updateSpatial, id:', sp.id?.substring(0,8), 'gain:', currentGain.toFixed(3), 'pan:', currentPan.toFixed(3));
playbackNode.port.postMessage({ playbackNode.port.postMessage({
type: 'updateSpatial', type: 'updateSpatial',
id: sp.id, id: sp.id,
@ -1971,14 +1974,26 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
// Per-speaker Web Audio nodes (lowpass, pan, gain) used for spatial UPDATES only (values sent to worklet via messages) // Per-speaker Web Audio nodes (lowpass, pan, gain) used for spatial UPDATES only (values sent to worklet via messages)
// The actual mixing happens inside the worklet // The actual mixing happens inside the worklet
// Connect to voice server FIRST so capture frames have a socket
voiceSocket = io(VOICE_SERVER, { transports: ['websocket'] });
// Handle capture frames // Handle capture frames
voiceSeq = 0; voiceSeq = 0;
voiceTimestamp = 0; voiceTimestamp = 0;
wasSpeaking = false; wasSpeaking = false;
voiceActive = true; // Enable capture BEFORE onmessage handler
let _frameCount = 0;
captureNode.port.onmessage = (e) => { captureNode.port.onmessage = (e) => {
const { type, samples, speaking, rms } = e.data; const { type, samples, speaking, rms } = e.data;
if (type !== 'frame') return; if (type !== 'frame') return;
if (!voiceActive || !voiceSocket || !voiceSocket.connected) return; _frameCount++;
if (_frameCount <= 5 || _frameCount % 100 === 0) {
console.log('[voice] capture frame #', _frameCount, 'rms:', rms?.toFixed(4), 'speaking:', speaking, 'voiceActive:', voiceActive, 'socket:', !!voiceSocket, 'connected:', voiceSocket?.connected, 'codec:', voiceCodec);
}
if (!voiceActive || !voiceSocket || !voiceSocket.connected) {
return;
}
voiceTimestamp += VC.samplesPerFrame; // 320 samples * (1/16000) = 20ms per frame voiceTimestamp += VC.samplesPerFrame; // 320 samples * (1/16000) = 20ms per frame
@ -2003,17 +2018,17 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
audioData.close(); audioData.close();
} catch (err) { } catch (err) {
// Fallback: send as PCM // Fallback: send as PCM
sendPCMFrame(samples); sendPCMFrame(samples, speaking, wasSpeaking);
} }
} else { } else {
sendPCMFrame(samples); sendPCMFrame(samples, speaking, wasSpeaking);
} }
wasSpeaking = speaking; wasSpeaking = speaking;
silenceFrames = 0; silenceFrames = 0;
}; };
function sendPCMFrame(samples) { function sendPCMFrame(samples, isSpeaking, wasSp) {
const int16 = new Int16Array(samples.length); const int16 = new Int16Array(samples.length);
for (let i = 0; i < samples.length; i++) { for (let i = 0; i < samples.length; i++) {
const s = Math.max(-1, Math.min(1, samples[i])); const s = Math.max(-1, Math.min(1, samples[i]));
@ -2024,7 +2039,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
data: int16.buffer, data: int16.buffer,
seq: voiceSeq++, seq: voiceSeq++,
ts: performance.now(), ts: performance.now(),
speaking: wasSpeaking || speaking speaking: isSpeaking || wasSp
}); });
} }
@ -2032,8 +2047,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
await initVoiceEncoder(); await initVoiceEncoder();
codecIndicator.textContent = voiceCodec === 'opus' ? '🔊 Opus' : '🔊 PCM'; codecIndicator.textContent = voiceCodec === 'opus' ? '🔊 Opus' : '🔊 PCM';
// Connect to voice server // Voice socket already created above
voiceSocket = io(VOICE_SERVER, { transports: ['websocket'] });
voiceSocket.on('connect', () => { voiceSocket.on('connect', () => {
console.log('[voice] Connected, id:', voiceSocket.id, 'codec:', voiceCodec); console.log('[voice] Connected, id:', voiceSocket.id, 'codec:', voiceCodec);
@ -2051,8 +2065,9 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
}); });
voiceSocket.on('voice_in', (payload) => { voiceSocket.on('voice_in', (payload) => {
console.log('[voice] RX voice_in, codec:', payload.codec, 'dataSize:', payload.data?.byteLength || payload.data?.length || 'N/A', 'from:', payload.meta?.from?.substring(0,8));
const { data, meta } = payload; const { data, meta } = payload;
if (!audioCtx || audioCtx.state === 'closed') return; if (!audioCtx || audioCtx.state === 'closed') { console.warn('[voice] audioCtx missing/closed'); return; }
let sp = remoteSpeakers.get(meta.from); let sp = remoteSpeakers.get(meta.from);
if (!sp) { if (!sp) {
@ -2111,7 +2126,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
for (const [id] of remoteSpeakers) removeRemoteSpeaker(id); for (const [id] of remoteSpeakers) removeRemoteSpeaker(id);
}); });
voiceActive = true; // voiceActive already set to true above
voiceBtn.textContent = '🎤'; voiceBtn.textContent = '🎤';
voiceBtn.style.background = '#2ecc71'; voiceBtn.style.background = '#2ecc71';
console.log('[voice] Voice chat ACTIVE, codec:', voiceCodec); console.log('[voice] Voice chat ACTIVE, codec:', voiceCodec);
@ -2125,6 +2140,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
// ==================== PCM decode helper ==================== // ==================== PCM decode helper ====================
function decodeAndPushPCM(speakerId, buffer) { function decodeAndPushPCM(speakerId, buffer) {
console.log('[voice] decodeAndPushPCM, id:', speakerId?.substring(0,8), 'bufSize:', buffer?.byteLength || buffer?.length || 'N/A');
const int16 = new Int16Array(buffer); const int16 = new Int16Array(buffer);
const float32 = new Float32Array(int16.length); const float32 = new Float32Array(int16.length);
for (let i = 0; i < int16.length; i++) { for (let i = 0; i < int16.length; i++) {
@ -4287,7 +4303,7 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
const biome = getCachedBiome(pGX); const biome = getCachedBiome(pGX);
const pGY = Math.floor(player.y / TILE); const pGY = Math.floor(player.y / TILE);
const sGY = surfaceGyAt(pGX); const sGY = surfaceGyAt(pGX);
const isUndergroundTemp = (sGY - pGY) > 2; const isUndergroundTemp = (pGY - sGY) > 2; // player deeper than 2 blocks below surface
let targetTemp = BIOME_TEMP[biome] || 15; let targetTemp = BIOME_TEMP[biome] || 15;
if (isUndergroundTemp) targetTemp = UNDERGROUND_TEMP; if (isUndergroundTemp) targetTemp = UNDERGROUND_TEMP;
if (isNight() && !isUndergroundTemp) targetTemp -= 10; if (isNight() && !isUndergroundTemp) targetTemp -= 10;
@ -4325,14 +4341,14 @@ registerProcessor('voice-playback', VoicePlaybackProcessor);
player.hp = Math.min(100, player.hp + 1 * dt); player.hp = Math.min(100, player.hp + 1 * dt);
} }
} }
if (player.temperature < COLD_THRESHOLD) { if (player.temperature < COLD_THRESHOLD && !isUndergroundTemp) {
const severity = Math.abs(player.temperature - COLD_THRESHOLD) / 15; const severity = Math.abs(player.temperature - COLD_THRESHOLD) / 15;
player.hp -= 3 * severity * dt; player.hp -= 1 * severity * dt;
if (severity > 0.5) player.vx *= (1 - 0.3 * Math.min(1, severity) * dt); if (severity > 0.5) player.vx *= (1 - 0.3 * Math.min(1, severity) * dt);
} }
if (player.temperature > HEAT_THRESHOLD) { if (player.temperature > HEAT_THRESHOLD && !isUndergroundTemp) {
const severity = (player.temperature - HEAT_THRESHOLD) / 15; const severity = (player.temperature - HEAT_THRESHOLD) / 15;
player.hp -= 2.5 * severity * dt; player.hp -= 1 * severity * dt;
if (severity > 0.5) player.hunger -= 0.5 * severity * dt; if (severity > 0.5) player.hunger -= 0.5 * severity * dt;
} }
// Игрок не может двигаться во время сна // Игрок не может двигаться во время сна

View File

@ -94,6 +94,6 @@
</div> </div>
</div> </div>
<script src="game.js?v=37"></script> <script src="game.js?v=44"></script>
</body> </body>
</html> </html>

View File

@ -3,7 +3,6 @@ server {
root /usr/share/nginx/html; root /usr/share/nginx/html;
index index.html; index index.html;
# CORS for ES modules
add_header Access-Control-Allow-Origin *; add_header Access-Control-Allow-Origin *;
location ~* \.(js|mjs|css)$ { location ~* \.(js|mjs|css)$ {

View File

@ -1,70 +1,94 @@
<!DOCTYPE html> <!DOCTYPE html>
<html><head><title>Voice Test</title></head><body> <html>
<h1>Voice Capture Test</h1> <head><title>Voice Test</title></head>
<button id="btn" style="padding:20px;font-size:24px;background:#2ecc71;color:#fff;border:none;border-radius:12px;cursor:pointer;">Start Mic</button> <body style="background:#1a1a2e;color:#eee;font-family:monospace;padding:20px">
<div id="log" style="font-family:monospace;white-space:pre;margin-top:20px;"></div> <h2>Voice Chat Debug</h2>
<div id="log" style="white-space:pre;overflow:auto;max-height:80vh;border:1px solid #444;padding:10px;font-size:12px"></div>
<script src="https://cdn.socket.io/4.7.4/socket.io.min.js"></script> <script src="https://cdn.socket.io/4.7.4/socket.io.min.js"></script>
<script> <script>
const log = document.getElementById('log'); const VOICE_SERVER = 'https://voicegrech.mkn8n.ru';
function addLog(msg) { log.textContent += msg + '\n'; console.log(msg); } const logEl = document.getElementById('log');
function log(msg) { logEl.textContent += new Date().toISOString().substr(11,8) + ' ' + msg + '\n'; logEl.scrollTop = logEl.scrollHeight; }
let voiceStream, audioCtx, voiceProcessor, voiceSocket; log('Starting voice test...');
let debugCount = 0;
document.getElementById('btn').onclick = async () => { (async () => {
try { try {
addLog('1. Requesting mic...'); log('Requesting microphone...');
voiceStream = await navigator.mediaDevices.getUserMedia({ audio: { echoCancellation: true, noiseSuppression: true } }); const stream = await navigator.mediaDevices.getUserMedia({audio: {echoCancellation:true,noiseSuppression:true,autoGainControl:true}});
addLog('2. Got stream: ' + voiceStream.getTracks().map(t => t.label + ' ' + t.readyState).join(', ')); log('Got mic stream: ' + stream.getTracks().map(t => t.label + ' ' + t.readyState).join(', '));
audioCtx = new AudioContext({ sampleRate: 24000 }); const audioCtx = new AudioContext({sampleRate: 16000});
if (audioCtx.state === 'suspended') await audioCtx.resume(); log('AudioContext: state=' + audioCtx.state + ' sampleRate=' + audioCtx.sampleRate);
addLog('3. AudioContext: state=' + audioCtx.state + ' sampleRate=' + audioCtx.sampleRate); if (audioCtx.state === 'suspended') { await audioCtx.resume(); log('AudioContext resumed: ' + audioCtx.state); }
const source = audioCtx.createMediaStreamSource(voiceStream); const source = audioCtx.createMediaStreamSource(stream);
voiceProcessor = audioCtx.createScriptProcessor(2048, 1, 1); const analyser = audioCtx.createAnalyser();
addLog('4. ScriptProcessor created'); analyser.fftSize = 2048;
source.connect(analyser);
voiceProcessor.onaudioprocess = (e) => { // Check mic levels
debugCount++; const dataArr = new Float32Array(analyser.fftSize);
const pcm = e.inputBuffer.getChannelData(0); let checkCount = 0;
const maxVal = Math.max(...Array.from(pcm).map(Math.abs)); const checkInterval = setInterval(() => {
if (debugCount <= 10) addLog('5. onaudioprocess #' + debugCount + ' samples=' + pcm.length + ' max=' + maxVal.toFixed(4)); analyser.getFloatTimeDomainData(dataArr);
let sum = 0;
for (let i = 0; i < dataArr.length; i++) sum += dataArr[i] * dataArr[i];
const rms = Math.sqrt(sum / dataArr.length);
checkCount++;
if (checkCount <= 20 || checkCount % 50 === 0) log('Mic RMS: ' + rms.toFixed(6) + (rms > 0.008 ? ' SPEAKING' : ''));
}, 200);
if (!voiceSocket || !voiceSocket.connected) return; log('Connecting to voice server...');
const int16 = new Int16Array(pcm.length); const socket = io(VOICE_SERVER, {transports: ['websocket']});
for (let i = 0; i < pcm.length; i++) {
const s = Math.max(-1, Math.min(1, pcm[i])); socket.on('connect', () => {
log('SOCKET CONNECTED: ' + socket.id);
socket.emit('voice_join', {world_id: 'test', x: 0, y: 0, name: 'Tester', mode: 'world', codec: 'pcm'});
log('voice_join sent');
});
socket.on('connect_error', (e) => {
log('SOCKET ERROR: ' + e.message);
});
socket.on('voice_in', (payload) => {
log('RX voice_in from ' + (payload.meta?.from||'?').substring(0,8) + ' codec:' + payload.codec + ' size:' + (payload.data?.byteLength || payload.data?.length || '?'));
});
// Capture and send
const processor = audioCtx.createScriptProcessor(320, 1, 1);
source.connect(processor);
processor.connect(audioCtx.destination);
let seq = 0;
let sendCount = 0;
processor.onaudioprocess = (e) => {
if (!socket.connected) return;
const float32 = e.inputBuffer.getChannelData(0);
let rms = 0;
for (let i = 0; i < float32.length; i++) rms += float32[i] * float32[i];
rms = Math.sqrt(rms / float32.length);
if (rms < 0.005) return; // Skip silence
const int16 = new Int16Array(float32.length);
for (let i = 0; i < float32.length; i++) {
const s = Math.max(-1, Math.min(1, float32[i]));
int16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF; int16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
} }
voiceSocket.emit('voice_data', int16.buffer);
socket.emit('voice_data', {codec: 'pcm', data: int16.buffer, seq: seq++, speaking: true});
sendCount++;
if (sendCount <= 5 || sendCount % 50 === 0) log('TX frame #' + sendCount + ' rms:' + rms.toFixed(4) + ' size:' + int16.buffer.byteLength);
}; };
const silentGain = audioCtx.createGain(); log('Voice capture active. Speak into mic!');
silentGain.gain.value = 0;
source.connect(voiceProcessor);
voiceProcessor.connect(silentGain);
silentGain.connect(audioCtx.destination);
addLog('6. Audio chain connected: source→processor→gain(0)→destination');
addLog('7. Connecting to voice server...');
voiceSocket = io('https://voicegrech.mkn8n.ru', { transports: ['websocket'] });
voiceSocket.on('connect', () => {
addLog('8. Socket connected: ' + voiceSocket.id);
voiceSocket.emit('voice_join', { world_id: 'test', x: 0, y: 0, name: 'Tester' });
addLog('9. Sent voice_join. Speak into mic — watch onaudioprocess logs above!');
});
voiceSocket.on('connect_error', (err) => addLog('ERROR: ' + err.message));
voiceSocket.on('voice_in', (payload) => {
addLog('VOICE_IN from ' + payload.meta.name + ' vol=' + payload.volume + ' bytes=' + payload.data.byteLength);
});
document.getElementById('btn').textContent = 'Listening...';
document.getElementById('btn').style.background = '#e74c3c';
} catch(e) { } catch(e) {
addLog('ERROR: ' + e.message + '\n' + e.stack); log('ERROR: ' + e.message);
} }
}; })();
</script> </script>
</body></html> </body>
</html>