Upload main file
This commit is contained in:
189
voice_cloak.py
Normal file
189
voice_cloak.py
Normal file
@@ -0,0 +1,189 @@
|
||||
#!/usr/bin/env python3
"""
voice_cloak.py — Real-time mic → Whisper → TTS → virtual sink
Speaks each phrase as it's transcribed, not after a long silence.

Requirements:
    pip install faster-whisper sounddevice numpy edge-tts
    pactl / paplay / pw-cli  (PulseAudio/Pipewire)

First run: select "VoiceCloakMic" as your mic in Discord/whatever.
"""
|
||||
|
||||
import asyncio
import os
import queue
import shlex
import subprocess
import sys
import tempfile
import threading
import time

import numpy as np
import sounddevice as sd
from faster_whisper import WhisperModel
|
||||
|
||||
# ── Config ────────────────────────────────────────────────────────────────────
|
||||
SAMPLE_RATE = 16000            # capture rate in Hz, passed to sd.InputStream
CHANNELS = 1                   # number of input channels requested from the mic
BLOCK_SECONDS = 0.3            # smaller = more responsive
SILENCE_THRESH = 0.012         # RMS threshold for silence
SILENCE_CHUNKS = 5             # silence chunks before flushing (~1.5s)
MAX_BUFFER_CHUNKS = 60         # hard cap ~18s
WHISPER_MODEL = "base.en"      # tiny.en / base.en / small.en
TTS_VOICE = "en-US-RogerNeural"
SINK_NAME = "VoiceCloak"       # null sink that receives the synthesized speech
VIRTUAL_MIC = "VoiceCloakMic"  # source exposed to other apps, fed by the sink's monitor
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Raw microphone blocks: filled by audio_callback, drained by transcribe_loop.
audio_queue: queue.Queue = queue.Queue()
# Transcribed phrases waiting to be spoken: filled by transcribe_loop,
# drained by tts_worker.
tts_queue: queue.Queue = queue.Queue()
# Set once at shutdown; every worker loop polls it and exits when it is set.
stop_event = threading.Event()
|
||||
|
||||
|
||||
# ── Virtual sink ──────────────────────────────────────────────────────────────
|
||||
|
||||
def run(cmd: str) -> str:
    """Run *cmd* and return its stripped standard output.

    The command string is split into arguments with ``shlex.split`` and
    executed directly, without a shell, so values interpolated into *cmd*
    cannot be interpreted as shell syntax (``;``, ``|``, ``$(...)``).
    Pipelines and redirections are therefore not supported.

    Returns ``""`` when the command is empty, cannot be parsed (for example
    an unbalanced quote), or cannot be launched (for example the binary is
    missing). A non-zero exit status is not an error here: whatever the
    process wrote to stdout is returned, and callers treat an empty result
    as failure.
    """
    try:
        argv = shlex.split(cmd)
    except ValueError:
        # Unbalanced quoting: nothing sensible to execute.
        return ""
    if not argv:
        return ""
    try:
        completed = subprocess.run(
            argv, capture_output=True, text=True, check=False
        )
    except OSError:
        # Binary not found or not executable; report "no output".
        return ""
    return completed.stdout.strip()
|
||||
|
||||
def setup_virtual_sink():
    """Create the null sink and the virtual microphone fed by its monitor.

    Does nothing if a sink named exactly ``SINK_NAME`` is already listed by
    ``pactl``. Otherwise loads ``module-null-sink`` and then
    ``module-virtual-source``. If either step fails, any module this call
    loaded is unloaded again, so a later run does not find a sink with no
    matching microphone, and the process exits with status 1.
    """
    # Each row of `pactl list short sinks` carries the sink name in its second
    # column. Compare whole names: a substring test on the raw listing would
    # also match sinks such as "VoiceCloak2".
    rows = (line.split() for line in run("pactl list short sinks").splitlines())
    existing_sinks = {cols[1] for cols in rows if len(cols) > 1}
    if SINK_NAME in existing_sinks:
        print(f"[sink] {SINK_NAME} already exists")
        return

    print(f"[sink] Creating virtual sink '{SINK_NAME}'...")
    # On success `pactl load-module` prints the index of the new module.
    sink_module = run(
        f"pactl load-module module-null-sink sink_name={SINK_NAME} "
        f"sink_properties=device.description={SINK_NAME}"
    )
    mic_module = ""
    if sink_module:
        # The source can only attach to the sink's monitor once the sink exists.
        mic_module = run(
            f"pactl load-module module-virtual-source "
            f"source_name={VIRTUAL_MIC} master={SINK_NAME}.monitor"
        )

    if sink_module and mic_module:
        print(f"[sink] ✓ Ready. Set '{VIRTUAL_MIC}' as your mic in apps.")
        return

    if sink_module.isdigit():
        # Roll back the half-built setup so the next start retries cleanly.
        run(f"pactl unload-module {sink_module}")
    print("[sink] ✗ Failed. Running PulseAudio/Pipewire?")
    sys.exit(1)
|
||||
|
||||
|
||||
# ── Audio capture ─────────────────────────────────────────────────────────────
|
||||
|
||||
def audio_callback(indata, frames, time_info, status):
    """Input-stream callback: hand each captured block to ``audio_queue``.

    Args:
        indata: the block of samples delivered by the input stream.
        frames: number of frames in ``indata`` (unused).
        time_info: stream timing information (unused).
        status: stream status flags; logged to stderr when truthy.
    """
    if status:
        print(f"[audio] {status}", file=sys.stderr)
    # Queue a copy: the consumer reads the block later, outside this callback.
    audio_queue.put(indata.copy())
|
||||
|
||||
def capture_thread():
    """Keep the microphone input stream open until ``stop_event`` is set.

    The stream delivers blocks of ``BLOCK_SECONDS`` of audio to
    ``audio_callback``; this function only holds the stream open and waits.
    If the stream cannot be opened or fails, the error is reported and
    ``stop_event`` is set, so the rest of the program shuts down instead of
    waiting forever for audio that will never arrive.
    """
    try:
        with sd.InputStream(
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype="float32",
            blocksize=int(SAMPLE_RATE * BLOCK_SECONDS),
            callback=audio_callback,
        ):
            print("[mic] Listening... (Ctrl+C to stop)")
            # Poll every 0.1 s; wait() returns True as soon as the event is set.
            while not stop_event.wait(0.1):
                pass
    except Exception as e:
        # Thread boundary: without this the failure would only kill this
        # thread while the main loop keeps polling an empty queue.
        print(f"[mic] Error: {e}", file=sys.stderr)
        stop_event.set()
|
||||
|
||||
|
||||
# ── Transcribe loop ───────────────────────────────────────────────────────────
|
||||
|
||||
def transcribe_loop(model: WhisperModel):
    """Turn microphone blocks into text phrases until ``stop_event`` is set.

    Blocks are read from ``audio_queue``. A block whose RMS is at or above
    ``SILENCE_THRESH`` is appended to the pending buffer; a quieter block
    only advances the silence counter. The buffer is transcribed when
    ``SILENCE_CHUNKS`` consecutive quiet blocks have been seen, or when it
    holds ``MAX_BUFFER_CHUNKS`` blocks. Every non-empty segment is printed
    and pushed onto ``tts_queue``.

    A failure while transcribing one buffer is reported and that buffer is
    dropped; the loop keeps running.

    Args:
        model: the loaded Whisper model used for transcription.
    """
    buffer = []
    silent_count = 0

    while not stop_event.is_set():
        try:
            chunk = audio_queue.get(timeout=0.5)
        except queue.Empty:
            # Nothing captured yet; loop round to re-check stop_event.
            continue

        rms = float(np.sqrt(np.mean(chunk ** 2)))

        if rms < SILENCE_THRESH:
            silent_count += 1
        else:
            silent_count = 0
            buffer.append(chunk)
        # NOTE(review): quiet blocks are never buffered, so pauses inside an
        # utterance are spliced out of the audio Whisper sees — confirm this
        # is intended.

        flush = bool(buffer) and (
            silent_count >= SILENCE_CHUNKS
            or len(buffer) >= MAX_BUFFER_CHUNKS
        )
        if not flush:
            continue

        audio_np = np.concatenate(buffer, axis=0).flatten()
        buffer.clear()
        silent_count = 0

        try:
            # Stream segments as Whisper produces them — fires per phrase
            segments, _ = model.transcribe(
                audio_np,
                language="en",
                vad_filter=True,
                vad_parameters={"min_silence_duration_ms": 200},
            )
            for segment in segments:
                text = segment.text.strip()
                if text:
                    print(f"[transcribed] {text}")
                    tts_queue.put(text)
        except Exception as e:
            # One bad buffer must not end the session; report it and go on.
            print(f"[whisper] Error: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
# ── TTS worker (serial so segments don't overlap) ─────────────────────────────
|
||||
|
||||
def tts_worker():
    """Speak queued phrases one at a time until ``stop_event`` is set.

    Runs in its own thread with a private asyncio event loop. Each phrase
    from ``tts_queue`` is synthesized and played to completion before the
    next one starts, so phrases never overlap. The loop is closed when the
    worker exits, and a failure on one phrase does not stop the worker.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        while not stop_event.is_set():
            try:
                text = tts_queue.get(timeout=0.5)
            except queue.Empty:
                # Nothing to say yet; loop round to re-check stop_event.
                continue
            try:
                loop.run_until_complete(speak(text))
            except Exception as e:
                # Thread boundary: keep serving later phrases.
                print(f"[tts] Error: {e}")
    finally:
        # Release the loop's resources instead of leaking them at exit.
        loop.close()
|
||||
|
||||
|
||||
async def speak(text: str):
    """Synthesize *text* with edge-tts and play it to the sink and speakers.

    The audio is saved to a temporary ``.mp3`` file and played by two
    ``paplay`` processes at once: one targeting ``SINK_NAME``, one using the
    default output. Any failure (missing ``edge_tts``, synthesis error,
    ``paplay`` not installed) is printed and swallowed so the caller can go
    on to the next phrase. Every player that was started is waited for, and
    the temporary file is removed, on all paths.

    Args:
        text: the phrase to speak.
    """
    tmp_path = None
    players = []
    try:
        # Imported here, inside the try, so a missing package is reported
        # like any other TTS failure instead of escaping to the caller.
        import edge_tts

        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            tmp_path = f.name

        communicate = edge_tts.Communicate(text, TTS_VOICE)
        await communicate.save(tmp_path)

        # Play to both simultaneously
        # NOTE(review): this assumes the installed paplay can decode MP3 —
        # confirm on the target system.
        for argv in (
            ["paplay", "--device", SINK_NAME, tmp_path],
            ["paplay", tmp_path],  # default output = your speakers
        ):
            players.append(subprocess.Popen(argv))
    except Exception as e:
        print(f"[tts] Error: {e}")
    finally:
        # Reap every player that started, even if a later spawn failed, so
        # none is left as a zombie and none loses its file mid-playback.
        for player in players:
            player.wait()
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Set up the virtual sink, load Whisper and run the pipeline.

    Microphone capture and TTS run in daemon threads; transcription runs in
    the calling thread. Ctrl+C stops the program. However the transcription
    loop ends, the worker threads are signalled through ``stop_event`` and
    given up to two seconds each to finish.
    """
    print("=" * 55)
    print(" voice_cloak — streaming phrase-by-phrase")
    print("=" * 55)

    setup_virtual_sink()

    print(f"[whisper] Loading '{WHISPER_MODEL}'...")
    model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
    print("[whisper] ✓ Ready")

    threads = [
        threading.Thread(target=capture_thread, daemon=True),
        threading.Thread(target=tts_worker, daemon=True),
    ]
    for t in threads:
        t.start()

    try:
        transcribe_loop(model)
    except KeyboardInterrupt:
        print("\n[main] Stopping...")
    finally:
        # Always signal and join the workers, including when the loop ends
        # through an unexpected exception rather than Ctrl+C.
        stop_event.set()
        for t in threads:
            t.join(timeout=2)
    print("[main] Done.")
|
||||
|
||||
|
||||
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user