# udp-audio.py
# Receives 16-bit PCM audio over UDP and plays it on a PyAudio output device.

import socket
import struct
import pyaudio
import time

from threading import Thread
import numpy as np
import scipy.signal
from matplotlib import pyplot as plt

# When True, each packet is expected to start with a 4-byte TGID (decoded as a
# little-endian integer) followed by the audio payload.
TGID_in_stream = True
# Only consulted when TGID_in_stream is True: audio is played only if the
# packet's TGID matches. 0 means "adopt the first TGID received and play only
# that one".
TGID_to_play = 0
# UDP port to listen on.
UDP_PORT = 9123
# PyAudio output device index to play received audio on (the script prints the
# available devices at startup).
AUDIO_OUTPUT_DEVICE_INDEX = 4

# UDP server socket: an empty host string binds to every local interface.
# No timeout is configured, so recvfrom() blocks until a datagram arrives
# (UDPSock.settimeout(1) would turn it into a 1-second poll instead).
listen_addr = ("", UDP_PORT)

UDPSock = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
UDPSock.bind(listen_addr)

p = pyaudio.PyAudio()

# Print the info dict of every audio device so the user can look up the right
# value for AUDIO_OUTPUT_DEVICE_INDEX.
device_count = p.get_device_count()
for device_index in range(device_count):
    device_info = p.get_device_info_by_index(device_index)
    print(device_info)

# Sample layout of the audio: mono, signed 16-bit.
FORMAT = pyaudio.paInt16
CHANNELS = 1
# Frames per PyAudio buffer; also used to size the UDP receive buffer.
CHUNK = 1024
# Sample rate of the received audio, and the rate the output stream runs at.
IN_RATE = 8_000
OUT_RATE = 48_000

# Output-only stream on the configured playback device.
stream = p.open(
    output_device_index=AUDIO_OUTPUT_DEVICE_INDEX,
    rate=OUT_RATE,
    channels=CHANNELS,
    format=FORMAT,
    frames_per_buffer=CHUNK,
    output=True,
    input=False,
)

# Resampled frames waiting to be played: filled by receive_audio(), drained by
# play_audio().
FRAMES = []

def _resample_for_playback(pcm_bytes):
    """Convert a received int16 PCM payload from IN_RATE to OUT_RATE.

    Args:
        pcm_bytes: Raw audio payload, interpreted as 16-bit signed samples.

    Returns:
        A 1-D ``np.int16`` array holding the audio resampled to OUT_RATE.
    """
    # np.frombuffer rejects buffers that are not a whole number of samples,
    # so drop a dangling trailing byte instead of letting the thread die.
    usable = len(pcm_bytes) - (len(pcm_bytes) % 2)
    samples = np.frombuffer(pcm_bytes[:usable], dtype=np.int16)
    resampled = scipy.signal.resample(samples, (len(samples) * OUT_RATE) // IN_RATE)
    # FFT-based resampling can overshoot the input range; clip first so the
    # int16 cast saturates instead of wrapping around (audible as loud clicks).
    limits = np.iinfo(np.int16)
    return np.clip(resampled, limits.min, limits.max).astype(np.int16)


def receive_audio():
    """Receive audio datagrams forever and queue them for playback.

    Each datagram is read from ``UDPSock``. When ``TGID_in_stream`` is true the
    first 4 bytes are decoded as a little-endian TGID and the rest is audio;
    otherwise the whole datagram is audio. Audio is resampled from IN_RATE to
    OUT_RATE and appended to ``FRAMES`` for ``play_audio`` to consume.

    TGID filtering: if ``TGID_to_play`` is 0, the first TGID received is
    adopted and only that TGID is played from then on. A non-zero integer or
    a collection of integers selects the TGID(s) to play.

    Payloads shorter than 128 bytes are skipped rather than played.
    This function never returns.
    """
    global TGID_to_play
    while True:
        header_len = 4 if TGID_in_stream else 0
        try:
            data, _ = UDPSock.recvfrom((CHUNK * CHANNELS * 2) + header_len)
        except socket.timeout:
            # Only reachable if a timeout is set on the socket; just poll again.
            continue

        if TGID_in_stream:
            tgid = int.from_bytes(data[0:4], "little")
            if TGID_to_play == 0:
                # No TGID configured: lock onto the first one we hear.
                TGID_to_play = [tgid]
            elif isinstance(TGID_to_play, int):
                # A single configured TGID: normalise so the membership test
                # below works (``tgid in <int>`` would raise TypeError).
                TGID_to_play = [TGID_to_play]
            if tgid not in TGID_to_play:
                continue
            audio_data = data[4:]
        else:
            audio_data = data

        if len(audio_data) < 128:
            continue

        # Always resample and hand off to the playback thread: the output
        # stream runs at OUT_RATE, so raw IN_RATE audio must not be written to
        # it directly, and writing here would stall packet reception.
        FRAMES.append(_resample_for_playback(audio_data))

def play_audio():
    """Play queued frames on the output stream forever.

    Sleeps in 0.1 s steps while ``FRAMES`` is empty, then writes the queued
    frames to ``stream`` in arrival order. This function never returns.
    """
    while True:
        if not FRAMES:
            time.sleep(0.1)
            continue
        # Remove exactly one frame at a time. Copying the list and then
        # clearing it would discard any frame the receiver thread appended
        # between the copy and the clear.
        frame = FRAMES.pop(0)
        stream.write(frame.tobytes())


# Run the receiver and the player as daemon threads, then block the main thread
# on them (both loop forever) so the process stays alive.
stream_t = Thread(target=receive_audio, daemon=True)
play_t = Thread(target=play_audio, daemon=True)

for worker in (stream_t, play_t):
    worker.start()
for worker in (stream_t, play_t):
    worker.join()