[MIRROR] TTS: Gas Mask muffling, Hailer Mask voice effects, support for more filters that use samplerate, voice effects for lizards, ethereals, and xenomorphs. [MDB IGNORE] (#23942)

* TTS: Gas Mask muffling, Hailer Mask voice effects, support for more filters that use samplerate, voice effects for lizards, ethereals, and xenomorphs.

* Update RecordView.tsx

* Update types.ts

---------

Co-authored-by: Iamgoofball <iamgoofball@gmail.com>
Co-authored-by: Bloop <13398309+vinylspiders@users.noreply.github.com>
Committed by: SkyratBot on 2023-09-27 08:27:56 +02:00 (via GitHub)
Parent: 66aab80c2e
Commit: 32e9778c23
20 changed files with 96 additions and 33 deletions

View File

@@ -4,3 +4,7 @@
#define TTS_SOUND_ENABLED "Enabled"
///TTS preference is set to only play blips of a sound, rather than speech.
#define TTS_SOUND_BLIPS "Blips Only"
+///TTS filter to activate start/stop radio clicks on speech.
+#define TTS_FILTER_RADIO "radio"
+///TTS filter to activate a silicon effect on speech.
+#define TTS_FILTER_SILICON "silicon"

View File

@@ -261,7 +261,7 @@ SUBSYSTEM_DEF(tts)
#undef TTS_ARBRITRARY_DELAY
-/datum/controller/subsystem/tts/proc/queue_tts_message(datum/target, message, datum/language/language, speaker, filter, list/listeners, local = FALSE, message_range = 7, volume_offset = 0, pitch = 0, silicon = "")
+/datum/controller/subsystem/tts/proc/queue_tts_message(datum/target, message, datum/language/language, speaker, filter, list/listeners, local = FALSE, message_range = 7, volume_offset = 0, pitch = 0, special_filters = "")
	if(!tts_enabled)
		return
@@ -277,7 +277,7 @@ SUBSYSTEM_DEF(tts)
	var/shell_scrubbed_input = tts_speech_filter(message)
	shell_scrubbed_input = copytext(shell_scrubbed_input, 1, 300)
-	var/identifier = "[sha1(speaker + filter + num2text(pitch) + num2text(silicon) + shell_scrubbed_input)].[world.time]"
+	var/identifier = "[sha1(speaker + filter + num2text(pitch) + special_filters + shell_scrubbed_input)].[world.time]"
	if(!(speaker in available_speakers))
		return
@@ -288,9 +288,9 @@ SUBSYSTEM_DEF(tts)
	var/datum/http_request/request_blips = new()
	var/file_name = "tmp/tts/[identifier].ogg"
	var/file_name_blips = "tmp/tts/[identifier]_blips.ogg"
-	request.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&silicon=[silicon]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name)
+	request.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&special_filters=[url_encode(special_filters)]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name)
-	request_blips.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts-blips?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&silicon=[silicon]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name_blips)
+	request_blips.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts-blips?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&special_filters=[url_encode(special_filters)]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name_blips)
-	var/datum/tts_request/current_request = new /datum/tts_request(identifier, request, request_blips, shell_scrubbed_input, target, local, language, message_range, volume_offset, listeners, pitch, silicon)
+	var/datum/tts_request/current_request = new /datum/tts_request(identifier, request, request_blips, shell_scrubbed_input, target, local, language, message_range, volume_offset, listeners, pitch)
	var/list/player_queued_tts_messages = queued_tts_messages[target]
	if(!player_queued_tts_messages)
		player_queued_tts_messages = list()
@@ -342,8 +342,6 @@ SUBSYSTEM_DEF(tts)
	var/use_blips = FALSE
	/// What's the pitch adjustment?
	var/pitch = 0
-	/// Are we using the silicon vocal effect on this?
-	var/silicon = ""
/datum/tts_request/New(identifier, datum/http_request/request, datum/http_request/request_blips, message, target, local, datum/language/language, message_range, volume_offset, list/listeners, pitch)
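The net effect of this hunk: the queue proc swaps the single `silicon` flag for a free-form `special_filters` string that feeds both the cache identifier and the query string. As a rough client-side reproduction of the GET request the subsystem now prepares (a sketch only; the URL, token, voice name, and filter string below are placeholder assumptions, not values from this commit):

```python
import requests
from urllib.parse import urlencode

# Placeholder values standing in for CONFIG_GET(string/tts_http_url), the
# speaker's voice, and the ffmpeg filter string assembled by the caller.
tts_http_url = "http://127.0.0.1:5231"
params = {
    "voice": "example_voice",
    "identifier": "abc123.42",
    "filter": "lowpass=f=750,volume=2",
    "pitch": "0",
    "special_filters": "radio|silicon",  # TTS_FILTER_* names joined with "|"
}
response = requests.get(
    f"{tts_http_url}/tts?{urlencode(params)}",  # urlencode escapes "|" as %7C, like url_encode() in DM
    json={"text": "Test message"},
    headers={"Authorization": "example-token"},  # assumed auth header, analogous to the DM `headers` variable
)
response.raise_for_status()
with open("tmp/tts/abc123.42.ogg", "wb") as out_file:
    out_file.write(response.content)
```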

View File

@@ -24,6 +24,8 @@
	var/species
	/// The character's ID trim
	var/trim
+	/// The character's voice, if they have one.
+	var/voice
/datum/record/New(
	age = 18,
@@ -37,6 +39,7 @@
	rank = "Unassigned",
	species = "Human",
	trim = "Unassigned",
+	voice = "?????",
)
	src.age = age
	src.blood_type = blood_type

View File

@@ -108,7 +108,7 @@ GLOBAL_LIST_INIT(freqtospan, list(
		filter += tts_filter.Join(",")
	if(voice && found_client)
-		INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice, filter.Join(","), listened, message_range = range, pitch = pitch, silicon = tts_silicon_voice_effect)
+		INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice, filter.Join(","), listened, message_range = range, pitch = pitch)
/atom/movable/proc/compose_message(atom/movable/speaker, datum/language/message_language, raw_message, radio_freq, list/spans, list/message_mods = list(), visible_name = FALSE)
	//This proc uses [] because it is faster than continually appending strings. Thanks BYOND.

View File

@@ -23,5 +23,5 @@
	var/speaker = preferences.read_preference(/datum/preference/choiced/voice)
	var/pitch = preferences.read_preference(/datum/preference/numeric/tts_voice_pitch)
	COOLDOWN_START(src, tts_test_cooldown, 0.5 SECONDS)
-	INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), user.client, "Look at you, Player. A pathetic creature of meat and bone. How can you challenge a perfect, immortal machine?", speaker = speaker, pitch = pitch, silicon = TRUE, local = TRUE)
+	INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), user.client, "Look at you, Player. A pathetic creature of meat and bone. How can you challenge a perfect, immortal machine?", speaker = speaker, pitch = pitch, special_filters = TTS_FILTER_SILICON, local = TRUE)
	return TRUE

View File

@@ -12,6 +12,10 @@
	var/adjusted_flags = null
	///Did we install a filtering cloth?
	var/has_filter = FALSE
+	/// If defined, what voice should we override with if TTS is active?
+	var/voice_override
+	/// If set to true, activates the radio effect on TTS. Used for sec hailers, but other masks can utilize it for their own vocal effect.
+	var/use_radio_beeps_tts = FALSE
/obj/item/clothing/mask/attack_self(mob/user)
	if((clothing_flags & VOICEBOX_TOGGLABLE))

View File

@@ -28,7 +28,7 @@ GLOBAL_LIST_INIT(clown_mask_options, list(
	var/has_fov = TRUE
	///Cigarette in the mask
	var/obj/item/clothing/mask/cigarette/cig
+	voice_filter = "lowpass=f=750,volume=2"
/datum/armor/mask_gas
	bio = 100
@@ -274,6 +274,7 @@ GLOBAL_LIST_INIT(clown_mask_options, list(
	dog_fashion = /datum/dog_fashion/head/clown
	has_fov = FALSE
	var/list/clownmask_designs = list()
+	voice_filter = null // performer masks expect to be talked through
/obj/item/clothing/mask/gas/clown_hat/plasmaman
	starting_filter_type = /obj/item/gas_filter/plasmaman

View File

@@ -68,6 +68,8 @@ GLOBAL_LIST_INIT(hailer_phrases, list(
	var/recent_uses = 0
	///Whether the hailer is emagged or not
	var/safety = TRUE
+	voice_filter = @{"[0:a] asetrate=%SAMPLE_RATE%*0.7,aresample=16000,atempo=1/0.7,lowshelf=g=-20:f=500,highpass=f=500,aphaser=in_gain=1:out_gain=1:delay=3.0:decay=0.4:speed=0.5:type=t [out]; [out]atempo=1.2,volume=15dB [final]; anoisesrc=a=0.01:d=60 [noise]; [final][noise] amix=duration=shortest"}
+	use_radio_beeps_tts = TRUE
/obj/item/clothing/mask/gas/sechailer/plasmaman
	starting_filter_type = /obj/item/gas_filter/plasmaman
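The hailer's `voice_filter` relies on the new `%SAMPLE_RATE%` placeholder, which the TTS API substitutes with its configured sample rate before invoking ffmpeg. A minimal local preview of that substitution, assuming a 40000 Hz rate (mirroring `tts_sample_rate` in the API change further down) and placeholder input/output paths:

```python
import subprocess

# Assumed sample rate mirroring tts_sample_rate in tools/tts/tts-api below.
tts_sample_rate = 40000

# Filter graph copied from the sechailer voice_filter above, with the
# %SAMPLE_RATE% token substituted the same way the API does it.
hailer_filter = (
    "[0:a] asetrate=%SAMPLE_RATE%*0.7,aresample=16000,atempo=1/0.7,"
    "lowshelf=g=-20:f=500,highpass=f=500,"
    "aphaser=in_gain=1:out_gain=1:delay=3.0:decay=0.4:speed=0.5:type=t [out]; "
    "[out]atempo=1.2,volume=15dB [final]; "
    "anoisesrc=a=0.01:d=60 [noise]; "
    "[final][noise] amix=duration=shortest"
).replace("%SAMPLE_RATE%", str(tts_sample_rate))

# input.wav / output.ogg are placeholder paths for a local test clip.
subprocess.run(
    ["ffmpeg", "-y", "-i", "input.wav", "-filter_complex", hailer_filter,
     "-c:a", "libvorbis", "output.ogg"],
    check=True,
)
```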

View File

@@ -394,13 +394,29 @@ GLOBAL_LIST_INIT(message_modes_stat_limits, list(
	tts_message_to_use = message_raw
	var/list/filter = list()
+	var/list/special_filter = list()
+	var/voice_to_use = voice
+	var/use_radio = FALSE
	if(length(voice_filter) > 0)
		filter += voice_filter
	if(length(tts_filter) > 0)
		filter += tts_filter.Join(",")
+	if(ishuman(src))
+		var/mob/living/carbon/human/human_speaker = src
+		if(human_speaker.wear_mask)
+			var/obj/item/clothing/mask/worn_mask = human_speaker.wear_mask
+			if(worn_mask.voice_override)
+				voice_to_use = worn_mask.voice_override
+			if(worn_mask.voice_filter)
+				filter += worn_mask.voice_filter
+			use_radio = worn_mask.use_radio_beeps_tts
+	if(use_radio)
+		special_filter += TTS_FILTER_RADIO
+	if(issilicon(src))
+		special_filter += TTS_FILTER_SILICON
-	INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice, filter.Join(","), listened, message_range = message_range, pitch = pitch, silicon = tts_silicon_voice_effect)
+	INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice_to_use, filter.Join(","), listened, message_range = message_range, pitch = pitch, special_filters = special_filter.Join("|"))
	var/image/say_popup = image('icons/mob/effects/talk.dmi', src, "[bubble_type][talk_icon_state]", FLY_LAYER)
	SET_PLANE_EXPLICIT(say_popup, ABOVE_GAME_PLANE, src)
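`special_filter` then round-trips to the API as a plain delimited string: `Join("|")` plus `url_encode()` on the DM side, `split("|")` on the Flask side. A quick Python illustration of that round trip, using the two filter names defined in this commit:

```python
from urllib.parse import quote, unquote

# special_filter as built above for a silicon speaker wearing a hailer mask.
special_filter = ["radio", "silicon"]

query_value = quote("|".join(special_filter))   # "radio%7Csilicon", as sent in the URL
parsed = unquote(query_value).split("|")        # what the API recovers on its side
assert parsed == special_filter
```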

View File

@@ -45,6 +45,7 @@
		current_record["rank"] = person.rank
		current_record["species"] = person.species
		current_record["wanted"] = person.wanted_status
+		current_record["voice"] = person.voice
		all_records += list(current_record)
	if("medical")

View File

@@ -184,7 +184,7 @@
	languages_native = list(/datum/language/draconic, /datum/language/ashtongue) //SKYRAT EDIT: Ashtongue for Ashwalkers
	liked_foodtypes = GORE | MEAT | SEAFOOD | NUTS | BUGS
	disliked_foodtypes = GRAIN | DAIRY | CLOTH | GROSS
+	voice_filter = @{"[0:a] asplit [out0][out2]; [out0] asetrate=%SAMPLE_RATE%*0.9,aresample=%SAMPLE_RATE%,atempo=1/0.9,aformat=channel_layouts=mono,volume=0.2 [p0]; [out2] asetrate=%SAMPLE_RATE%*1.1,aresample=%SAMPLE_RATE%,atempo=1/1.1,aformat=channel_layouts=mono,volume=0.2[p2]; [p0][0][p2] amix=inputs=3"}
/obj/item/organ/internal/tongue/lizard/modify_speech(datum/source, list/speech_args)
	var/static/regex/lizard_hiss = new("s+", "g")
	var/static/regex/lizard_hiSS = new("S+", "g")
@@ -493,7 +493,7 @@ GLOBAL_LIST_INIT(english_to_zombie, list())
	say_mod = "hisses"
	taste_sensitivity = 10 // LIZARDS ARE ALIENS CONFIRMED
	modifies_speech = TRUE // not really, they just hiss
+	voice_filter = @{"[0:a] asplit [out0][out2]; [out0] asetrate=%SAMPLE_RATE%*0.8,aresample=%SAMPLE_RATE%,atempo=1/0.8,aformat=channel_layouts=mono [p0]; [out2] asetrate=%SAMPLE_RATE%*1.2,aresample=%SAMPLE_RATE%,atempo=1/1.2,aformat=channel_layouts=mono[p2]; [p0][0][p2] amix=inputs=3"}
// Aliens can only speak alien and a few other languages.
/obj/item/organ/internal/tongue/alien/get_possible_languages()
	return list(
@@ -601,6 +601,7 @@ GLOBAL_LIST_INIT(english_to_zombie, list())
	toxic_foodtypes = NONE //no food is particularly toxic to ethereals
	attack_verb_continuous = list("shocks", "jolts", "zaps")
	attack_verb_simple = list("shock", "jolt", "zap")
+	voice_filter = @{"[0:a] asplit [out0][out2]; [out0] asetrate=%SAMPLE_RATE%*0.99,aresample=%SAMPLE_RATE%,volume=0.3 [p0]; [p0][out2] amix=inputs=2"}
// Ethereal tongues can speak all default + voltaic
/obj/item/organ/internal/tongue/ethereal/get_possible_languages()

View File

@@ -56,6 +56,7 @@ const RecordInfo = (props, context) => {
    rank,
    species,
    wanted_status,
+   voice,
    // SKYRAT EDIT START - RP Records
    past_general_records,
    past_security_records,
@@ -172,6 +173,9 @@ const RecordInfo = (props, context) => {
          text={fingerprint}
        />
      </LabeledList.Item>
+     <LabeledList.Item label="Voice">
+       <EditableText field="voice" target_ref={crew_ref} text={voice} />
+     </LabeledList.Item>
      <LabeledList.Item label="Note">
        <EditableText
          field="security_note"

View File

@@ -23,6 +23,7 @@ export type SecurityRecord = {
  rank: string;
  species: string;
  wanted_status: string;
+ voice: string;
  // SKYRAT EDIT START - RP Records
  past_general_records: string;
  past_security_records: string;

BIN
tools/tts/tts-api/off1.wav Normal file

Binary file not shown.

BIN
tools/tts/tts-api/off2.wav Normal file

Binary file not shown.

BIN
tools/tts/tts-api/off3.wav Normal file

Binary file not shown.

BIN
tools/tts/tts-api/off4.wav Normal file

Binary file not shown.

BIN
tools/tts/tts-api/on1.wav Normal file

Binary file not shown.

BIN
tools/tts/tts-api/on2.wav Normal file

Binary file not shown.

View File

@@ -4,11 +4,18 @@ import gc
import subprocess
import requests
import re
+import pysbd
+import pydub
+import string
+import random
+import json
from flask import Flask, request, send_file, abort, make_response
+tts_sample_rate = 40000 # Set to 40000 if you're using RVC, or whatever sample rate your endpoint is going to send the audio in.
app = Flask(__name__)
+segmenter = pysbd.Segmenter(language="en", clean=True)
-authorization_token = os.getenv("TTS_AUTHORIZATION_TOKEN", "coolio")
+radio_starts = ["./on1.wav", "./on2.wav"]
+radio_ends = ["./off1.wav", "./off2.wav", "./off3.wav", "./off4.wav"]
+authorization_token = os.getenv("TTS_AUTHORIZATION_TOKEN", "vote_goof_2024")
def hhmmss_to_seconds(string):
    new_time = 0
    separated_times = string.split(":")
@@ -17,29 +24,46 @@ def hhmmss_to_seconds(string):
        new_time += float(separated_times[2])
    return new_time
-def text_to_speech_handler(endpoint, voice, text, filter_complex, pitch, silicon = False):
+def text_to_speech_handler(endpoint, voice, text, filter_complex, pitch, special_filters = []):
    filter_complex = filter_complex.replace("\"", "")
-    response = requests.get(f"http://tts-container:5003/" + endpoint, json={ 'text': text, 'voice': voice, 'pitch': pitch })
-    if response.status_code != 200:
-        abort(500)
+    data_bytes = io.BytesIO()
+    final_audio = pydub.AudioSegment.empty()
+    for sentence in segmenter.segment(text):
+        response = requests.get(f"http://127.0.0.1:5003/" + endpoint, json={ 'text': sentence, 'voice': voice, 'pitch': pitch })
+        if response.status_code != 200:
+            abort(500)
+        sentence_audio = pydub.AudioSegment.from_file(io.BytesIO(response.content), "wav")
+        sentence_silence = pydub.AudioSegment.silent(250, 40000)
+        sentence_audio += sentence_silence
+        final_audio += sentence_audio
+    # ""Goldman-Eisler (1968) determined that typical speakers paused for an average of 250 milliseconds (ms), with a range from 150 to 400 ms.""
+    # (https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=10153&context=etd)
+    final_audio.export(data_bytes, format="wav")
+    filter_complex = filter_complex.replace("%SAMPLE_RATE%", str(tts_sample_rate))
    ffmpeg_result = None
    if filter_complex != "":
-        ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-filter_complex", filter_complex, "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=response.content, capture_output = True)
+        ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-filter_complex", filter_complex, "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=data_bytes.read(), capture_output = True)
    else:
-        if silicon:
+        if "silicon" in special_filters:
-            ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-i", "./SynthImpulse.wav", "-i", "./RoomImpulse.wav", "-filter_complex", "[0] aresample=44100 [re_1]; [re_1] apad=pad_dur=2 [in_1]; [in_1] asplit=2 [in_1_1] [in_1_2]; [in_1_1] [1] afir=dry=10:wet=10 [reverb_1]; [in_1_2] [reverb_1] amix=inputs=2:weights=8 1 [mix_1]; [mix_1] asplit=2 [mix_1_1] [mix_1_2]; [mix_1_1] [2] afir=dry=1:wet=1 [reverb_2]; [mix_1_2] [reverb_2] amix=inputs=2:weights=10 1 [mix_2]; [mix_2] equalizer=f=7710:t=q:w=0.6:g=-6,equalizer=f=33:t=q:w=0.44:g=-10 [out]; [out] alimiter=level_in=1:level_out=1:limit=0.5:attack=5:release=20:level=disabled", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=response.content, capture_output = True)
+            ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-i", "./SynthImpulse.wav", "-i", "./RoomImpulse.wav", "-filter_complex", "[0] aresample=44100 [re_1]; [re_1] apad=pad_dur=2 [in_1]; [in_1] asplit=2 [in_1_1] [in_1_2]; [in_1_1] [1] afir=dry=10:wet=10 [reverb_1]; [in_1_2] [reverb_1] amix=inputs=2:weights=8 1 [mix_1]; [mix_1] asplit=2 [mix_1_1] [mix_1_2]; [mix_1_1] [2] afir=dry=1:wet=1 [reverb_2]; [mix_1_2] [reverb_2] amix=inputs=2:weights=10 1 [mix_2]; [mix_2] equalizer=f=7710:t=q:w=0.6:g=-6,equalizer=f=33:t=q:w=0.44:g=-10 [out]; [out] alimiter=level_in=1:level_out=1:limit=0.5:attack=5:release=20:level=disabled", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=data_bytes.read(), capture_output = True)
        else:
-            ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=response.content, capture_output = True)
+            ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=data_bytes.read(), capture_output = True)
    ffmpeg_metadata_output = ffmpeg_result.stderr.decode()
    print(f"ffmpeg result size: {len(ffmpeg_result.stdout)} stderr = \n{ffmpeg_metadata_output}")
+    export_audio = io.BytesIO(ffmpeg_result.stdout)
+    if "radio" in special_filters:
+        radio_audio = pydub.AudioSegment.from_file(random.choice(radio_starts), "wav")
+        radio_audio += pydub.AudioSegment.from_file(io.BytesIO(ffmpeg_result.stdout), "ogg")
+        radio_audio += pydub.AudioSegment.from_file(random.choice(radio_ends), "wav")
+        new_data_bytes = io.BytesIO()
+        radio_audio.export(new_data_bytes, format="ogg")
+        export_audio = io.BytesIO(new_data_bytes.getvalue())
    matched_length = re.search(r"time=([0-9:\\.]+)", ffmpeg_metadata_output)
    hh_mm_ss = matched_length.group(1)
    length = hhmmss_to_seconds(hh_mm_ss)
-    response = send_file(io.BytesIO(ffmpeg_result.stdout), as_attachment=True, download_name='identifier.ogg', mimetype="audio/ogg")
+    response = send_file(export_audio, as_attachment=True, download_name='identifier.ogg', mimetype="audio/ogg")
    response.headers['audio-length'] = length
    return response
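To summarize the new handler: it synthesizes one sentence at a time via pysbd, pads each sentence with 250 ms of silence before concatenation (pydub), applies the ffmpeg filter graph, and, when the radio filter is requested, wraps the encoded result in randomly chosen on/off click samples. A condensed, self-contained sketch of just the pause-assembly step (the 40000 Hz frame rate mirrors `tts_sample_rate`; `wav_chunks` stands in for the per-sentence TTS responses):

```python
import io
import pydub

def assemble_with_pauses(wav_chunks, pause_ms=250, frame_rate=40000):
    """Concatenate per-sentence WAV byte strings with a short pause after each."""
    final_audio = pydub.AudioSegment.empty()
    for chunk in wav_chunks:
        final_audio += pydub.AudioSegment.from_file(io.BytesIO(chunk), "wav")
        final_audio += pydub.AudioSegment.silent(pause_ms, frame_rate)
    return final_audio
```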
@@ -51,12 +75,15 @@ def text_to_speech_normal():
    voice = request.args.get("voice", '')
    text = request.json.get("text", '')
    pitch = request.args.get("pitch", '')
-    silicon = request.args.get("silicon", '')
+    special_filters = request.args.get("special_filters", '')
    if pitch == "":
        pitch = "0"
+    silicon = request.args.get("silicon", '')
+    if silicon:
+        special_filters = ["silicon"]
    filter_complex = request.args.get("filter", '')
-    return text_to_speech_handler("generate-tts", voice, text, filter_complex, pitch, bool(silicon))
+    return text_to_speech_handler("generate-tts", voice, text, filter_complex, pitch, special_filters)
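With the route updated, a simple smoke test can exercise the new `special_filters` parameter end to end. The host, port, and voice name below are assumptions for a local deployment (Flask's default port), not values guaranteed by this commit:

```python
import requests

response = requests.get(
    "http://127.0.0.1:5000/tts",
    params={"voice": "example_voice", "pitch": "0", "special_filters": "radio|silicon"},
    json={"text": "Control to all units, report in."},
    headers={"Authorization": "vote_goof_2024"},  # default token from this diff; override via TTS_AUTHORIZATION_TOKEN
)
print(response.status_code, response.headers.get("audio-length"))
with open("smoke_test.ogg", "wb") as out_file:
    out_file.write(response.content)
```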
@app.route("/tts-blips")
def text_to_speech_blips():
@@ -66,12 +93,13 @@ def text_to_speech_blips():
    voice = request.args.get("voice", '')
    text = request.json.get("text", '')
    pitch = request.args.get("pitch", '')
-    silicon = request.args.get("silicon", '')
+    special_filters = request.args.get("special_filters", '')
    if pitch == "":
        pitch = "0"
+    special_filters = special_filters.split("|")
    filter_complex = request.args.get("filter", '')
-    return text_to_speech_handler("generate-tts-blips", voice, text, filter_complex, pitch, bool(silicon))
+    return text_to_speech_handler("generate-tts-blips", voice, text, filter_complex, pitch, special_filters)
@@ -80,7 +108,7 @@ def voices_list():
    if authorization_token != request.headers.get("Authorization", ""):
        abort(401)
-    response = requests.get(f"http://tts-container:5003/tts-voices")
+    response = requests.get(f"http://127.0.0.1:5003/tts-voices")
    return response.content
@app.route("/health-check")
@@ -93,7 +121,7 @@ def pitch_available():
    if authorization_token != request.headers.get("Authorization", ""):
        abort(401)
-    response = requests.get(f"http://tts-container:5003/pitch-available")
+    response = requests.get(f"http://127.0.0.1:5003/pitch-available")
    if response.status_code != 200:
        abort(500)
    return make_response("Pitch available", 200)