mirror of
https://github.com/Bubberstation/Bubberstation.git
synced 2026-01-06 15:02:29 +00:00
[MIRROR] TTS: Gas Mask muffling, Hailer Mask voice effects, support for more filters that use samplerate, voice effects for lizards, ethereals, and xenomorphs. [MDB IGNORE] (#23942)
* TTS: Gas Mask muffling, Hailer Mask voice effects, support for more filters that use samplerate, voice effects for lizards, ethereals, and xenomorphs.
* Update RecordView.tsx
* Update types.ts

---------

Co-authored-by: Iamgoofball <iamgoofball@gmail.com>
Co-authored-by: Bloop <13398309+vinylspiders@users.noreply.github.com>
This commit is contained in:
@@ -4,3 +4,7 @@
|
|||||||
#define TTS_SOUND_ENABLED "Enabled"
|
#define TTS_SOUND_ENABLED "Enabled"
|
||||||
///TTS preference is set to only play blips of a sound, rather than speech.
|
///TTS preference is set to only play blips of a sound, rather than speech.
|
||||||
#define TTS_SOUND_BLIPS "Blips Only"
|
#define TTS_SOUND_BLIPS "Blips Only"
|
||||||
|
///TTS filter to activate start/stop radio clicks on speech.
|
||||||
|
#define TTS_FILTER_RADIO "radio"
|
||||||
|
///TTS filter to activate a silicon effect on speech.
|
||||||
|
#define TTS_FILTER_SILICON "silicon"
|
||||||
|
|||||||
@@ -261,7 +261,7 @@ SUBSYSTEM_DEF(tts)
|
|||||||
|
|
||||||
#undef TTS_ARBRITRARY_DELAY
|
#undef TTS_ARBRITRARY_DELAY
|
||||||
|
|
||||||
/datum/controller/subsystem/tts/proc/queue_tts_message(datum/target, message, datum/language/language, speaker, filter, list/listeners, local = FALSE, message_range = 7, volume_offset = 0, pitch = 0, silicon = "")
|
/datum/controller/subsystem/tts/proc/queue_tts_message(datum/target, message, datum/language/language, speaker, filter, list/listeners, local = FALSE, message_range = 7, volume_offset = 0, pitch = 0, special_filters = "")
|
||||||
if(!tts_enabled)
|
if(!tts_enabled)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -277,7 +277,7 @@ SUBSYSTEM_DEF(tts)
|
|||||||
|
|
||||||
var/shell_scrubbed_input = tts_speech_filter(message)
|
var/shell_scrubbed_input = tts_speech_filter(message)
|
||||||
shell_scrubbed_input = copytext(shell_scrubbed_input, 1, 300)
|
shell_scrubbed_input = copytext(shell_scrubbed_input, 1, 300)
|
||||||
var/identifier = "[sha1(speaker + filter + num2text(pitch) + num2text(silicon) + shell_scrubbed_input)].[world.time]"
|
var/identifier = "[sha1(speaker + filter + num2text(pitch) + special_filters + shell_scrubbed_input)].[world.time]"
|
||||||
if(!(speaker in available_speakers))
|
if(!(speaker in available_speakers))
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -288,9 +288,9 @@ SUBSYSTEM_DEF(tts)
|
|||||||
var/datum/http_request/request_blips = new()
|
var/datum/http_request/request_blips = new()
|
||||||
var/file_name = "tmp/tts/[identifier].ogg"
|
var/file_name = "tmp/tts/[identifier].ogg"
|
||||||
var/file_name_blips = "tmp/tts/[identifier]_blips.ogg"
|
var/file_name_blips = "tmp/tts/[identifier]_blips.ogg"
|
||||||
request.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&silicon=[silicon]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name)
|
request.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&special_filters=[url_encode(special_filters)]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name)
|
||||||
request_blips.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts-blips?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&silicon=[silicon]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name_blips)
|
request_blips.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts-blips?voice=[speaker]&identifier=[identifier]&filter=[url_encode(filter)]&pitch=[pitch]&special_filters=[url_encode(special_filters)]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name_blips)
|
||||||
var/datum/tts_request/current_request = new /datum/tts_request(identifier, request, request_blips, shell_scrubbed_input, target, local, language, message_range, volume_offset, listeners, pitch, silicon)
|
var/datum/tts_request/current_request = new /datum/tts_request(identifier, request, request_blips, shell_scrubbed_input, target, local, language, message_range, volume_offset, listeners, pitch)
|
||||||
var/list/player_queued_tts_messages = queued_tts_messages[target]
|
var/list/player_queued_tts_messages = queued_tts_messages[target]
|
||||||
if(!player_queued_tts_messages)
|
if(!player_queued_tts_messages)
|
||||||
player_queued_tts_messages = list()
|
player_queued_tts_messages = list()
|
||||||
@@ -342,8 +342,6 @@ SUBSYSTEM_DEF(tts)
|
|||||||
var/use_blips = FALSE
|
var/use_blips = FALSE
|
||||||
/// What's the pitch adjustment?
|
/// What's the pitch adjustment?
|
||||||
var/pitch = 0
|
var/pitch = 0
|
||||||
/// Are we using the silicon vocal effect on this?
|
|
||||||
var/silicon = ""
|
|
||||||
|
|
||||||
|
|
||||||
/datum/tts_request/New(identifier, datum/http_request/request, datum/http_request/request_blips, message, target, local, datum/language/language, message_range, volume_offset, list/listeners, pitch)
|
/datum/tts_request/New(identifier, datum/http_request/request, datum/http_request/request_blips, message, target, local, datum/language/language, message_range, volume_offset, list/listeners, pitch)
|
||||||
|
|||||||
@@ -24,6 +24,8 @@
|
|||||||
var/species
|
var/species
|
||||||
/// The character's ID trim
|
/// The character's ID trim
|
||||||
var/trim
|
var/trim
|
||||||
|
/// The character's voice, if they have one.
|
||||||
|
var/voice
|
||||||
|
|
||||||
/datum/record/New(
|
/datum/record/New(
|
||||||
age = 18,
|
age = 18,
|
||||||
@@ -37,6 +39,7 @@
|
|||||||
rank = "Unassigned",
|
rank = "Unassigned",
|
||||||
species = "Human",
|
species = "Human",
|
||||||
trim = "Unassigned",
|
trim = "Unassigned",
|
||||||
|
voice = "?????",
|
||||||
)
|
)
|
||||||
src.age = age
|
src.age = age
|
||||||
src.blood_type = blood_type
|
src.blood_type = blood_type
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ GLOBAL_LIST_INIT(freqtospan, list(
|
|||||||
filter += tts_filter.Join(",")
|
filter += tts_filter.Join(",")
|
||||||
|
|
||||||
if(voice && found_client)
|
if(voice && found_client)
|
||||||
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice, filter.Join(","), listened, message_range = range, pitch = pitch, silicon = tts_silicon_voice_effect)
|
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice, filter.Join(","), listened, message_range = range, pitch = pitch)
|
||||||
|
|
||||||
/atom/movable/proc/compose_message(atom/movable/speaker, datum/language/message_language, raw_message, radio_freq, list/spans, list/message_mods = list(), visible_name = FALSE)
|
/atom/movable/proc/compose_message(atom/movable/speaker, datum/language/message_language, raw_message, radio_freq, list/spans, list/message_mods = list(), visible_name = FALSE)
|
||||||
//This proc uses [] because it is faster than continually appending strings. Thanks BYOND.
|
//This proc uses [] because it is faster than continually appending strings. Thanks BYOND.
|
||||||
|
|||||||
@@ -23,5 +23,5 @@
|
|||||||
var/speaker = preferences.read_preference(/datum/preference/choiced/voice)
|
var/speaker = preferences.read_preference(/datum/preference/choiced/voice)
|
||||||
var/pitch = preferences.read_preference(/datum/preference/numeric/tts_voice_pitch)
|
var/pitch = preferences.read_preference(/datum/preference/numeric/tts_voice_pitch)
|
||||||
COOLDOWN_START(src, tts_test_cooldown, 0.5 SECONDS)
|
COOLDOWN_START(src, tts_test_cooldown, 0.5 SECONDS)
|
||||||
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), user.client, "Look at you, Player. A pathetic creature of meat and bone. How can you challenge a perfect, immortal machine?", speaker = speaker, pitch = pitch, silicon = TRUE, local = TRUE)
|
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), user.client, "Look at you, Player. A pathetic creature of meat and bone. How can you challenge a perfect, immortal machine?", speaker = speaker, pitch = pitch, special_filters = TTS_FILTER_SILICON, local = TRUE)
|
||||||
return TRUE
|
return TRUE
|
||||||
|
|||||||
@@ -12,6 +12,10 @@
|
|||||||
var/adjusted_flags = null
|
var/adjusted_flags = null
|
||||||
///Did we install a filtering cloth?
|
///Did we install a filtering cloth?
|
||||||
var/has_filter = FALSE
|
var/has_filter = FALSE
|
||||||
|
/// If defined, what voice should we override with if TTS is active?
|
||||||
|
var/voice_override
|
||||||
|
/// If set to true, activates the radio effect on TTS. Used for sec hailers, but other masks can utilize it for their own vocal effect.
|
||||||
|
var/use_radio_beeps_tts = FALSE
|
||||||
|
|
||||||
/obj/item/clothing/mask/attack_self(mob/user)
|
/obj/item/clothing/mask/attack_self(mob/user)
|
||||||
if((clothing_flags & VOICEBOX_TOGGLABLE))
|
if((clothing_flags & VOICEBOX_TOGGLABLE))
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ GLOBAL_LIST_INIT(clown_mask_options, list(
|
|||||||
var/has_fov = TRUE
|
var/has_fov = TRUE
|
||||||
///Cigarette in the mask
|
///Cigarette in the mask
|
||||||
var/obj/item/clothing/mask/cigarette/cig
|
var/obj/item/clothing/mask/cigarette/cig
|
||||||
|
voice_filter = "lowpass=f=750,volume=2"
|
||||||
/datum/armor/mask_gas
|
/datum/armor/mask_gas
|
||||||
bio = 100
|
bio = 100
|
||||||
|
|
||||||
@@ -274,6 +274,7 @@ GLOBAL_LIST_INIT(clown_mask_options, list(
|
|||||||
dog_fashion = /datum/dog_fashion/head/clown
|
dog_fashion = /datum/dog_fashion/head/clown
|
||||||
has_fov = FALSE
|
has_fov = FALSE
|
||||||
var/list/clownmask_designs = list()
|
var/list/clownmask_designs = list()
|
||||||
|
voice_filter = null // performer masks expect to be talked through
|
||||||
|
|
||||||
/obj/item/clothing/mask/gas/clown_hat/plasmaman
|
/obj/item/clothing/mask/gas/clown_hat/plasmaman
|
||||||
starting_filter_type = /obj/item/gas_filter/plasmaman
|
starting_filter_type = /obj/item/gas_filter/plasmaman
|
||||||
|
|||||||
@@ -68,6 +68,8 @@ GLOBAL_LIST_INIT(hailer_phrases, list(
|
|||||||
var/recent_uses = 0
|
var/recent_uses = 0
|
||||||
///Whether the hailer is emagged or not
|
///Whether the hailer is emagged or not
|
||||||
var/safety = TRUE
|
var/safety = TRUE
|
||||||
|
voice_filter = @{"[0:a] asetrate=%SAMPLE_RATE%*0.7,aresample=16000,atempo=1/0.7,lowshelf=g=-20:f=500,highpass=f=500,aphaser=in_gain=1:out_gain=1:delay=3.0:decay=0.4:speed=0.5:type=t [out]; [out]atempo=1.2,volume=15dB [final]; anoisesrc=a=0.01:d=60 [noise]; [final][noise] amix=duration=shortest"}
|
||||||
|
use_radio_beeps_tts = TRUE
|
||||||
|
|
||||||
/obj/item/clothing/mask/gas/sechailer/plasmaman
|
/obj/item/clothing/mask/gas/sechailer/plasmaman
|
||||||
starting_filter_type = /obj/item/gas_filter/plasmaman
|
starting_filter_type = /obj/item/gas_filter/plasmaman
|
||||||
|
|||||||
@@ -394,13 +394,29 @@ GLOBAL_LIST_INIT(message_modes_stat_limits, list(
|
|||||||
tts_message_to_use = message_raw
|
tts_message_to_use = message_raw
|
||||||
|
|
||||||
var/list/filter = list()
|
var/list/filter = list()
|
||||||
|
var/list/special_filter = list()
|
||||||
|
var/voice_to_use = voice
|
||||||
|
var/use_radio = FALSE
|
||||||
if(length(voice_filter) > 0)
|
if(length(voice_filter) > 0)
|
||||||
filter += voice_filter
|
filter += voice_filter
|
||||||
|
|
||||||
if(length(tts_filter) > 0)
|
if(length(tts_filter) > 0)
|
||||||
filter += tts_filter.Join(",")
|
filter += tts_filter.Join(",")
|
||||||
|
if(ishuman(src))
|
||||||
|
var/mob/living/carbon/human/human_speaker = src
|
||||||
|
if(human_speaker.wear_mask)
|
||||||
|
var/obj/item/clothing/mask/worn_mask = human_speaker.wear_mask
|
||||||
|
if(worn_mask.voice_override)
|
||||||
|
voice_to_use = worn_mask.voice_override
|
||||||
|
if(worn_mask.voice_filter)
|
||||||
|
filter += worn_mask.voice_filter
|
||||||
|
use_radio = worn_mask.use_radio_beeps_tts
|
||||||
|
if(use_radio)
|
||||||
|
special_filter += TTS_FILTER_RADIO
|
||||||
|
if(issilicon(src))
|
||||||
|
special_filter += TTS_FILTER_SILICON
|
||||||
|
|
||||||
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice, filter.Join(","), listened, message_range = message_range, pitch = pitch, silicon = tts_silicon_voice_effect)
|
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(tts_message_to_use), message_language, voice_to_use, filter.Join(","), listened, message_range = message_range, pitch = pitch, special_filters = special_filter.Join("|"))
|
||||||
|
|
||||||
var/image/say_popup = image('icons/mob/effects/talk.dmi', src, "[bubble_type][talk_icon_state]", FLY_LAYER)
|
var/image/say_popup = image('icons/mob/effects/talk.dmi', src, "[bubble_type][talk_icon_state]", FLY_LAYER)
|
||||||
SET_PLANE_EXPLICIT(say_popup, ABOVE_GAME_PLANE, src)
|
SET_PLANE_EXPLICIT(say_popup, ABOVE_GAME_PLANE, src)
|
||||||
|
|||||||
@@ -45,6 +45,7 @@
|
|||||||
current_record["rank"] = person.rank
|
current_record["rank"] = person.rank
|
||||||
current_record["species"] = person.species
|
current_record["species"] = person.species
|
||||||
current_record["wanted"] = person.wanted_status
|
current_record["wanted"] = person.wanted_status
|
||||||
|
current_record["voice"] = person.voice
|
||||||
|
|
||||||
all_records += list(current_record)
|
all_records += list(current_record)
|
||||||
if("medical")
|
if("medical")
|
||||||
|
|||||||
@@ -184,7 +184,7 @@
|
|||||||
languages_native = list(/datum/language/draconic, /datum/language/ashtongue) //SKYRAT EDIT: Ashtongue for Ashwalkers
|
languages_native = list(/datum/language/draconic, /datum/language/ashtongue) //SKYRAT EDIT: Ashtongue for Ashwalkers
|
||||||
liked_foodtypes = GORE | MEAT | SEAFOOD | NUTS | BUGS
|
liked_foodtypes = GORE | MEAT | SEAFOOD | NUTS | BUGS
|
||||||
disliked_foodtypes = GRAIN | DAIRY | CLOTH | GROSS
|
disliked_foodtypes = GRAIN | DAIRY | CLOTH | GROSS
|
||||||
|
voice_filter = @{"[0:a] asplit [out0][out2]; [out0] asetrate=%SAMPLE_RATE%*0.9,aresample=%SAMPLE_RATE%,atempo=1/0.9,aformat=channel_layouts=mono,volume=0.2 [p0]; [out2] asetrate=%SAMPLE_RATE%*1.1,aresample=%SAMPLE_RATE%,atempo=1/1.1,aformat=channel_layouts=mono,volume=0.2[p2]; [p0][0][p2] amix=inputs=3"}
|
||||||
/obj/item/organ/internal/tongue/lizard/modify_speech(datum/source, list/speech_args)
|
/obj/item/organ/internal/tongue/lizard/modify_speech(datum/source, list/speech_args)
|
||||||
var/static/regex/lizard_hiss = new("s+", "g")
|
var/static/regex/lizard_hiss = new("s+", "g")
|
||||||
var/static/regex/lizard_hiSS = new("S+", "g")
|
var/static/regex/lizard_hiSS = new("S+", "g")
|
||||||
@@ -493,7 +493,7 @@ GLOBAL_LIST_INIT(english_to_zombie, list())
|
|||||||
say_mod = "hisses"
|
say_mod = "hisses"
|
||||||
taste_sensitivity = 10 // LIZARDS ARE ALIENS CONFIRMED
|
taste_sensitivity = 10 // LIZARDS ARE ALIENS CONFIRMED
|
||||||
modifies_speech = TRUE // not really, they just hiss
|
modifies_speech = TRUE // not really, they just hiss
|
||||||
|
voice_filter = @{"[0:a] asplit [out0][out2]; [out0] asetrate=%SAMPLE_RATE%*0.8,aresample=%SAMPLE_RATE%,atempo=1/0.8,aformat=channel_layouts=mono [p0]; [out2] asetrate=%SAMPLE_RATE%*1.2,aresample=%SAMPLE_RATE%,atempo=1/1.2,aformat=channel_layouts=mono[p2]; [p0][0][p2] amix=inputs=3"}
|
||||||
// Aliens can only speak alien and a few other languages.
|
// Aliens can only speak alien and a few other languages.
|
||||||
/obj/item/organ/internal/tongue/alien/get_possible_languages()
|
/obj/item/organ/internal/tongue/alien/get_possible_languages()
|
||||||
return list(
|
return list(
|
||||||
@@ -601,6 +601,7 @@ GLOBAL_LIST_INIT(english_to_zombie, list())
|
|||||||
toxic_foodtypes = NONE //no food is particularly toxic to ethereals
|
toxic_foodtypes = NONE //no food is particularly toxic to ethereals
|
||||||
attack_verb_continuous = list("shocks", "jolts", "zaps")
|
attack_verb_continuous = list("shocks", "jolts", "zaps")
|
||||||
attack_verb_simple = list("shock", "jolt", "zap")
|
attack_verb_simple = list("shock", "jolt", "zap")
|
||||||
|
voice_filter = @{"[0:a] asplit [out0][out2]; [out0] asetrate=%SAMPLE_RATE%*0.99,aresample=%SAMPLE_RATE%,volume=0.3 [p0]; [p0][out2] amix=inputs=2"}
|
||||||
|
|
||||||
// Ethereal tongues can speak all default + voltaic
|
// Ethereal tongues can speak all default + voltaic
|
||||||
/obj/item/organ/internal/tongue/ethereal/get_possible_languages()
|
/obj/item/organ/internal/tongue/ethereal/get_possible_languages()
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ const RecordInfo = (props, context) => {
|
|||||||
rank,
|
rank,
|
||||||
species,
|
species,
|
||||||
wanted_status,
|
wanted_status,
|
||||||
|
voice,
|
||||||
// SKYRAT EDIT START - RP Records
|
// SKYRAT EDIT START - RP Records
|
||||||
past_general_records,
|
past_general_records,
|
||||||
past_security_records,
|
past_security_records,
|
||||||
@@ -172,6 +173,9 @@ const RecordInfo = (props, context) => {
|
|||||||
text={fingerprint}
|
text={fingerprint}
|
||||||
/>
|
/>
|
||||||
</LabeledList.Item>
|
</LabeledList.Item>
|
||||||
|
<LabeledList.Item label="Voice">
|
||||||
|
<EditableText field="voice" target_ref={crew_ref} text={voice} />
|
||||||
|
</LabeledList.Item>
|
||||||
<LabeledList.Item label="Note">
|
<LabeledList.Item label="Note">
|
||||||
<EditableText
|
<EditableText
|
||||||
field="security_note"
|
field="security_note"
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ export type SecurityRecord = {
|
|||||||
rank: string;
|
rank: string;
|
||||||
species: string;
|
species: string;
|
||||||
wanted_status: string;
|
wanted_status: string;
|
||||||
|
voice: string;
|
||||||
// SKYRAT EDIT START - RP Records
|
// SKYRAT EDIT START - RP Records
|
||||||
past_general_records: string;
|
past_general_records: string;
|
||||||
past_security_records: string;
|
past_security_records: string;
|
||||||
|
|||||||
BIN
tools/tts/tts-api/off1.wav
Normal file
BIN
tools/tts/tts-api/off1.wav
Normal file
Binary file not shown.
BIN
tools/tts/tts-api/off2.wav
Normal file
BIN
tools/tts/tts-api/off2.wav
Normal file
Binary file not shown.
BIN
tools/tts/tts-api/off3.wav
Normal file
BIN
tools/tts/tts-api/off3.wav
Normal file
Binary file not shown.
BIN
tools/tts/tts-api/off4.wav
Normal file
BIN
tools/tts/tts-api/off4.wav
Normal file
Binary file not shown.
BIN
tools/tts/tts-api/on1.wav
Normal file
BIN
tools/tts/tts-api/on1.wav
Normal file
Binary file not shown.
BIN
tools/tts/tts-api/on2.wav
Normal file
BIN
tools/tts/tts-api/on2.wav
Normal file
Binary file not shown.
@@ -4,11 +4,18 @@ import gc
|
|||||||
import subprocess
|
import subprocess
|
||||||
import requests
|
import requests
|
||||||
import re
|
import re
|
||||||
|
import pysbd
|
||||||
|
import pydub
|
||||||
|
import string
|
||||||
|
import random
|
||||||
|
import json
|
||||||
from flask import Flask, request, send_file, abort, make_response
|
from flask import Flask, request, send_file, abort, make_response
|
||||||
|
tts_sample_rate = 40000 # Set to 40000 if you're using RVC, or whatever sample rate your endpoint is going to send the audio in.
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
segmenter = pysbd.Segmenter(language="en", clean=True)
|
||||||
authorization_token = os.getenv("TTS_AUTHORIZATION_TOKEN", "coolio")
|
radio_starts = ["./on1.wav", "./on2.wav"]
|
||||||
|
radio_ends = ["./off1.wav", "./off2.wav", "./off3.wav", "./off4.wav"]
|
||||||
|
authorization_token = os.getenv("TTS_AUTHORIZATION_TOKEN", "vote_goof_2024")
|
||||||
def hhmmss_to_seconds(string):
|
def hhmmss_to_seconds(string):
|
||||||
new_time = 0
|
new_time = 0
|
||||||
separated_times = string.split(":")
|
separated_times = string.split(":")
|
||||||
@@ -17,29 +24,46 @@ def hhmmss_to_seconds(string):
|
|||||||
new_time += float(separated_times[2])
|
new_time += float(separated_times[2])
|
||||||
return new_time
|
return new_time
|
||||||
|
|
||||||
def text_to_speech_handler(endpoint, voice, text, filter_complex, pitch, silicon = False):
|
def text_to_speech_handler(endpoint, voice, text, filter_complex, pitch, special_filters = []):
|
||||||
filter_complex = filter_complex.replace("\"", "")
|
filter_complex = filter_complex.replace("\"", "")
|
||||||
response = requests.get(f"http://tts-container:5003/" + endpoint, json={ 'text': text, 'voice': voice, 'pitch': pitch })
|
data_bytes = io.BytesIO()
|
||||||
if response.status_code != 200:
|
final_audio = pydub.AudioSegment.empty()
|
||||||
abort(500)
|
|
||||||
|
|
||||||
|
for sentence in segmenter.segment(text):
|
||||||
|
response = requests.get(f"http://127.0.0.1:5003/" + endpoint, json={ 'text': sentence, 'voice': voice, 'pitch': pitch })
|
||||||
|
if response.status_code != 200:
|
||||||
|
abort(500)
|
||||||
|
sentence_audio = pydub.AudioSegment.from_file(io.BytesIO(response.content), "wav")
|
||||||
|
sentence_silence = pydub.AudioSegment.silent(250, 40000)
|
||||||
|
sentence_audio += sentence_silence
|
||||||
|
final_audio += sentence_audio
|
||||||
|
# ""Goldman-Eisler (1968) determined that typical speakers paused for an average of 250 milliseconds (ms), with a range from 150 to 400 ms.""
|
||||||
|
# (https://scholarsarchive.byu.edu/cgi/viewcontent.cgi?article=10153&context=etd)
|
||||||
|
final_audio.export(data_bytes, format="wav")
|
||||||
|
filter_complex = filter_complex.replace("%SAMPLE_RATE%", str(tts_sample_rate))
|
||||||
ffmpeg_result = None
|
ffmpeg_result = None
|
||||||
if filter_complex != "":
|
if filter_complex != "":
|
||||||
ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-filter_complex", filter_complex, "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=response.content, capture_output = True)
|
ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-filter_complex", filter_complex, "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=data_bytes.read(), capture_output = True)
|
||||||
else:
|
else:
|
||||||
if silicon:
|
if "silicon" in special_filters:
|
||||||
ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-i", "./SynthImpulse.wav", "-i", "./RoomImpulse.wav", "-filter_complex", "[0] aresample=44100 [re_1]; [re_1] apad=pad_dur=2 [in_1]; [in_1] asplit=2 [in_1_1] [in_1_2]; [in_1_1] [1] afir=dry=10:wet=10 [reverb_1]; [in_1_2] [reverb_1] amix=inputs=2:weights=8 1 [mix_1]; [mix_1] asplit=2 [mix_1_1] [mix_1_2]; [mix_1_1] [2] afir=dry=1:wet=1 [reverb_2]; [mix_1_2] [reverb_2] amix=inputs=2:weights=10 1 [mix_2]; [mix_2] equalizer=f=7710:t=q:w=0.6:g=-6,equalizer=f=33:t=q:w=0.44:g=-10 [out]; [out] alimiter=level_in=1:level_out=1:limit=0.5:attack=5:release=20:level=disabled", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=response.content, capture_output = True)
|
ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-i", "./SynthImpulse.wav", "-i", "./RoomImpulse.wav", "-filter_complex", "[0] aresample=44100 [re_1]; [re_1] apad=pad_dur=2 [in_1]; [in_1] asplit=2 [in_1_1] [in_1_2]; [in_1_1] [1] afir=dry=10:wet=10 [reverb_1]; [in_1_2] [reverb_1] amix=inputs=2:weights=8 1 [mix_1]; [mix_1] asplit=2 [mix_1_1] [mix_1_2]; [mix_1_1] [2] afir=dry=1:wet=1 [reverb_2]; [mix_1_2] [reverb_2] amix=inputs=2:weights=10 1 [mix_2]; [mix_2] equalizer=f=7710:t=q:w=0.6:g=-6,equalizer=f=33:t=q:w=0.44:g=-10 [out]; [out] alimiter=level_in=1:level_out=1:limit=0.5:attack=5:release=20:level=disabled", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=data_bytes.read(), capture_output = True)
|
||||||
else:
|
else:
|
||||||
ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input=response.content, capture_output = True)
|
ffmpeg_result = subprocess.run(["ffmpeg", "-f", "wav", "-i", "pipe:0", "-c:a", "libvorbis", "-b:a", "64k", "-f", "ogg", "pipe:1"], input= data_bytes.read(), capture_output = True)
|
||||||
ffmpeg_metadata_output = ffmpeg_result.stderr.decode()
|
ffmpeg_metadata_output = ffmpeg_result.stderr.decode()
|
||||||
print(f"ffmpeg result size: {len(ffmpeg_result.stdout)} stderr = \n{ffmpeg_metadata_output}")
|
print(f"ffmpeg result size: {len(ffmpeg_result.stdout)} stderr = \n{ffmpeg_metadata_output}")
|
||||||
|
export_audio = io.BytesIO(ffmpeg_result.stdout)
|
||||||
|
if "radio" in special_filters:
|
||||||
|
radio_audio = pydub.AudioSegment.from_file(random.choice(radio_starts), "wav")
|
||||||
|
radio_audio += pydub.AudioSegment.from_file(io.BytesIO(ffmpeg_result.stdout), "ogg")
|
||||||
|
radio_audio += pydub.AudioSegment.from_file(random.choice(radio_ends), "wav")
|
||||||
|
new_data_bytes = io.BytesIO()
|
||||||
|
radio_audio.export(new_data_bytes, format="ogg")
|
||||||
|
export_audio = io.BytesIO(new_data_bytes.getvalue())
|
||||||
matched_length = re.search(r"time=([0-9:\\.]+)", ffmpeg_metadata_output)
|
matched_length = re.search(r"time=([0-9:\\.]+)", ffmpeg_metadata_output)
|
||||||
hh_mm_ss = matched_length.group(1)
|
hh_mm_ss = matched_length.group(1)
|
||||||
length = hhmmss_to_seconds(hh_mm_ss)
|
length = hhmmss_to_seconds(hh_mm_ss)
|
||||||
|
|
||||||
response = send_file(io.BytesIO(ffmpeg_result.stdout), as_attachment=True, download_name='identifier.ogg', mimetype="audio/ogg")
|
response = send_file(export_audio, as_attachment=True, download_name='identifier.ogg', mimetype="audio/ogg")
|
||||||
response.headers['audio-length'] = length
|
response.headers['audio-length'] = length
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@@ -51,12 +75,15 @@ def text_to_speech_normal():
|
|||||||
voice = request.args.get("voice", '')
|
voice = request.args.get("voice", '')
|
||||||
text = request.json.get("text", '')
|
text = request.json.get("text", '')
|
||||||
pitch = request.args.get("pitch", '')
|
pitch = request.args.get("pitch", '')
|
||||||
silicon = request.args.get("silicon", '')
|
special_filters = request.args.get("special_filters", '')
|
||||||
if pitch == "":
|
if pitch == "":
|
||||||
pitch = "0"
|
pitch = "0"
|
||||||
|
silicon = request.args.get("silicon", '')
|
||||||
|
if silicon:
|
||||||
|
special_filters = ["silicon"]
|
||||||
|
|
||||||
filter_complex = request.args.get("filter", '')
|
filter_complex = request.args.get("filter", '')
|
||||||
return text_to_speech_handler("generate-tts", voice, text, filter_complex, pitch, bool(silicon))
|
return text_to_speech_handler("generate-tts", voice, text, filter_complex, pitch, special_filters)
|
||||||
|
|
||||||
@app.route("/tts-blips")
|
@app.route("/tts-blips")
|
||||||
def text_to_speech_blips():
|
def text_to_speech_blips():
|
||||||
@@ -66,12 +93,13 @@ def text_to_speech_blips():
|
|||||||
voice = request.args.get("voice", '')
|
voice = request.args.get("voice", '')
|
||||||
text = request.json.get("text", '')
|
text = request.json.get("text", '')
|
||||||
pitch = request.args.get("pitch", '')
|
pitch = request.args.get("pitch", '')
|
||||||
silicon = request.args.get("silicon", '')
|
special_filters = request.args.get("special_filters", '')
|
||||||
if pitch == "":
|
if pitch == "":
|
||||||
pitch = "0"
|
pitch = "0"
|
||||||
|
special_filters = special_filters.split("|")
|
||||||
|
|
||||||
filter_complex = request.args.get("filter", '')
|
filter_complex = request.args.get("filter", '')
|
||||||
return text_to_speech_handler("generate-tts-blips", voice, text, filter_complex, pitch, bool(silicon))
|
return text_to_speech_handler("generate-tts-blips", voice, text, filter_complex, pitch, special_filters)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -80,7 +108,7 @@ def voices_list():
|
|||||||
if authorization_token != request.headers.get("Authorization", ""):
|
if authorization_token != request.headers.get("Authorization", ""):
|
||||||
abort(401)
|
abort(401)
|
||||||
|
|
||||||
response = requests.get(f"http://tts-container:5003/tts-voices")
|
response = requests.get(f"http://127.0.0.1:5003/tts-voices")
|
||||||
return response.content
|
return response.content
|
||||||
|
|
||||||
@app.route("/health-check")
|
@app.route("/health-check")
|
||||||
@@ -93,7 +121,7 @@ def pitch_available():
|
|||||||
if authorization_token != request.headers.get("Authorization", ""):
|
if authorization_token != request.headers.get("Authorization", ""):
|
||||||
abort(401)
|
abort(401)
|
||||||
|
|
||||||
response = requests.get(f"http://tts-container:5003/pitch-available")
|
response = requests.get(f"http://127.0.0.1:5003/pitch-available")
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
abort(500)
|
abort(500)
|
||||||
return make_response("Pitch available", 200)
|
return make_response("Pitch available", 200)
|
||||||
|
|||||||
Reference in New Issue
Block a user