Files
ContentGen_BE/media-worker/Services/MinimaxTtsService.cs
T
Harun CAN a40619ef33
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled
main
2026-05-06 10:48:07 +02:00

133 lines
4.4 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;
namespace SaasMediaWorker.Services;
/// <summary>
/// Minimax TTS API Client — Metin → Ses dönüşümü.
/// </summary>
public class MinimaxTtsService
{
private readonly HttpClient _httpClient;
private readonly ILogger<MinimaxTtsService> _logger;
private readonly ApiSettings _settings;
public MinimaxTtsService(
HttpClient httpClient,
ILogger<MinimaxTtsService> logger,
IOptions<ApiSettings> settings)
{
_httpClient = httpClient;
_logger = logger;
_settings = settings.Value;
_httpClient.BaseAddress = new Uri("https://api.minimax.chat/v1/");
_httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", _settings.MinimaxApiKey);
_httpClient.Timeout = TimeSpan.FromMinutes(2);
}
/// <summary>
/// Bir sahnenin narration metnini sese çevirir ve dosyaya kaydeder.
/// </summary>
public async Task<GeneratedMediaFile> GenerateNarrationAsync(
ScenePayload scene,
string outputDirectory,
string voiceStyle,
CancellationToken ct)
{
_logger.LogInformation(
"🎙️ Minimax TTS üretimi — Sahne {Order}: \"{Text}\"",
scene.Order,
scene.NarrationText[..Math.Min(60, scene.NarrationText.Length)]);
// Varsayılan voiceStyle kullan veya fallback
var voiceId = string.IsNullOrWhiteSpace(voiceStyle) ? _settings.MinimaxTtsVoiceId : voiceStyle;
var requestBody = new
{
model = "speech-01-turbo",
text = scene.NarrationText,
voice_setting = new
{
voice_id = voiceId,
speed = 1.0,
vol = 1.0,
pitch = 0
},
audio_setting = new
{
sample_rate = 32000,
bitrate = 128000,
format = "mp3",
channel = 1
}
};
var content = new StringContent(
JsonSerializer.Serialize(requestBody),
Encoding.UTF8,
"application/json");
var response = await _httpClient.PostAsync("t2a_v2", content, ct);
response.EnsureSuccessStatusCode();
// Minimax T2A V2 returns JSON with data.audio containing hex string
var responseString = await response.Content.ReadAsStringAsync(ct);
var jsonResponse = JsonSerializer.Deserialize<JsonElement>(responseString);
if (jsonResponse.TryGetProperty("data", out var dataElement) && dataElement.TryGetProperty("audio", out var audioHex))
{
var hexString = audioHex.GetString() ?? "";
byte[] audioBytes = ConvertHexStringToByteArray(hexString);
var outputPath = Path.Combine(outputDirectory, $"scene_{scene.Order:D2}_narration.mp3");
await File.WriteAllBytesAsync(outputPath, audioBytes, ct);
var fileInfo = new FileInfo(outputPath);
_logger.LogInformation(
"Minimax TTS tamamlandı — Sahne {Order}: {Size} bytes",
scene.Order, fileInfo.Length);
return new GeneratedMediaFile
{
SceneId = scene.Id,
SceneOrder = scene.Order,
Type = MediaFileType.AudioNarration,
LocalPath = outputPath,
FileSizeBytes = fileInfo.Length,
DurationSeconds = scene.Duration,
MimeType = "audio/mpeg",
AiProvider = "minimax"
};
}
else
{
throw new Exception("Minimax API response invalid: " + responseString);
}
}
private static byte[] ConvertHexStringToByteArray(string hexString)
{
if (hexString.Length % 2 != 0)
{
throw new ArgumentException("Hex string must have an even length.");
}
byte[] data = new byte[hexString.Length / 2];
for (int index = 0; index < data.Length; index++)
{
string byteValue = hexString.Substring(index * 2, 2);
data[index] = byte.Parse(byteValue, System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture);
}
return data;
}
}