Files
ContentGen_BE/media-worker/Services/VoiceboxTtsService.cs
T
Harun CAN 2e6c272eee
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled
main
2026-05-11 07:32:58 +02:00

167 lines
6.1 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;
namespace SaasMediaWorker.Services;
/// <summary>
/// Client for a locally hosted VoiceBox AI Studio instance that converts narration text to speech.
/// </summary>
/// <remarks>
/// Generation is asynchronous on the VoiceBox side. This client posts the text to <c>generate</c>,
/// polls <c>history</c> until the job reports <c>completed</c> or <c>failed</c>, and finally
/// downloads the rendered audio from <c>audio/{id}</c>.
/// </remarks>
public class VoiceboxTtsService
{
    /// <summary>Address of the VoiceBox service as reachable on the Docker network.</summary>
    private const string DefaultBaseAddress = "http://contgen-ai-voicebox:17493/";

    /// <summary>VoiceBox profile used when the caller supplies no voice style (e.g. the Kokoro default).</summary>
    private const string DefaultProfileId = "b6a8a474-0fc0-4a8f-b9f1-a1e4c84a8649";

    /// <summary>Maximum number of history polls: 120 polls * 2 s = 4 minutes.</summary>
    private const int MaxPollAttempts = 120;

    /// <summary>Number of narration characters echoed into the log line.</summary>
    private const int LogPreviewLength = 60;

    private const string StatusCompleted = "completed";
    private const string StatusFailed = "failed";

    /// <summary>Pause between two consecutive history polls.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(2);

    private readonly HttpClient _httpClient;
    private readonly ILogger<VoiceboxTtsService> _logger;

    // Kept for parity with the original class; no member of this class reads it at the moment.
    private readonly ApiSettings _settings;

    /// <summary>
    /// Creates the client and points the supplied <see cref="HttpClient"/> at the VoiceBox service.
    /// </summary>
    /// <param name="httpClient">HTTP client whose base address and timeout are overwritten here.</param>
    /// <param name="logger">Logger used for progress messages.</param>
    /// <param name="settings">API settings wrapper; its value is stored but not otherwise used here.</param>
    public VoiceboxTtsService(
        HttpClient httpClient,
        ILogger<VoiceboxTtsService> logger,
        IOptions<ApiSettings> settings)
    {
        _httpClient = httpClient;
        _logger = logger;
        _settings = settings.Value;

        // The address could also come from appsettings.json; for now the default is assigned directly.
        _httpClient.BaseAddress = new Uri(DefaultBaseAddress);

        // Local rendering speed depends on the host CPU, so individual requests get a generous timeout.
        _httpClient.Timeout = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Converts the narration text of a scene to speech and stores the audio in <paramref name="outputDirectory"/>.
    /// </summary>
    /// <param name="scene">Scene whose <c>NarrationText</c> is synthesized.</param>
    /// <param name="outputDirectory">Directory that receives the audio file; it is created when missing.</param>
    /// <param name="voiceStyle">VoiceBox profile id; a built-in default is used when null or blank.</param>
    /// <param name="ct">Token that cancels the HTTP calls, the polling delay and the file write.</param>
    /// <returns>Metadata describing the audio file written to disk.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="scene"/>, its narration text or <paramref name="outputDirectory"/> is null.
    /// </exception>
    /// <exception cref="HttpRequestException">VoiceBox answered the generate or audio request with a non-success status.</exception>
    /// <exception cref="InvalidOperationException">VoiceBox returned no generation id or reported the job as failed.</exception>
    /// <exception cref="TimeoutException">The job did not complete within the polling budget.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<GeneratedMediaFile> GenerateNarrationAsync(
        ScenePayload scene,
        string outputDirectory,
        string voiceStyle,
        CancellationToken ct)
    {
        // Fail fast on bad arguments instead of after a multi-minute generation.
        ArgumentNullException.ThrowIfNull(scene);
        ArgumentNullException.ThrowIfNull(scene.NarrationText);
        ArgumentNullException.ThrowIfNull(outputDirectory);

        _logger.LogInformation(
            "🎙️ VoiceBox TTS üretimi — Sahne {Order}: \"{Text}\"",
            scene.Order,
            scene.NarrationText[..Math.Min(LogPreviewLength, scene.NarrationText.Length)]);

        var profileId = string.IsNullOrWhiteSpace(voiceStyle) ? DefaultProfileId : voiceStyle;

        // 1. Start the asynchronous generation.
        var generationId = await StartGenerationAsync(scene.NarrationText, profileId, ct);

        // 2. Poll until VoiceBox reports a terminal state.
        await WaitForCompletionAsync(generationId, ct);

        // 3. Download the rendered audio.
        var (outputPath, extension, sizeBytes) = await DownloadAudioAsync(
            generationId,
            outputDirectory,
            $"scene_{scene.Order:D2}_narration",
            ct);

        _logger.LogInformation(
            "VoiceBox TTS tamamlandı — Sahne {Order}: {Size} bytes",
            scene.Order, sizeBytes);

        return new GeneratedMediaFile
        {
            SceneId = scene.Id,
            SceneOrder = scene.Order,
            Type = MediaFileType.AudioNarration,
            LocalPath = outputPath,
            FileSizeBytes = sizeBytes,
            // NOTE(review): this is the scene's own duration value, not a length measured from the audio.
            DurationSeconds = scene.Duration,
            // NOTE(review): yields "audio/mp3" for MP3 output although the server sent "audio/mpeg";
            // left unchanged because consumers may depend on the current value.
            MimeType = $"audio/{extension}",
            AiProvider = "voicebox"
        };
    }

    /// <summary>Posts the generation request and returns the id VoiceBox assigned to the job.</summary>
    private async Task<string> StartGenerationAsync(string text, string profileId, CancellationToken ct)
    {
        var requestBody = new
        {
            text,
            profile_id = profileId,
            language = "tr",
            engine = "kokoro"
        };

        using var content = new StringContent(
            JsonSerializer.Serialize(requestBody),
            Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync("generate", content, ct);
        response.EnsureSuccessStatusCode();

        var json = await response.Content.ReadAsStringAsync(ct);
        using var document = JsonDocument.Parse(json);
        var root = document.RootElement;

        // TryGetProperty is only valid on objects, and GetString only on string/null values,
        // so check the kinds first and report one clear error for every malformed shape.
        if (root.ValueKind == JsonValueKind.Object
            && root.TryGetProperty("id", out var idProp)
            && idProp.ValueKind == JsonValueKind.String)
        {
            var id = idProp.GetString();
            if (!string.IsNullOrEmpty(id))
            {
                return id;
            }
        }

        throw new InvalidOperationException("VoiceBox üretim başlatıldı ancak ID alınamadı.");
    }

    /// <summary>Polls the history endpoint until the job completes, fails or the attempt budget is spent.</summary>
    private async Task WaitForCompletionAsync(string generationId, CancellationToken ct)
    {
        // The for-loop increments on every path, so an unusable response can never poll forever.
        for (var attempt = 0; attempt < MaxPollAttempts; attempt++)
        {
            // Task.Delay observes the token, so cancellation surfaces as OperationCanceledException
            // rather than being misreported as a timeout.
            await Task.Delay(PollInterval, ct);

            using var historyResponse = await _httpClient.GetAsync("history", ct);
            if (!historyResponse.IsSuccessStatusCode)
            {
                continue; // Unsuccessful poll: retry on the next tick.
            }

            var historyJson = await historyResponse.Content.ReadAsStringAsync(ct);
            using var historyDoc = JsonDocument.Parse(historyJson);

            if (!TryFindHistoryItem(historyDoc.RootElement, generationId, out var item))
            {
                continue; // Unknown format or job not listed yet.
            }

            // An entry without a status (or with a null status) is treated as finished.
            var status = item.TryGetProperty("status", out var statusProp)
                ? statusProp.GetString() ?? StatusCompleted
                : StatusCompleted;

            if (status == StatusFailed)
            {
                throw new InvalidOperationException($"VoiceBox ses üretemedi: {DescribeError(item)}");
            }

            if (status == StatusCompleted)
            {
                return;
            }
        }

        throw new TimeoutException("VoiceBox ses üretimi zaman aşımına uğradı.");
    }

    /// <summary>
    /// Locates the history entry whose <c>id</c> equals <paramref name="generationId"/>.
    /// Accepts either a bare JSON array or an object carrying the array under <c>items</c>.
    /// </summary>
    private static bool TryFindHistoryItem(JsonElement root, string generationId, out JsonElement match)
    {
        match = default;

        JsonElement items;
        if (root.ValueKind == JsonValueKind.Array)
        {
            items = root;
        }
        else if (root.ValueKind == JsonValueKind.Object
            && root.TryGetProperty("items", out var itemsProp)
            && itemsProp.ValueKind == JsonValueKind.Array)
        {
            items = itemsProp;
        }
        else
        {
            return false; // Unrecognised payload shape.
        }

        foreach (var item in items.EnumerateArray())
        {
            // Entries that are not objects or lack a string id are skipped instead of throwing.
            if (item.ValueKind == JsonValueKind.Object
                && item.TryGetProperty("id", out var idProp)
                && idProp.ValueKind == JsonValueKind.String
                && idProp.GetString() == generationId)
            {
                match = item;
                return true;
            }
        }

        return false;
    }

    /// <summary>Extracts a readable failure reason from a history entry, tolerating non-string payloads.</summary>
    private static string DescribeError(JsonElement item)
    {
        if (item.TryGetProperty("error", out var errProp))
        {
            if (errProp.ValueKind == JsonValueKind.String)
            {
                var text = errProp.GetString();
                if (!string.IsNullOrWhiteSpace(text))
                {
                    return text;
                }
            }
            else if (errProp.ValueKind != JsonValueKind.Null)
            {
                // Structured error details: surface the raw JSON rather than failing to read it.
                return errProp.GetRawText();
            }
        }

        return "Bilinmeyen üretim hatası";
    }

    /// <summary>
    /// Downloads the rendered audio and writes it to
    /// <paramref name="outputDirectory"/>/<paramref name="fileStem"/>.{mp3|wav}.
    /// </summary>
    private async Task<(string OutputPath, string Extension, long SizeBytes)> DownloadAudioAsync(
        string generationId,
        string outputDirectory,
        string fileStem,
        CancellationToken ct)
    {
        using var audioResponse = await _httpClient.GetAsync(
            $"audio/{Uri.EscapeDataString(generationId)}", ct);
        audioResponse.EnsureSuccessStatusCode();

        var audioBytes = await audioResponse.Content.ReadAsByteArrayAsync(ct);

        // Media type names are case-insensitive; anything other than MPEG audio is stored as WAV.
        var isMpeg = string.Equals(
            audioResponse.Content.Headers.ContentType?.MediaType,
            "audio/mpeg",
            StringComparison.OrdinalIgnoreCase);
        var extension = isMpeg ? "mp3" : "wav";

        // An empty directory means "current directory" for Path.Combine; only create real paths.
        if (!string.IsNullOrEmpty(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }

        var outputPath = Path.Combine(outputDirectory, $"{fileStem}.{extension}");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        return (outputPath, extension, audioBytes.LongLength);
    }
}