using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;

namespace SaasMediaWorker.Services;

/// <summary>
/// VoiceBox AI Studio client — text-to-speech conversion (100% local and free).
/// </summary>
public class VoiceboxTtsService
{
    // Address of the voicebox service on the Docker network.
    private const string DefaultBaseAddress = "http://contgen-ai-voicebox:17493/";

    // Default VoiceBox profile (e.g. the Kokoro default), used when no voice style is supplied.
    private const string DefaultProfileId = "b6a8a474-0fc0-4a8f-b9f1-a1e4c84a8649";

    // Fallback text used when the service reports a failure without a usable error message.
    private const string UnknownGenerationError = "Bilinmeyen üretim hatası";

    // 120 polls * 2 s = 4 minutes of waiting (excluding the time spent in the requests themselves).
    private const int MaxPollAttempts = 120;
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(2);

    // Local rendering can be slow depending on CPU speed, so individual requests get a generous timeout.
    private static readonly TimeSpan RequestTimeout = TimeSpan.FromMinutes(5);

    private readonly HttpClient _httpClient;
    private readonly ILogger<VoiceboxTtsService> _logger;

    // NOTE(review): stored but not read anywhere in this class; the base address is hard-coded below.
    private readonly ApiSettings _settings;

    /// <summary>
    /// Creates the client and points the supplied <see cref="HttpClient"/> at the voicebox service.
    /// </summary>
    /// <param name="httpClient">HTTP client used for all VoiceBox calls; its base address and timeout are overwritten here.</param>
    /// <param name="logger">Logger for progress and diagnostic messages.</param>
    /// <param name="settings">API settings (currently only stored).</param>
    public VoiceboxTtsService(
        HttpClient httpClient,
        ILogger<VoiceboxTtsService> logger,
        IOptions<ApiSettings> settings)
    {
        _httpClient = httpClient;
        _logger = logger;
        _settings = settings.Value;

        // Connects to the voicebox service over the Docker network.
        // This could also be read from appsettings.json, but the default is assigned here.
        _httpClient.BaseAddress = new Uri(DefaultBaseAddress);
        _httpClient.Timeout = RequestTimeout;
    }

    /// <summary>
    /// Converts a scene's narration text to speech and saves the audio to a file.
    /// </summary>
    /// <param name="scene">Scene whose <c>NarrationText</c> is synthesized.</param>
    /// <param name="outputDirectory">Directory the audio file is written to; created if it does not exist.</param>
    /// <param name="voiceStyle">VoiceBox profile id; the default profile is used when null or whitespace.</param>
    /// <param name="ct">Token used to cancel the HTTP calls, the polling delay and the file write.</param>
    /// <returns>Metadata describing the audio file that was written.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scene"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The service returned no generation id, or reported the generation as failed.</exception>
    /// <exception cref="TimeoutException">The generation did not complete within the polling budget.</exception>
    /// <exception cref="HttpRequestException">The generate or audio request returned a non-success status code.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<GeneratedMediaFile> GenerateNarrationAsync(
        ScenePayload scene,
        string outputDirectory,
        string voiceStyle,
        CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(scene);

        // Only the first 60 characters of the narration are logged.
        _logger.LogInformation(
            "🎙️ VoiceBox TTS üretimi — Sahne {Order}: \"{Text}\"",
            scene.Order,
            scene.NarrationText[..Math.Min(60, scene.NarrationText.Length)]);

        // 1. Start the asynchronous generation.
        var generationId = await StartGenerationAsync(scene.NarrationText, voiceStyle, ct);

        // 2. Poll until the generation is reported as completed (throws on failure or timeout).
        await WaitForCompletionAsync(generationId, ct);

        // 3. Download the generated audio file.
        var fileStem = $"scene_{scene.Order:D2}_narration";
        var (outputPath, sizeBytes, isMp3) = await DownloadAudioAsync(generationId, outputDirectory, fileStem, ct);

        _logger.LogInformation(
            "VoiceBox TTS tamamlandı — Sahne {Order}: {Size} bytes",
            scene.Order,
            sizeBytes);

        return new GeneratedMediaFile
        {
            SceneId = scene.Id,
            SceneOrder = scene.Order,
            Type = MediaFileType.AudioNarration,
            LocalPath = outputPath,
            FileSizeBytes = sizeBytes,
            // NOTE(review): this is the scene's own duration value, not one measured from the audio — confirm that is intended.
            DurationSeconds = scene.Duration,
            // "audio/mpeg" is the registered media type for MP3; "audio/mp3" is not a registered type.
            MimeType = isMp3 ? "audio/mpeg" : "audio/wav",
            AiProvider = "voicebox"
        };
    }

    // Posts the narration to the "generate" endpoint and returns the generation id from the response.
    private async Task<string> StartGenerationAsync(string narrationText, string voiceStyle, CancellationToken ct)
    {
        var profileId = string.IsNullOrWhiteSpace(voiceStyle) ? DefaultProfileId : voiceStyle;

        var requestBody = new
        {
            text = narrationText,
            profile_id = profileId,
            language = "tr",
            engine = "kokoro"
        };

        using var content = new StringContent(
            JsonSerializer.Serialize(requestBody),
            Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync("generate", content, ct);
        response.EnsureSuccessStatusCode();

        var json = await response.Content.ReadAsStringAsync(ct);
        using var document = JsonDocument.Parse(json);

        // A missing or non-string "id" is treated the same as an empty one.
        var root = document.RootElement;
        var generationId =
            root.ValueKind == JsonValueKind.Object
            && root.TryGetProperty("id", out var idProp)
            && idProp.ValueKind == JsonValueKind.String
                ? idProp.GetString()
                : null;

        if (string.IsNullOrEmpty(generationId))
        {
            throw new InvalidOperationException("VoiceBox üretim başlatıldı ancak ID alınamadı.");
        }

        return generationId;
    }

    // Polls the "history" endpoint until the generation is completed; throws on failure or when attempts run out.
    private async Task WaitForCompletionAsync(string generationId, CancellationToken ct)
    {
        for (var attempt = 1; attempt <= MaxPollAttempts; attempt++)
        {
            // Cancellation surfaces here (and in the HTTP calls) as OperationCanceledException.
            await Task.Delay(PollInterval, ct);

            using var response = await _httpClient.GetAsync("history", ct);
            if (!response.IsSuccessStatusCode)
            {
                // A failed poll is tolerated but still consumes an attempt.
                _logger.LogWarning(
                    "VoiceBox geçmiş sorgusu başarısız (HTTP {StatusCode}) — deneme {Attempt}/{MaxAttempts}",
                    (int)response.StatusCode,
                    attempt,
                    MaxPollAttempts);
                continue;
            }

            var json = await response.Content.ReadAsStringAsync(ct);
            using var document = JsonDocument.Parse(json);

            // An unrecognized payload or a missing entry also consumes an attempt, so the loop always terminates.
            if (!TryGetGenerationStatus(document.RootElement, generationId, out var status, out var errorMessage))
            {
                continue;
            }

            if (status == "completed")
            {
                return;
            }

            if (status == "failed")
            {
                throw new InvalidOperationException($"VoiceBox ses üretemedi: {errorMessage}");
            }
        }

        throw new TimeoutException("VoiceBox ses üretimi zaman aşımına uğradı.");
    }

    // Finds the history entry for generationId and extracts its status and error text; returns false when no entry matches.
    private static bool TryGetGenerationStatus(
        JsonElement root,
        string generationId,
        out string status,
        out string errorMessage)
    {
        status = string.Empty;
        errorMessage = UnknownGenerationError;

        // The history payload is accepted either as a bare array or as an object with an "items" array.
        JsonElement items;
        if (root.ValueKind == JsonValueKind.Array)
        {
            items = root;
        }
        else if (root.ValueKind == JsonValueKind.Object
                 && root.TryGetProperty("items", out var itemsProp)
                 && itemsProp.ValueKind == JsonValueKind.Array)
        {
            items = itemsProp;
        }
        else
        {
            return false;
        }

        foreach (var item in items.EnumerateArray())
        {
            // Entries that are not objects or have no string "id" are skipped instead of throwing.
            if (item.ValueKind != JsonValueKind.Object
                || !item.TryGetProperty("id", out var idProp)
                || idProp.ValueKind != JsonValueKind.String
                || idProp.GetString() != generationId)
            {
                continue;
            }

            // An entry with no status (or a null status) is treated as completed.
            status = item.TryGetProperty("status", out var statusProp)
                ? statusProp.GetString() ?? "completed"
                : "completed";

            if (item.TryGetProperty("error", out var errorProp))
            {
                errorMessage = errorProp.ValueKind switch
                {
                    JsonValueKind.String => errorProp.GetString() ?? UnknownGenerationError,
                    JsonValueKind.Null or JsonValueKind.Undefined => UnknownGenerationError,
                    // Structured errors are reported as their raw JSON text.
                    _ => errorProp.GetRawText(),
                };
            }

            return true;
        }

        return false;
    }

    // Downloads the generated audio and writes it into outputDirectory; returns the path, size and whether it is MP3.
    private async Task<(string OutputPath, long SizeBytes, bool IsMp3)> DownloadAudioAsync(
        string generationId,
        string outputDirectory,
        string fileStem,
        CancellationToken ct)
    {
        using var response = await _httpClient.GetAsync($"audio/{generationId}", ct);
        response.EnsureSuccessStatusCode();

        var audioBytes = await response.Content.ReadAsByteArrayAsync(ct);

        // Anything that is not declared as audio/mpeg is stored with a .wav extension.
        var isMp3 = string.Equals(
            response.Content.Headers.ContentType?.MediaType,
            "audio/mpeg",
            StringComparison.OrdinalIgnoreCase);
        var extension = isMp3 ? "mp3" : "wav";

        if (!string.IsNullOrEmpty(outputDirectory))
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(outputDirectory);
        }

        var outputPath = Path.Combine(outputDirectory, $"{fileStem}.{extension}");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        return (outputPath, audioBytes.LongLength, isMp3);
    }
}