using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;
namespace SaasMediaWorker.Services;
/// <summary>
/// VoiceBox AI Studio client — text-to-speech conversion (runs fully locally, free of charge).
/// </summary>
/// <remarks>
/// The workflow is: POST <c>generate</c> to start a job, poll <c>history</c> until the job
/// reports <c>completed</c> or <c>failed</c>, then GET <c>audio/{id}</c> to download the result.
/// </remarks>
public class VoiceboxTtsService
{
    /// <summary>Address of the voicebox service on the Docker network.</summary>
    private const string DefaultBaseAddress = "http://contgen-ai-voicebox:17493/";

    /// <summary>Default VoiceBox profile (e.g. the Kokoro default) used when no voice style is given.</summary>
    private const string DefaultProfileId = "b6a8a474-0fc0-4a8f-b9f1-a1e4c84a8649";

    /// <summary>Maximum number of history polls: 120 * 2 s = 4 minutes.</summary>
    private const int MaxPollAttempts = 120;

    /// <summary>Number of narration characters echoed into the log.</summary>
    private const int LogPreviewLength = 60;

    /// <summary>Delay before each history poll.</summary>
    private static readonly TimeSpan PollInterval = TimeSpan.FromSeconds(2);

    private readonly HttpClient _httpClient;
    private readonly ILogger<VoiceboxTtsService> _logger;

    // Stored from the injected options; not read anywhere in this class at the moment.
    private readonly ApiSettings _settings;

    /// <summary>
    /// Creates the service and points the supplied <see cref="HttpClient"/> at the VoiceBox container.
    /// </summary>
    /// <param name="httpClient">HTTP client used for every VoiceBox call; its base address and timeout are overwritten here.</param>
    /// <param name="logger">Logger for progress messages.</param>
    /// <param name="settings">Application API settings.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public VoiceboxTtsService(
        HttpClient httpClient,
        ILogger<VoiceboxTtsService> logger,
        IOptions<ApiSettings> settings)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _settings = (settings ?? throw new ArgumentNullException(nameof(settings))).Value;

        // Connects to the voicebox service over the Docker network. The address could also
        // be read from appsettings.json, but a fixed default is assigned here.
        _httpClient.BaseAddress = new Uri(DefaultBaseAddress);

        // Local rendering can take a while depending on CPU speed.
        _httpClient.Timeout = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Converts a scene's narration text to speech and saves the audio to a file.
    /// </summary>
    /// <param name="scene">Scene whose <c>NarrationText</c> is synthesized.</param>
    /// <param name="outputDirectory">Directory the audio file is written to; created if it does not exist.</param>
    /// <param name="voiceStyle">VoiceBox profile id; the default profile is used when null or whitespace.</param>
    /// <param name="ct">Token that cancels the HTTP calls, the polling delay and the file write.</param>
    /// <returns>
    /// Metadata for the written file. <c>DurationSeconds</c> is copied from <c>scene.Duration</c>,
    /// it is not measured from the generated audio.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="scene"/> or <paramref name="outputDirectory"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The scene has no narration text.</exception>
    /// <exception cref="HttpRequestException">The generate or audio endpoint returned a non-success status.</exception>
    /// <exception cref="InvalidOperationException">VoiceBox returned no generation id, or reported the job as failed.</exception>
    /// <exception cref="TimeoutException">The job did not complete within the polling budget.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<GeneratedMediaFile> GenerateNarrationAsync(
        ScenePayload scene,
        string outputDirectory,
        string voiceStyle,
        CancellationToken ct)
    {
        // Validate up front so bad input fails before minutes of rendering, not after.
        if (scene is null)
        {
            throw new ArgumentNullException(nameof(scene));
        }

        if (outputDirectory is null)
        {
            throw new ArgumentNullException(nameof(outputDirectory));
        }

        var narration = scene.NarrationText;
        if (narration is null)
        {
            throw new ArgumentException("Scene narration text must not be null.", nameof(scene));
        }

        _logger.LogInformation(
            "🎙️ VoiceBox TTS üretimi — Sahne {Order}: \"{Text}\"",
            scene.Order,
            narration[..Math.Min(LogPreviewLength, narration.Length)]);

        var profileId = string.IsNullOrWhiteSpace(voiceStyle) ? DefaultProfileId : voiceStyle;

        // 1. Start the asynchronous generation.
        var generationId = await StartGenerationAsync(narration, profileId, ct);

        // 2. Poll until the job finishes.
        await WaitForCompletionAsync(generationId, ct);

        // 3. Download the generated audio.
        var (audioBytes, isMp3) = await DownloadAudioAsync(generationId, ct);

        var extension = isMp3 ? "mp3" : "wav";
        // "audio/mpeg" is the registered media type for MP3; "audio/mp3" is not.
        var mimeType = isMp3 ? "audio/mpeg" : "audio/wav";

        // An empty directory string means "current directory" to Path.Combine; nothing to create then.
        if (outputDirectory.Length > 0)
        {
            Directory.CreateDirectory(outputDirectory);
        }

        var outputPath = Path.Combine(outputDirectory, $"scene_{scene.Order:D2}_narration.{extension}");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        _logger.LogInformation(
            "VoiceBox TTS tamamlandı — Sahne {Order}: {Size} bytes",
            scene.Order, audioBytes.LongLength);

        return new GeneratedMediaFile
        {
            SceneId = scene.Id,
            SceneOrder = scene.Order,
            Type = MediaFileType.AudioNarration,
            LocalPath = outputPath,
            FileSizeBytes = audioBytes.LongLength,
            DurationSeconds = scene.Duration,
            MimeType = mimeType,
            AiProvider = "voicebox"
        };
    }

    /// <summary>Posts the generation request and returns the job id reported by VoiceBox.</summary>
    private async Task<string> StartGenerationAsync(string text, string profileId, CancellationToken ct)
    {
        var requestBody = new
        {
            text,
            profile_id = profileId,
            language = "tr",
            engine = "kokoro"
        };

        using var content = new StringContent(
            JsonSerializer.Serialize(requestBody),
            Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync("generate", content, ct);
        response.EnsureSuccessStatusCode();

        var json = await response.Content.ReadAsStringAsync(ct);
        using var document = JsonDocument.Parse(json);

        var generationId = TryGetString(document.RootElement, "id");
        if (string.IsNullOrEmpty(generationId))
        {
            throw new InvalidOperationException("VoiceBox üretim başlatıldı ancak ID alınamadı.");
        }

        return generationId;
    }

    /// <summary>
    /// Polls the history endpoint until the given job is completed; throws when it fails or
    /// the polling budget is exhausted.
    /// </summary>
    private async Task WaitForCompletionAsync(string generationId, CancellationToken ct)
    {
        // Every iteration counts as an attempt, including failed or malformed responses,
        // so the loop is always bounded by MaxPollAttempts.
        for (var attempt = 0; attempt < MaxPollAttempts; attempt++)
        {
            await Task.Delay(PollInterval, ct);

            using var response = await _httpClient.GetAsync("history", ct);
            if (!response.IsSuccessStatusCode)
            {
                // Best effort: a failed poll is simply retried on the next tick.
                continue;
            }

            var json = await response.Content.ReadAsStringAsync(ct);
            using var document = JsonDocument.Parse(json);

            if (!TryGetHistoryItems(document.RootElement, out var items))
            {
                continue; // Unrecognised format.
            }

            foreach (var item in items.EnumerateArray())
            {
                if (TryGetString(item, "id") != generationId)
                {
                    continue;
                }

                // An entry without a status (or with a null status) is treated as completed.
                var status = item.TryGetProperty("status", out var statusProp)
                    ? statusProp.GetString() ?? "completed"
                    : "completed";

                if (status == "completed")
                {
                    return;
                }

                if (status == "failed")
                {
                    // ToString() copes with non-string error payloads instead of throwing on them.
                    var errorMsg = item.TryGetProperty("error", out var errProp)
                                   && errProp.ValueKind != JsonValueKind.Null
                        ? errProp.ToString()
                        : "Bilinmeyen üretim hatası";
                    throw new InvalidOperationException($"VoiceBox ses üretemedi: {errorMsg}");
                }

                break; // Still in progress; poll again.
            }
        }

        // Report cancellation as cancellation, not as a timeout.
        ct.ThrowIfCancellationRequested();
        throw new TimeoutException("VoiceBox ses üretimi zaman aşımına uğradı.");
    }

    /// <summary>Downloads the audio for a finished job and reports whether the server labelled it as MP3.</summary>
    private async Task<(byte[] Bytes, bool IsMp3)> DownloadAudioAsync(string generationId, CancellationToken ct)
    {
        using var response = await _httpClient.GetAsync($"audio/{generationId}", ct);
        response.EnsureSuccessStatusCode();

        var bytes = await response.Content.ReadAsByteArrayAsync(ct);

        // Media types are case-insensitive; anything other than audio/mpeg is saved as WAV.
        var isMp3 = string.Equals(
            response.Content.Headers.ContentType?.MediaType,
            "audio/mpeg",
            StringComparison.OrdinalIgnoreCase);

        return (bytes, isMp3);
    }

    /// <summary>
    /// Locates the history array, which is either the root element itself or the root object's
    /// <c>items</c> property.
    /// </summary>
    private static bool TryGetHistoryItems(JsonElement root, out JsonElement items)
    {
        if (root.ValueKind == JsonValueKind.Array)
        {
            items = root;
            return true;
        }

        if (root.ValueKind == JsonValueKind.Object
            && root.TryGetProperty("items", out var itemsProp)
            && itemsProp.ValueKind == JsonValueKind.Array)
        {
            items = itemsProp;
            return true;
        }

        items = default;
        return false;
    }

    /// <summary>
    /// Returns the named string property of a JSON object, or <c>null</c> when the element is
    /// not an object or the property is missing or not a string.
    /// </summary>
    private static string? TryGetString(JsonElement element, string propertyName) =>
        element.ValueKind == JsonValueKind.Object
        && element.TryGetProperty(propertyName, out var value)
        && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;
}