// ContentGen_BE/media-worker/Services/AudioCraftService.cs

using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;

namespace SaasMediaWorker.Services;

/// <summary>
/// AudioCraft service — integration with Meta's MusicGen and AudioGen models.
///
/// All inference is performed remotely through the HuggingFace Inference API,
/// so no self-hosted model is required and the worker itself can run on
/// low-powered hardware such as a Raspberry Pi.
///
/// Capabilities (as described by the original author; the model-side limits
/// below are not enforced by this class):
///   MusicGen: text-to-music
///     - Prompts based on genre, BPM, instruments and mood
///     - Stereo output support
///     - Up to 30 seconds of generated audio
///   AudioGen: text-to-sound (sound effects)
///     - Ambient sounds: rain, wind, ocean waves
///     - Foley sounds: footsteps, creaking doors
///     - Per-scene ambient sound layers
///
/// Designed using knowledge from the multimodal-audiocraft skill.
/// </summary>
public class AudioCraftService
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<AudioCraftService> _logger;
    private readonly ApiSettings _settings;

    // HuggingFace Inference API model identifiers and base URL.
    private const string MUSICGEN_MODEL = "facebook/musicgen-small";
    private const string AUDIOGEN_MODEL = "facebook/audiogen-medium";
    private const string HF_API_BASE = "https://api-inference.huggingface.co/models";

    // Total number of HTTP attempts per inference call (first try + retries).
    private const int MaxInferenceAttempts = 3;

    // Bounds (in seconds) for the wait applied while the remote model is loading.
    private const int MinModelLoadWaitSeconds = 10;
    private const int DefaultModelLoadWaitSeconds = 20;
    private const int MaxModelLoadWaitSeconds = 300;

    // Pause inserted between consecutive ambient-sound requests (rate-limit protection).
    private static readonly TimeSpan AmbientRequestSpacing = TimeSpan.FromSeconds(1);

    /// <summary>
    /// Creates the service and configures the injected <see cref="HttpClient"/>
    /// with the HuggingFace bearer token and a five-minute request timeout.
    /// </summary>
    /// <param name="httpClient">HTTP client used for all inference requests.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    /// <param name="settings">API settings; <c>HuggingFaceApiKey</c> is read from here.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AudioCraftService(
        HttpClient httpClient,
        ILogger<AudioCraftService> logger,
        IOptions<ApiSettings> settings)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _settings = (settings ?? throw new ArgumentNullException(nameof(settings))).Value;

        if (string.IsNullOrWhiteSpace(_settings.HuggingFaceApiKey))
        {
            // Do not send an empty "Bearer " header; surface the misconfiguration instead.
            _logger.LogWarning(
                "HuggingFace API anahtarı yapılandırılmamış — istekler Authorization başlığı olmadan gönderilecek");
        }
        else
        {
            // Assigning the typed property replaces any existing value, so constructing
            // the service more than once over the same HttpClient cannot accumulate
            // duplicate Authorization header values (which DefaultRequestHeaders.Add would).
            _httpClient.DefaultRequestHeaders.Authorization =
                new System.Net.Http.Headers.AuthenticationHeaderValue(
                    "Bearer", _settings.HuggingFaceApiKey);
        }

        _httpClient.Timeout = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Generates background music for a project with MusicGen (text-to-music)
    /// and stores it as <c>background_music_audiocraft.wav</c> in
    /// <paramref name="outputDirectory"/>.
    /// </summary>
    /// <param name="musicPrompt">Base text prompt describing the music.</param>
    /// <param name="technicalParams">Optional technical hints appended to the prompt.</param>
    /// <param name="targetDurationSeconds">
    /// Desired duration. It is logged and echoed in the returned metadata only;
    /// it is not sent to the inference API by this method.
    /// </param>
    /// <param name="outputDirectory">Directory for the output file; created if missing.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Metadata describing the written audio file.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="musicPrompt"/> or <paramref name="outputDirectory"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The API returned an empty body, or the model was still unavailable after all attempts.
    /// </exception>
    /// <exception cref="HttpRequestException">The final HTTP attempt failed.</exception>
    public async Task<GeneratedMediaFile> GenerateMusicAsync(
        string musicPrompt,
        MusicTechnicalParams? technicalParams,
        int targetDurationSeconds,
        string outputDirectory,
        CancellationToken ct)
    {
        if (musicPrompt is null)
        {
            throw new ArgumentNullException(nameof(musicPrompt));
        }

        // Validate/create the target directory up front so a bad path fails
        // before the (slow, rate-limited) inference call rather than after it.
        EnsureOutputDirectory(outputDirectory);

        // Enrich the prompt with the technical parameters.
        var enrichedPrompt = EnrichMusicPrompt(musicPrompt, technicalParams);

        _logger.LogInformation(
            "🎵 MusicGen müzik üretimi — Prompt: \"{Prompt}\", Süre: {Duration}s",
            enrichedPrompt[..Math.Min(100, enrichedPrompt.Length)],
            targetDurationSeconds);

        var audioBytes = await CallHuggingFaceInference(
            MUSICGEN_MODEL, enrichedPrompt, ct);

        if (audioBytes == null || audioBytes.Length == 0)
        {
            throw new InvalidOperationException("MusicGen boş yanıt döndü");
        }

        // NOTE(review): the response bytes are stored with a .wav name and an
        // audio/wav MIME type without inspecting the response Content-Type —
        // confirm the API actually returns WAV for this model.
        var outputPath = Path.Combine(outputDirectory, "background_music_audiocraft.wav");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        _logger.LogInformation(
            "✅ MusicGen müzik üretildi: {Path} ({Size:N0} bytes)",
            outputPath, audioBytes.Length);

        return new GeneratedMediaFile
        {
            SceneId = string.Empty,
            SceneOrder = 0,
            Type = MediaFileType.AudioMusic,
            LocalPath = outputPath,
            FileSizeBytes = audioBytes.Length,
            DurationSeconds = targetDurationSeconds,
            MimeType = "audio/wav",
            AiProvider = "audiocraft-musicgen"
        };
    }

    /// <summary>
    /// Generates a per-scene ambient sound effect with AudioGen and stores it as
    /// <c>ambient_scene_NNN.wav</c> in <paramref name="outputDirectory"/>.
    /// This is best-effort: failures are logged and reported as <c>null</c>
    /// so the surrounding pipeline can continue.
    /// </summary>
    /// <param name="ambientPrompt">Text prompt describing the sound; blank means "skip".</param>
    /// <param name="sceneOrder">Scene index, used in the file name and the result.</param>
    /// <param name="durationSeconds">Duration echoed in the returned metadata.</param>
    /// <param name="outputDirectory">Directory for the output file; created if missing.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>
    /// Metadata for the written file, or <c>null</c> when the prompt is blank,
    /// the API returned no data, or generation failed.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled. Cancellation is deliberately not
    /// treated as a skippable failure.
    /// </exception>
    public async Task<GeneratedMediaFile?> GenerateAmbientSoundAsync(
        string ambientPrompt,
        int sceneOrder,
        double durationSeconds,
        string outputDirectory,
        CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(ambientPrompt))
        {
            return null;
        }

        _logger.LogInformation(
            "🔊 AudioGen ses efekti — Sahne: {Order}, Prompt: \"{Prompt}\"",
            sceneOrder, ambientPrompt[..Math.Min(80, ambientPrompt.Length)]);

        try
        {
            // Inside the try block on purpose: a bad directory is just another
            // best-effort failure for this method.
            EnsureOutputDirectory(outputDirectory);

            var audioBytes = await CallHuggingFaceInference(
                AUDIOGEN_MODEL, ambientPrompt, ct);

            if (audioBytes == null || audioBytes.Length == 0)
            {
                _logger.LogWarning("AudioGen boş yanıt — sahne {Order} için ambient atlanıyor", sceneOrder);
                return null;
            }

            var fileName = $"ambient_scene_{sceneOrder:D3}.wav";
            var outputPath = Path.Combine(outputDirectory, fileName);
            await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

            _logger.LogInformation(
                "✅ Ambient ses üretildi: {FileName} ({Size:N0} bytes)",
                fileName, audioBytes.Length);

            return new GeneratedMediaFile
            {
                SceneId = string.Empty,
                SceneOrder = sceneOrder,
                Type = MediaFileType.AudioAmbient,
                LocalPath = outputPath,
                FileSizeBytes = audioBytes.Length,
                DurationSeconds = durationSeconds,
                MimeType = "audio/wav",
                AiProvider = "audiocraft-audiogen"
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller asked to stop: propagate instead of reporting "no audio".
            // An HttpClient timeout also surfaces as OperationCanceledException but
            // leaves ct un-cancelled, so it still falls through to the handler below.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Ambient ses üretimi başarısız (sahne {Order}) — devam ediliyor", sceneOrder);
            return null;
        }
    }

    /// <summary>
    /// Generates ambient sounds for every scene that has an ambient prompt.
    /// Scenes are processed sequentially (not in parallel) to respect the
    /// HuggingFace rate limit and keep memory usage low, with a one-second
    /// pause between consecutive requests.
    /// </summary>
    /// <param name="scenes">Scenes to process; scenes with a blank prompt are skipped.</param>
    /// <param name="outputDirectory">Directory for the generated files.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Metadata for each successfully generated file, in scene iteration order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scenes"/> is <c>null</c>.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<List<GeneratedMediaFile>> GenerateAllAmbientSoundsAsync(
        List<ScenePayload> scenes,
        string outputDirectory,
        CancellationToken ct)
    {
        if (scenes is null)
        {
            throw new ArgumentNullException(nameof(scenes));
        }

        var results = new List<GeneratedMediaFile>();
        var requestIssued = false;

        foreach (var scene in scenes)
        {
            var ambientPrompt = scene.AmbientSoundPrompt;
            if (string.IsNullOrWhiteSpace(ambientPrompt))
            {
                continue;
            }

            // Space out requests. Waiting *before* every request except the first
            // keeps the same gap between calls without a pointless trailing delay
            // after the last scene.
            if (requestIssued)
            {
                await Task.Delay(AmbientRequestSpacing, ct);
            }

            requestIssued = true;

            var result = await GenerateAmbientSoundAsync(
                ambientPrompt, scene.Order, scene.Duration,
                outputDirectory, ct);

            if (result != null)
            {
                results.Add(result);
            }
        }

        _logger.LogInformation(
            "🔊 Toplam {Count} sahne için ambient ses üretildi", results.Count);

        return results;
    }

    // ── Private: HuggingFace Inference API call ─────────────────────

    /// <summary>
    /// Posts <paramref name="prompt"/> to the given model on the HuggingFace
    /// Inference API and returns the raw response body.
    /// HTTP 503 (model still loading) and <see cref="HttpRequestException"/>
    /// are retried, up to <see cref="MaxInferenceAttempts"/> attempts in total.
    /// </summary>
    /// <param name="modelId">Model identifier appended to the API base URL.</param>
    /// <param name="prompt">Text sent as the <c>inputs</c> field of the JSON payload.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The response body bytes of the first successful response.</returns>
    /// <exception cref="HttpRequestException">The final attempt failed at the HTTP level.</exception>
    /// <exception cref="InvalidOperationException">
    /// Every attempt ended without a usable response (e.g. the model kept answering 503).
    /// </exception>
    private async Task<byte[]?> CallHuggingFaceInference(
        string modelId, string prompt, CancellationToken ct)
    {
        var url = $"{HF_API_BASE}/{modelId}";
        var requestJson = JsonSerializer.Serialize(new { inputs = prompt });

        var maxRetries = MaxInferenceAttempts;
        for (var attempt = 1; attempt <= maxRetries; attempt++)
        {
            try
            {
                // Fresh content per attempt, and both request content and response
                // are disposed so connections/buffers are released promptly.
                using var content = new StringContent(
                    requestJson, Encoding.UTF8, "application/json");
                using var response = await _httpClient.PostAsync(url, content, ct);

                // Model is loading (cold start).
                if (response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable)
                {
                    var json = await response.Content.ReadAsStringAsync(ct);
                    _logger.LogInformation(
                        "Model yükleniyor ({Model}), deneme {Attempt}/{Max}...",
                        modelId, attempt, maxRetries);

                    // Only wait when another attempt will actually follow.
                    if (attempt < maxRetries)
                    {
                        var waitTime = ExtractEstimatedTime(json);
                        await Task.Delay(TimeSpan.FromSeconds(waitTime), ct);
                    }

                    continue;
                }

                response.EnsureSuccessStatusCode();

                // Binary audio response.
                return await response.Content.ReadAsByteArrayAsync(ct);
            }
            catch (HttpRequestException ex) when (attempt < maxRetries)
            {
                _logger.LogWarning(ex,
                    "HuggingFace API hatası, deneme {Attempt}/{Max}", attempt, maxRetries);

                // Linear back-off: 3s, 6s, ...
                await Task.Delay(3000 * attempt, ct);
            }
        }

        throw new InvalidOperationException(
            $"HuggingFace API {maxRetries} deneme sonrası başarısız — Model: {modelId}");
    }

    /// <summary>
    /// Appends technical hints (BPM, key, emotional arc) to a MusicGen prompt.
    /// BPM is added only when positive and the prompt does not already mention
    /// "BPM"; the key is added only when the prompt does not already contain it.
    /// <see cref="MusicTechnicalParams.Instruments"/> is currently not used here.
    /// </summary>
    /// <param name="basePrompt">Prompt to extend.</param>
    /// <param name="technical">Optional parameters; when <c>null</c> the prompt is returned unchanged.</param>
    /// <returns>The enriched prompt.</returns>
    private static string EnrichMusicPrompt(string basePrompt, MusicTechnicalParams? technical)
    {
        if (technical == null)
        {
            return basePrompt;
        }

        var enriched = basePrompt;

        // Add BPM (unless the prompt already mentions it).
        if (!enriched.Contains("BPM", StringComparison.OrdinalIgnoreCase) && technical.Bpm > 0)
        {
            enriched += $", {technical.Bpm} BPM";
        }

        // Add the musical key.
        if (!string.IsNullOrEmpty(technical.Key) &&
            !enriched.Contains(technical.Key, StringComparison.OrdinalIgnoreCase))
        {
            enriched += $", {technical.Key}";
        }

        // Add the emotional arc, with hyphens turned into spaces.
        if (!string.IsNullOrEmpty(technical.EmotionalArc))
        {
            enriched += $", {technical.EmotionalArc.Replace("-", " ")} energy";
        }

        return enriched;
    }

    /// <summary>
    /// Reads the numeric <c>estimated_time</c> property from a 503 response body
    /// and converts it to a wait in whole seconds, clamped to
    /// [<see cref="MinModelLoadWaitSeconds"/>, <see cref="MaxModelLoadWaitSeconds"/>].
    /// Returns <see cref="DefaultModelLoadWaitSeconds"/> when the body is blank,
    /// is not valid JSON, or has no numeric <c>estimated_time</c>.
    /// </summary>
    /// <param name="json">Response body of the 503 reply.</param>
    /// <returns>Seconds to wait before the next attempt.</returns>
    private static int ExtractEstimatedTime(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return DefaultModelLoadWaitSeconds;
        }

        try
        {
            using var document = JsonDocument.Parse(json);
            var root = document.RootElement;

            if (root.ValueKind == JsonValueKind.Object &&
                root.TryGetProperty("estimated_time", out var time) &&
                time.ValueKind == JsonValueKind.Number &&
                time.TryGetDouble(out var seconds))
            {
                // The upper bound keeps a bogus or huge server value from stalling
                // the worker or overflowing the delay.
                return (int)Math.Clamp(
                    seconds, MinModelLoadWaitSeconds, MaxModelLoadWaitSeconds);
            }
        }
        catch (JsonException)
        {
            // Body was not JSON (e.g. an HTML error page) — use the default wait.
        }

        return DefaultModelLoadWaitSeconds;
    }

    /// <summary>
    /// Ensures the output directory exists. An empty string (meaning "current
    /// directory" to <see cref="Path.Combine(string, string)"/>) is left untouched.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="outputDirectory"/> is <c>null</c>.</exception>
    private static void EnsureOutputDirectory(string outputDirectory)
    {
        if (outputDirectory is null)
        {
            throw new ArgumentNullException(nameof(outputDirectory));
        }

        if (outputDirectory.Length > 0)
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(outputDirectory);
        }
    }
}

/// <summary>
/// Technical parameters for MusicGen — parsed from the AI scenario output.
/// </summary>
public class MusicTechnicalParams
{
    /// <summary>Tempo in beats per minute; defaults to 0.</summary>
    public int Bpm { get; set; }

    /// <summary>Musical key; <c>null</c> when not specified.</summary>
    public string? Key { get; set; }

    /// <summary>Instrument names; starts as an empty list.</summary>
    public List<string> Instruments { get; set; } = new List<string>();

    /// <summary>Emotional arc descriptor; starts as an empty string.</summary>
    public string EmotionalArc { get; set; } = "";
}