Files
ContentGen_BE/media-worker/Services/AudioCraftService.cs
Harun CAN 85c35c73e8
Some checks failed
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled
main
2026-03-29 12:43:49 +03:00

303 lines
10 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;
namespace SaasMediaWorker.Services;
/// <summary>
/// AudioCraft service — integration with Meta's MusicGen and AudioGen models.
///
/// Works through the HuggingFace Inference API (no self-hosted model required).
/// Can run on a Raspberry Pi — model inference is performed in the cloud.
///
/// Capabilities:
///   MusicGen: text-to-music (music generation)
///     - Prompts based on genre, BPM, instrument and mood
///     - Stereo output support
///     - Generation of up to 30 seconds
///   AudioGen: text-to-sound (sound-effect generation)
///     - Ambient sounds: rain, wind, sea waves
///     - Foley sounds: footsteps, creaking doors
///     - Scene-based ambient sound layers
///
/// Designed with the knowledge obtained from the multimodal-audiocraft skill.
/// </summary>
public class AudioCraftService
{
    // HuggingFace Inference API endpoints
    private const string MusicGenModel = "facebook/musicgen-small";
    private const string AudioGenModel = "facebook/audiogen-medium";
    private const string HfApiBase = "https://api-inference.huggingface.co/models";

    // Total number of HTTP attempts made per inference call.
    private const int MaxInferenceAttempts = 3;

    // Bounds (in seconds) applied to the "estimated_time" hint returned with a 503.
    private const int MinModelLoadWaitSeconds = 10;
    private const int MaxModelLoadWaitSeconds = 300;
    private const int DefaultModelLoadWaitSeconds = 20;

    // Pause between consecutive ambient requests in the batch method.
    private static readonly TimeSpan AmbientRequestSpacing = TimeSpan.FromSeconds(1);

    private readonly HttpClient _httpClient;
    private readonly ILogger<AudioCraftService> _logger;
    private readonly ApiSettings _settings;

    /// <summary>
    /// Creates the service and configures the injected <see cref="HttpClient"/>
    /// with the HuggingFace bearer token and a five-minute timeout.
    /// </summary>
    /// <param name="httpClient">HTTP client used for every inference request.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    /// <param name="settings">Options wrapper; <c>HuggingFaceApiKey</c> is read from its value.</param>
    public AudioCraftService(
        HttpClient httpClient,
        ILogger<AudioCraftService> logger,
        IOptions<ApiSettings> settings)
    {
        _httpClient = httpClient;
        _logger = logger;
        _settings = settings.Value;

        // Assign instead of Add: Authorization is a single-valued header, so adding it to a
        // client that already carries one (e.g. a reused instance) would fail or duplicate it.
        _httpClient.DefaultRequestHeaders.Authorization =
            new System.Net.Http.Headers.AuthenticationHeaderValue(
                "Bearer", _settings.HuggingFaceApiKey);
        _httpClient.Timeout = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Text-to-music generation with MusicGen.
    /// Produces the background music file for a project.
    /// </summary>
    /// <param name="musicPrompt">Base text prompt describing the music.</param>
    /// <param name="technicalParams">Optional BPM/key/emotional-arc hints appended to the prompt.</param>
    /// <param name="targetDurationSeconds">
    /// Requested duration. It is logged and copied into the result's <c>DurationSeconds</c>;
    /// it is not sent to the inference API by this method.
    /// </param>
    /// <param name="outputDirectory">Directory that receives <c>background_music_audiocraft.wav</c>; created if missing.</param>
    /// <param name="ct">Cancellation token flowed to the HTTP call and the file write.</param>
    /// <returns>Metadata describing the written audio file.</returns>
    /// <exception cref="InvalidOperationException">
    /// The API returned an empty body, or the model was still unavailable after all attempts.
    /// </exception>
    /// <exception cref="HttpRequestException">The final HTTP attempt failed.</exception>
    public async Task<GeneratedMediaFile> GenerateMusicAsync(
        string musicPrompt,
        MusicTechnicalParams? technicalParams,
        int targetDurationSeconds,
        string outputDirectory,
        CancellationToken ct)
    {
        // Enrich the prompt with the technical parameters
        var enrichedPrompt = EnrichMusicPrompt(musicPrompt, technicalParams);

        _logger.LogInformation(
            "🎵 MusicGen müzik üretimi — Prompt: \"{Prompt}\", Süre: {Duration}s",
            enrichedPrompt[..Math.Min(100, enrichedPrompt.Length)],
            targetDurationSeconds);

        var audioBytes = await CallHuggingFaceInference(MusicGenModel, enrichedPrompt, ct);
        if (audioBytes is null || audioBytes.Length == 0)
        {
            throw new InvalidOperationException("MusicGen boş yanıt döndü");
        }

        // Save the WAV file (make sure the target directory exists first)
        Directory.CreateDirectory(outputDirectory);
        var outputPath = Path.Combine(outputDirectory, "background_music_audiocraft.wav");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        _logger.LogInformation(
            "✅ MusicGen müzik üretildi: {Path} ({Size:N0} bytes)",
            outputPath, audioBytes.Length);

        return new GeneratedMediaFile
        {
            SceneId = string.Empty,
            SceneOrder = 0,
            Type = MediaFileType.AudioMusic,
            LocalPath = outputPath,
            FileSizeBytes = audioBytes.Length,
            DurationSeconds = targetDurationSeconds,
            MimeType = "audio/wav",
            AiProvider = "audiocraft-musicgen"
        };
    }

    /// <summary>
    /// Scene-based ambient sound-effect generation with AudioGen.
    /// A different ambient sound can be produced for each scene.
    /// </summary>
    /// <param name="ambientPrompt">Text prompt for the sound; a blank prompt yields <c>null</c>.</param>
    /// <param name="sceneOrder">Scene index, used in the file name (<c>ambient_scene_NNN.wav</c>) and the result.</param>
    /// <param name="durationSeconds">Duration copied into the result's <c>DurationSeconds</c>.</param>
    /// <param name="outputDirectory">Directory that receives the file; created if missing.</param>
    /// <param name="ct">Cancellation token flowed to the HTTP call and the file write.</param>
    /// <returns>
    /// Metadata for the written file, or <c>null</c> when the prompt is blank, the API
    /// returns no data, or generation fails (ambient audio is best-effort).
    /// </returns>
    /// <exception cref="OperationCanceledException">The caller cancelled <paramref name="ct"/>.</exception>
    public async Task<GeneratedMediaFile?> GenerateAmbientSoundAsync(
        string ambientPrompt,
        int sceneOrder,
        double durationSeconds,
        string outputDirectory,
        CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(ambientPrompt))
        {
            return null;
        }

        _logger.LogInformation(
            "🔊 AudioGen ses efekti — Sahne: {Order}, Prompt: \"{Prompt}\"",
            sceneOrder, ambientPrompt[..Math.Min(80, ambientPrompt.Length)]);

        try
        {
            var audioBytes = await CallHuggingFaceInference(AudioGenModel, ambientPrompt, ct);
            if (audioBytes is null || audioBytes.Length == 0)
            {
                _logger.LogWarning("AudioGen boş yanıt — sahne {Order} için ambient atlanıyor", sceneOrder);
                return null;
            }

            Directory.CreateDirectory(outputDirectory);
            var fileName = $"ambient_scene_{sceneOrder:D3}.wav";
            var outputPath = Path.Combine(outputDirectory, fileName);
            await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

            _logger.LogInformation(
                "✅ Ambient ses üretildi: {FileName} ({Size:N0} bytes)",
                fileName, audioBytes.Length);

            return new GeneratedMediaFile
            {
                SceneId = string.Empty,
                SceneOrder = sceneOrder,
                Type = MediaFileType.AudioAmbient,
                LocalPath = outputPath,
                FileSizeBytes = audioBytes.Length,
                DurationSeconds = durationSeconds,
                MimeType = "audio/wav",
                AiProvider = "audiocraft-audiogen"
            };
        }
        // Failures are tolerated, but a cancellation requested by the caller must propagate
        // instead of being reported as "no ambient sound". HTTP timeouts (which surface as
        // cancellation without ct being signalled) are still treated as ordinary failures.
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            _logger.LogWarning(ex,
                "Ambient ses üretimi başarısız (sahne {Order}) — devam ediliyor", sceneOrder);
            return null;
        }
    }

    /// <summary>
    /// Batch ambient sound generation for all scenes of a project.
    /// Scenes are processed sequentially, one request at a time, with a one-second
    /// pause between requests (HuggingFace rate limit + Pi memory optimisation).
    /// </summary>
    /// <param name="scenes">Scenes to process; scenes without an ambient prompt are skipped.</param>
    /// <param name="outputDirectory">Directory that receives the generated files.</param>
    /// <param name="ct">Cancellation token flowed to each request and each pause.</param>
    /// <returns>Results for the scenes whose ambient sound was generated successfully.</returns>
    public async Task<List<GeneratedMediaFile>> GenerateAllAmbientSoundsAsync(
        List<ScenePayload> scenes,
        string outputDirectory,
        CancellationToken ct)
    {
        var results = new List<GeneratedMediaFile>();
        var requestAlreadySent = false;

        foreach (var scene in scenes)
        {
            var ambientPrompt = scene.AmbientSoundPrompt;
            if (string.IsNullOrWhiteSpace(ambientPrompt))
            {
                continue;
            }

            // Rate-limit protection: pause between requests, but not after the last one.
            if (requestAlreadySent)
            {
                await Task.Delay(AmbientRequestSpacing, ct);
            }
            requestAlreadySent = true;

            var result = await GenerateAmbientSoundAsync(
                ambientPrompt, scene.Order, scene.Duration,
                outputDirectory, ct);
            if (result != null)
            {
                results.Add(result);
            }
        }

        _logger.LogInformation(
            "🔊 Toplam {Count} sahne için ambient ses üretildi", results.Count);
        return results;
    }

    // ── Private: HuggingFace Inference API call ──────────────────

    /// <summary>
    /// Posts <paramref name="prompt"/> to the given model and returns the raw response bytes.
    /// A 503 response (model still loading) or an <see cref="HttpRequestException"/> is
    /// retried, up to <see cref="MaxInferenceAttempts"/> attempts in total.
    /// </summary>
    /// <exception cref="InvalidOperationException">The last attempt also returned 503.</exception>
    /// <exception cref="HttpRequestException">The last attempt failed at the HTTP level.</exception>
    private async Task<byte[]?> CallHuggingFaceInference(
        string modelId, string prompt, CancellationToken ct)
    {
        var url = $"{HfApiBase}/{modelId}";
        var requestJson = JsonSerializer.Serialize(new { inputs = prompt });

        for (var attempt = 1; attempt <= MaxInferenceAttempts; attempt++)
        {
            var isLastAttempt = attempt == MaxInferenceAttempts;
            TimeSpan retryDelay;

            try
            {
                // Fresh content per attempt; both content and response are released
                // as soon as this attempt is finished.
                using var content = new StringContent(requestJson, Encoding.UTF8, "application/json");
                using var response = await _httpClient.PostAsync(url, content, ct);

                if (response.StatusCode != System.Net.HttpStatusCode.ServiceUnavailable)
                {
                    response.EnsureSuccessStatusCode();
                    // Binary audio response
                    return await response.Content.ReadAsByteArrayAsync(ct);
                }

                // Model is loading (cold start)
                var body = await response.Content.ReadAsStringAsync(ct);
                _logger.LogInformation(
                    "Model yükleniyor ({Model}), deneme {Attempt}/{Max}...",
                    modelId, attempt, MaxInferenceAttempts);

                // Wait for the model-load time suggested by the API
                retryDelay = TimeSpan.FromSeconds(ExtractEstimatedTime(body));
            }
            catch (HttpRequestException ex) when (!isLastAttempt)
            {
                _logger.LogWarning(ex,
                    "HuggingFace API hatası, deneme {Attempt}/{Max}", attempt, MaxInferenceAttempts);
                retryDelay = TimeSpan.FromMilliseconds(3000 * attempt);
            }

            // No point in sleeping when there is no further attempt to make.
            if (!isLastAttempt)
            {
                await Task.Delay(retryDelay, ct);
            }
        }

        throw new InvalidOperationException(
            $"HuggingFace API {MaxInferenceAttempts} deneme sonrası başarısız — Model: {modelId}");
    }

    /// <summary>
    /// Enriches the MusicGen prompt with the technical parameters.
    /// Optimised according to the best practices learned from the AudioCraft skill.
    /// </summary>
    /// <remarks>
    /// Only <c>Bpm</c>, <c>Key</c> and <c>EmotionalArc</c> are used;
    /// <c>Instruments</c> is not appended to the prompt here.
    /// </remarks>
    /// <param name="basePrompt">Prompt supplied by the caller.</param>
    /// <param name="technical">Optional parameters; when <c>null</c> the prompt is returned unchanged.</param>
    /// <returns>The prompt with the applicable parameters appended, comma-separated.</returns>
    private static string EnrichMusicPrompt(string basePrompt, MusicTechnicalParams? technical)
    {
        if (technical == null)
        {
            return basePrompt;
        }

        var enriched = basePrompt;

        // Add BPM (unless the prompt already mentions it)
        if (!enriched.Contains("BPM", StringComparison.OrdinalIgnoreCase) && technical.Bpm > 0)
        {
            enriched += $", {technical.Bpm} BPM";
        }

        // Add key (skipped when the key text already occurs anywhere in the prompt)
        if (!string.IsNullOrEmpty(technical.Key) &&
            !enriched.Contains(technical.Key, StringComparison.OrdinalIgnoreCase))
        {
            enriched += $", {technical.Key}";
        }

        // Add emotional arc
        if (!string.IsNullOrEmpty(technical.EmotionalArc))
        {
            enriched += $", {technical.EmotionalArc.Replace("-", " ")} energy";
        }

        return enriched;
    }

    /// <summary>
    /// Reads the <c>estimated_time</c> hint (seconds) from a 503 response body.
    /// </summary>
    /// <param name="json">Response body; expected to be a JSON object but may be anything.</param>
    /// <returns>
    /// The hint truncated to whole seconds and clamped to
    /// [<see cref="MinModelLoadWaitSeconds"/>, <see cref="MaxModelLoadWaitSeconds"/>],
    /// or <see cref="DefaultModelLoadWaitSeconds"/> when no usable hint is present.
    /// </returns>
    private static int ExtractEstimatedTime(string json)
    {
        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            if (root.ValueKind == JsonValueKind.Object &&
                root.TryGetProperty("estimated_time", out var time) &&
                time.ValueKind == JsonValueKind.Number &&
                time.TryGetDouble(out var seconds))
            {
                // Clamp before the cast so an absurd server value can neither overflow
                // the int conversion nor stall the worker indefinitely.
                return (int)Math.Clamp(seconds, MinModelLoadWaitSeconds, MaxModelLoadWaitSeconds);
            }
        }
        catch (JsonException)
        {
            // Body was not valid JSON (e.g. an HTML error page) — use the default wait.
        }

        return DefaultModelLoadWaitSeconds; // Default: wait 20 seconds
    }
}
/// <summary>
/// MusicGen technical parameters — parsed from the AI scenario output.
/// </summary>
public class MusicTechnicalParams
{
    /// <summary>Tempo in beats per minute. Defaults to 0.</summary>
    public int Bpm { get; set; }

    /// <summary>Musical key as free text; <c>null</c> when not specified.</summary>
    public string? Key { get; set; }

    /// <summary>Instrument names. Starts as an empty list, never <c>null</c> by default.</summary>
    public List<string> Instruments { get; set; } = new List<string>();

    /// <summary>Emotional arc descriptor. Defaults to the empty string.</summary>
    public string EmotionalArc { get; set; } = "";
}