main
Some checks failed
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled

This commit is contained in:
Harun CAN
2026-03-29 12:43:49 +03:00
parent 829413f05d
commit 85c35c73e8
41 changed files with 6127 additions and 36 deletions

View File

@@ -0,0 +1,302 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;
namespace SaasMediaWorker.Services;
/// <summary>
/// AudioCraft service — integration with Meta's MusicGen and AudioGen models.
///
/// Talks to the hosted HuggingFace Inference API, so no self-hosted model is
/// needed; inference happens remotely and this process only sends prompts and
/// stores the returned audio bytes (the original author targets a Raspberry Pi).
///
/// Capabilities, per the original author's notes (not enforced by this class):
///   MusicGen: text-to-music
///     - prompts based on genre, BPM, instruments and mood
///     - stereo output support
///     - up to 30 seconds of audio
///   AudioGen: text-to-sound
///     - ambience: rain, wind, sea waves
///     - Foley: footsteps, creaking doors
///     - per-scene ambient sound layers
///
/// Designed using information from the multimodal-audiocraft skill.
/// </summary>
public class AudioCraftService
{
    // HuggingFace Inference API base URL and the model identifiers appended to it.
    private const string MusicGenModel = "facebook/musicgen-small";
    private const string AudioGenModel = "facebook/audiogen-medium";
    private const string HfApiBase = "https://api-inference.huggingface.co/models";

    // Total number of HTTP attempts per inference call (first try included).
    private const int MaxInferenceAttempts = 3;

    // Bounds (in seconds) for the wait applied while the remote model is loading.
    private const int DefaultModelLoadWaitSeconds = 20;
    private const int MinModelLoadWaitSeconds = 10;
    private const int MaxModelLoadWaitSeconds = 300;

    // Pause inserted between consecutive ambient requests to stay under the rate limit.
    private static readonly TimeSpan AmbientRequestSpacing = TimeSpan.FromSeconds(1);

    private readonly HttpClient _httpClient;
    private readonly ILogger<AudioCraftService> _logger;
    private readonly ApiSettings _settings;

    /// <summary>
    /// Creates the service and configures the supplied <see cref="HttpClient"/>
    /// with the HuggingFace bearer token and a five-minute timeout.
    /// </summary>
    /// <param name="httpClient">Client used for every inference call; its default headers and timeout are modified here.</param>
    /// <param name="logger">Logger for progress and failure messages.</param>
    /// <param name="settings">Options wrapper whose value supplies <c>HuggingFaceApiKey</c>.</param>
    /// <exception cref="ArgumentNullException">Any argument (or <c>settings.Value</c>) is null.</exception>
    public AudioCraftService(
        HttpClient httpClient,
        ILogger<AudioCraftService> logger,
        IOptions<ApiSettings> settings)
    {
        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _settings = settings?.Value ?? throw new ArgumentNullException(nameof(settings));

        // Assign rather than Add: Authorization is a single-value header, so Add throws
        // if the client instance already carries one (e.g. a reused or pre-configured client).
        _httpClient.DefaultRequestHeaders.Authorization =
            new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", _settings.HuggingFaceApiKey);
        _httpClient.Timeout = TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Generates background music for a project with MusicGen (text-to-music)
    /// and stores it as <c>background_music_audiocraft.wav</c> in <paramref name="outputDirectory"/>.
    /// </summary>
    /// <param name="musicPrompt">Base text prompt describing the music.</param>
    /// <param name="technicalParams">Optional BPM / key / emotional-arc hints appended to the prompt.</param>
    /// <param name="targetDurationSeconds">Requested duration; only logged and echoed in the result (see note below).</param>
    /// <param name="outputDirectory">Directory the file is written to; created if missing.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Metadata describing the written audio file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="musicPrompt"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The API returned no audio, or every attempt hit a loading model.</exception>
    /// <exception cref="HttpRequestException">The final HTTP attempt failed.</exception>
    public async Task<GeneratedMediaFile> GenerateMusicAsync(
        string musicPrompt,
        MusicTechnicalParams? technicalParams,
        int targetDurationSeconds,
        string outputDirectory,
        CancellationToken ct)
    {
        if (musicPrompt is null)
        {
            throw new ArgumentNullException(nameof(musicPrompt));
        }

        // Enrich the prompt with the technical parameters.
        var enrichedPrompt = EnrichMusicPrompt(musicPrompt, technicalParams);

        _logger.LogInformation(
            "🎵 MusicGen müzik üretimi — Prompt: \"{Prompt}\", Süre: {Duration}s",
            Truncate(enrichedPrompt, 100),
            targetDurationSeconds);

        var audioBytes = await CallHuggingFaceInferenceAsync(MusicGenModel, enrichedPrompt, ct);
        if (audioBytes is null || audioBytes.Length == 0)
        {
            throw new InvalidOperationException("MusicGen boş yanıt döndü");
        }

        // Persist the returned audio bytes.
        EnsureDirectoryExists(outputDirectory);
        var outputPath = Path.Combine(outputDirectory, "background_music_audiocraft.wav");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        _logger.LogInformation(
            "✅ MusicGen müzik üretildi: {Path} ({Size:N0} bytes)",
            outputPath, audioBytes.LongLength);

        return new GeneratedMediaFile
        {
            SceneId = string.Empty,
            SceneOrder = 0,
            Type = MediaFileType.AudioMusic,
            LocalPath = outputPath,
            FileSizeBytes = audioBytes.LongLength,
            // NOTE(review): the target duration is not sent to the API (the payload only
            // carries the prompt), so this is the requested value, not a measured one.
            DurationSeconds = targetDurationSeconds,
            // NOTE(review): MIME type is assumed; the response content type is not inspected.
            MimeType = "audio/wav",
            AiProvider = "audiocraft-musicgen"
        };
    }

    /// <summary>
    /// Generates a scene-specific ambient sound effect with AudioGen. This is a
    /// best-effort operation: generation failures are logged and reported as <c>null</c>.
    /// </summary>
    /// <param name="ambientPrompt">Text description of the sound; blank prompts are skipped.</param>
    /// <param name="sceneOrder">Scene index, used in the file name (<c>ambient_scene_NNN.wav</c>) and the result.</param>
    /// <param name="durationSeconds">Scene duration echoed in the result; not sent to the API.</param>
    /// <param name="outputDirectory">Directory the file is written to; created if missing.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Metadata for the written file, or <c>null</c> when the prompt is blank or generation failed.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<GeneratedMediaFile?> GenerateAmbientSoundAsync(
        string ambientPrompt,
        int sceneOrder,
        double durationSeconds,
        string outputDirectory,
        CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(ambientPrompt))
        {
            return null;
        }

        _logger.LogInformation(
            "🔊 AudioGen ses efekti — Sahne: {Order}, Prompt: \"{Prompt}\"",
            sceneOrder, Truncate(ambientPrompt, 80));

        try
        {
            var audioBytes = await CallHuggingFaceInferenceAsync(AudioGenModel, ambientPrompt, ct);
            if (audioBytes is null || audioBytes.Length == 0)
            {
                _logger.LogWarning("AudioGen boş yanıt — sahne {Order} için ambient atlanıyor", sceneOrder);
                return null;
            }

            EnsureDirectoryExists(outputDirectory);
            var fileName = $"ambient_scene_{sceneOrder:D3}.wav";
            var outputPath = Path.Combine(outputDirectory, fileName);
            await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

            _logger.LogInformation(
                "✅ Ambient ses üretildi: {FileName} ({Size:N0} bytes)",
                fileName, audioBytes.Length);

            return new GeneratedMediaFile
            {
                SceneId = string.Empty,
                SceneOrder = sceneOrder,
                Type = MediaFileType.AudioAmbient,
                LocalPath = outputPath,
                FileSizeBytes = audioBytes.Length,
                DurationSeconds = durationSeconds,
                MimeType = "audio/wav",
                AiProvider = "audiocraft-audiogen"
            };
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Caller-requested cancellation is not a generation failure; let it propagate
            // instead of logging a misleading warning and returning null.
            throw;
        }
        catch (Exception ex)
        {
            // Ambient audio is optional, so any other failure (including an HttpClient
            // timeout) is logged and the scene simply goes without an ambient layer.
            _logger.LogWarning(ex,
                "Ambient ses üretimi başarısız (sahne {Order}) — devam ediliyor", sceneOrder);
            return null;
        }
    }

    /// <summary>
    /// Generates ambient sounds for every scene that has an ambient prompt.
    /// Scenes are processed sequentially, one request at a time, with a one-second
    /// pause between requests (HuggingFace rate limit and low memory use).
    /// </summary>
    /// <param name="scenes">Scenes to process; those without an ambient prompt are skipped.</param>
    /// <param name="outputDirectory">Directory the files are written to.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>Metadata for each successfully generated file, in scene iteration order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scenes"/> is null.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<List<GeneratedMediaFile>> GenerateAllAmbientSoundsAsync(
        List<ScenePayload> scenes,
        string outputDirectory,
        CancellationToken ct)
    {
        if (scenes is null)
        {
            throw new ArgumentNullException(nameof(scenes));
        }

        var results = new List<GeneratedMediaFile>();
        var requestIssued = false;

        foreach (var scene in scenes)
        {
            var ambientPrompt = scene.AmbientSoundPrompt;
            if (string.IsNullOrWhiteSpace(ambientPrompt))
            {
                continue;
            }

            // Rate-limit protection: pause between requests only, so there is no
            // pointless wait after the last scene has been processed.
            if (requestIssued)
            {
                await Task.Delay(AmbientRequestSpacing, ct);
            }

            requestIssued = true;

            var result = await GenerateAmbientSoundAsync(
                ambientPrompt, scene.Order, scene.Duration, outputDirectory, ct);
            if (result is not null)
            {
                results.Add(result);
            }
        }

        _logger.LogInformation(
            "🔊 Toplam {Count} sahne için ambient ses üretildi", results.Count);
        return results;
    }

    // ── Private: HuggingFace Inference API call ──────────────────────

    /// <summary>
    /// Posts <paramref name="prompt"/> to the given model and returns the raw response body.
    /// Makes up to <see cref="MaxInferenceAttempts"/> attempts: a 503 (model still loading)
    /// waits for the server's estimated load time, any other HTTP failure backs off linearly.
    /// </summary>
    /// <param name="modelId">Model path appended to the API base URL.</param>
    /// <param name="prompt">Text sent as the <c>inputs</c> field of the JSON payload.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The response body bytes of the first successful attempt.</returns>
    /// <exception cref="HttpRequestException">The last attempt failed with an HTTP/network error.</exception>
    /// <exception cref="InvalidOperationException">The last attempt still reported a loading model.</exception>
    private async Task<byte[]?> CallHuggingFaceInferenceAsync(
        string modelId, string prompt, CancellationToken ct)
    {
        var url = $"{HfApiBase}/{modelId}";
        var requestJson = JsonSerializer.Serialize(new { inputs = prompt });

        for (var attempt = 1; attempt <= MaxInferenceAttempts; attempt++)
        {
            try
            {
                int loadWaitSeconds;

                // A fresh content object per attempt, and both content and response are
                // disposed as soon as the attempt is finished.
                using (var content = new StringContent(requestJson, Encoding.UTF8, "application/json"))
                using (var response = await _httpClient.PostAsync(url, content, ct))
                {
                    if (response.StatusCode != System.Net.HttpStatusCode.ServiceUnavailable)
                    {
                        response.EnsureSuccessStatusCode();

                        // Binary audio response.
                        return await response.Content.ReadAsByteArrayAsync(ct);
                    }

                    // 503: the model is still loading (cold start).
                    var body = await response.Content.ReadAsStringAsync(ct);
                    _logger.LogInformation(
                        "Model yükleniyor ({Model}), deneme {Attempt}/{Max}...",
                        modelId, attempt, MaxInferenceAttempts);
                    loadWaitSeconds = ExtractEstimatedTime(body);
                }

                // Waiting only makes sense when another attempt will follow.
                if (attempt < MaxInferenceAttempts)
                {
                    await Task.Delay(TimeSpan.FromSeconds(loadWaitSeconds), ct);
                }
            }
            catch (HttpRequestException ex) when (attempt < MaxInferenceAttempts)
            {
                _logger.LogWarning(ex,
                    "HuggingFace API hatası, deneme {Attempt}/{Max}", attempt, MaxInferenceAttempts);

                // Linear back-off: 3 s, 6 s, ...
                await Task.Delay(3000 * attempt, ct);
            }
        }

        // Reached only when the final attempt also answered 503.
        throw new InvalidOperationException(
            $"HuggingFace API {MaxInferenceAttempts} deneme sonrası başarısız — Model: {modelId}");
    }

    /// <summary>
    /// Appends technical hints to a MusicGen prompt: the BPM (when positive and the
    /// prompt does not already mention "BPM"), the key (when not already present) and
    /// the emotional arc (hyphens replaced by spaces, followed by "energy").
    /// </summary>
    /// <param name="basePrompt">Prompt to extend.</param>
    /// <param name="technical">Optional parameters; when null the prompt is returned unchanged.</param>
    /// <returns>The enriched prompt.</returns>
    private static string EnrichMusicPrompt(string basePrompt, MusicTechnicalParams? technical)
    {
        if (technical is null)
        {
            return basePrompt;
        }

        var enriched = basePrompt;

        // Add the BPM unless the prompt already mentions one.
        if (technical.Bpm > 0 && !enriched.Contains("BPM", StringComparison.OrdinalIgnoreCase))
        {
            enriched += $", {technical.Bpm} BPM";
        }

        // Add the musical key unless it is already part of the prompt.
        if (!string.IsNullOrEmpty(technical.Key) &&
            !enriched.Contains(technical.Key, StringComparison.OrdinalIgnoreCase))
        {
            enriched += $", {technical.Key}";
        }

        // Add the emotional arc.
        if (!string.IsNullOrEmpty(technical.EmotionalArc))
        {
            enriched += $", {technical.EmotionalArc.Replace("-", " ")} energy";
        }

        // NOTE(review): technical.Instruments is never added to the prompt — confirm
        // whether that is intentional.
        return enriched;
    }

    /// <summary>
    /// Reads the <c>estimated_time</c> number (seconds) from a 503 response body.
    /// </summary>
    /// <param name="json">Response body; may be empty or not JSON at all.</param>
    /// <returns>
    /// The estimate truncated to whole seconds and clamped to
    /// [<see cref="MinModelLoadWaitSeconds"/>, <see cref="MaxModelLoadWaitSeconds"/>],
    /// or <see cref="DefaultModelLoadWaitSeconds"/> when no usable estimate is present.
    /// </returns>
    private static int ExtractEstimatedTime(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return DefaultModelLoadWaitSeconds;
        }

        try
        {
            using var document = JsonDocument.Parse(json);
            var root = document.RootElement;

            if (root.ValueKind == JsonValueKind.Object &&
                root.TryGetProperty("estimated_time", out var estimate) &&
                estimate.ValueKind == JsonValueKind.Number &&
                estimate.TryGetDouble(out var seconds))
            {
                // Clamp before the cast so an absurdly large estimate can neither
                // overflow the int conversion nor stall the worker indefinitely.
                return (int)Math.Clamp(
                    seconds, (double)MinModelLoadWaitSeconds, (double)MaxModelLoadWaitSeconds);
            }
        }
        catch (JsonException)
        {
            // The body was not valid JSON (e.g. an HTML error page); use the default wait.
        }

        return DefaultModelLoadWaitSeconds;
    }

    /// <summary>Returns at most the first <paramref name="maxLength"/> characters of <paramref name="value"/> (used for log output).</summary>
    private static string Truncate(string value, int maxLength) =>
        value.Length <= maxLength ? value : value[..maxLength];

    /// <summary>
    /// Creates <paramref name="directory"/> when it does not exist yet. An empty path is
    /// left alone so that files keep resolving relative to the current directory.
    /// </summary>
    private static void EnsureDirectoryExists(string directory)
    {
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }
    }
}
/// <summary>
/// Technical parameters for a MusicGen prompt — parsed from the AI scenario output.
/// </summary>
public class MusicTechnicalParams
{
    /// <summary>Tempo in beats per minute; defaults to 0.</summary>
    public int Bpm { get; set; }

    /// <summary>Musical key; null when none was provided.</summary>
    public string? Key { get; set; }

    /// <summary>Instrument names; starts out as an empty list.</summary>
    public List<string> Instruments { get; set; } = new List<string>();

    /// <summary>Emotional arc descriptor; defaults to the empty string.</summary>
    public string EmotionalArc { get; set; } = "";
}