main
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled

This commit is contained in:
Harun CAN
2026-05-06 10:48:07 +02:00
parent 2d6f068363
commit a40619ef33
44 changed files with 4295 additions and 126 deletions
+132
View File
@@ -0,0 +1,132 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using SaasMediaWorker.Configuration;
using SaasMediaWorker.Models;
namespace SaasMediaWorker.Services;
/// <summary>
/// Client for the Minimax text-to-speech (T2A V2) HTTP API: turns narration text into an MP3 file on disk.
/// </summary>
public class MinimaxTtsService
{
    private const string BaseUrl = "https://api.minimax.chat/v1/";
    private const string SynthesisEndpoint = "t2a_v2";
    private const string ModelName = "speech-01-turbo";

    // Maximum number of narration characters echoed into the log line.
    private const int LogPreviewLength = 60;

    private readonly HttpClient _httpClient;
    private readonly ILogger<MinimaxTtsService> _logger;
    private readonly ApiSettings _settings;

    /// <summary>
    /// Creates the service and configures the supplied <see cref="HttpClient"/> with the Minimax
    /// base address, bearer token and a two-minute timeout.
    /// </summary>
    /// <param name="httpClient">HTTP client used for all Minimax calls; it is reconfigured here.</param>
    /// <param name="logger">Logger for progress messages.</param>
    /// <param name="settings">API settings providing the Minimax API key and the default voice id.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public MinimaxTtsService(
        HttpClient httpClient,
        ILogger<MinimaxTtsService> logger,
        IOptions<ApiSettings> settings)
    {
        ArgumentNullException.ThrowIfNull(httpClient);
        ArgumentNullException.ThrowIfNull(logger);
        ArgumentNullException.ThrowIfNull(settings);

        _httpClient = httpClient;
        _logger = logger;
        _settings = settings.Value;

        _httpClient.BaseAddress = new Uri(BaseUrl);
        _httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", _settings.MinimaxApiKey);
        _httpClient.Timeout = TimeSpan.FromMinutes(2);
    }

    /// <summary>
    /// Converts a scene's narration text to speech and saves it as
    /// <c>scene_NN_narration.mp3</c> inside <paramref name="outputDirectory"/>.
    /// </summary>
    /// <param name="scene">Scene whose narration text is synthesized.</param>
    /// <param name="outputDirectory">Directory the MP3 is written to; created if it does not exist.</param>
    /// <param name="voiceStyle">Voice id to use; when blank, the configured default voice id is used.</param>
    /// <param name="ct">Token that cancels the HTTP call and the file write.</param>
    /// <returns>Metadata describing the generated audio file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="scene"/> is <c>null</c>.</exception>
    /// <exception cref="HttpRequestException">The API answered with a non-success HTTP status.</exception>
    /// <exception cref="JsonException">The response body is not valid JSON.</exception>
    /// <exception cref="InvalidOperationException">The response does not contain a non-empty <c>data.audio</c> string.</exception>
    /// <exception cref="FormatException">The returned audio payload is not valid hexadecimal.</exception>
    public async Task<GeneratedMediaFile> GenerateNarrationAsync(
        ScenePayload scene,
        string outputDirectory,
        string voiceStyle,
        CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(scene);

        // Null-safe copy so the log preview below cannot throw on a scene without narration.
        var narration = scene.NarrationText ?? string.Empty;

        _logger.LogInformation(
            "🎙️ Minimax TTS üretimi — Sahne {Order}: \"{Text}\"",
            scene.Order,
            narration[..Math.Min(LogPreviewLength, narration.Length)]);

        // Use the caller's voice when given, otherwise fall back to the configured default.
        var voiceId = string.IsNullOrWhiteSpace(voiceStyle) ? _settings.MinimaxTtsVoiceId : voiceStyle;

        var requestBody = new
        {
            model = ModelName,
            text = narration,
            voice_setting = new
            {
                voice_id = voiceId,
                speed = 1.0,
                vol = 1.0,
                pitch = 0
            },
            audio_setting = new
            {
                sample_rate = 32000,
                bitrate = 128000,
                format = "mp3",
                channel = 1
            }
        };

        using var content = new StringContent(
            JsonSerializer.Serialize(requestBody),
            Encoding.UTF8,
            "application/json");

        using var response = await _httpClient.PostAsync(SynthesisEndpoint, content, ct);
        response.EnsureSuccessStatusCode();

        // Minimax T2A V2 returns JSON whose data.audio field holds the audio as a hex string.
        var responseString = await response.Content.ReadAsStringAsync(ct);

        var audioHex = ExtractAudioHex(responseString);
        if (audioHex is null)
        {
            // Covers missing/null "data", missing/non-string "audio" and an empty payload, so a
            // failed synthesis never produces a zero-byte file that looks like a success.
            throw new InvalidOperationException("Minimax API response invalid: " + responseString);
        }

        var audioBytes = ConvertHexStringToByteArray(audioHex);

        if (!string.IsNullOrEmpty(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }

        var outputPath = Path.Combine(outputDirectory, $"scene_{scene.Order:D2}_narration.mp3");
        await File.WriteAllBytesAsync(outputPath, audioBytes, ct);

        _logger.LogInformation(
            "Minimax TTS tamamlandı — Sahne {Order}: {Size} bytes",
            scene.Order, audioBytes.LongLength);

        return new GeneratedMediaFile
        {
            SceneId = scene.Id,
            SceneOrder = scene.Order,
            Type = MediaFileType.AudioNarration,
            LocalPath = outputPath,
            FileSizeBytes = audioBytes.LongLength,
            // NOTE(review): this is the scene's own duration value, not one measured from the audio.
            DurationSeconds = scene.Duration,
            MimeType = "audio/mpeg",
            AiProvider = "minimax"
        };
    }

    /// <summary>
    /// Reads <c>data.audio</c> from a response body.
    /// </summary>
    /// <param name="responseJson">Raw JSON response body.</param>
    /// <returns>The non-empty hex audio payload, or <c>null</c> when it is absent, not a string, or empty.</returns>
    private static string? ExtractAudioHex(string responseJson)
    {
        using var document = JsonDocument.Parse(responseJson);
        var root = document.RootElement;

        // TryGetProperty throws on non-object elements (e.g. "data": null), so check kinds first.
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("data", out var data)
            || data.ValueKind != JsonValueKind.Object
            || !data.TryGetProperty("audio", out var audio)
            || audio.ValueKind != JsonValueKind.String)
        {
            return null;
        }

        var hex = audio.GetString();
        return string.IsNullOrEmpty(hex) ? null : hex;
    }

    /// <summary>
    /// Decodes a hexadecimal string (two characters per byte) into bytes.
    /// </summary>
    /// <param name="hexString">Hex text with an even number of characters.</param>
    /// <returns>The decoded bytes.</returns>
    /// <exception cref="ArgumentException">The input has an odd length.</exception>
    /// <exception cref="FormatException">The input contains a non-hex character.</exception>
    private static byte[] ConvertHexStringToByteArray(string hexString)
    {
        if (hexString.Length % 2 != 0)
        {
            throw new ArgumentException("Hex string must have an even length.");
        }

        return Convert.FromHexString(hexString);
    }
}
+2
View File
@@ -21,6 +21,7 @@ public class RemotionService
List<GeneratedMediaFile> generatedMedia,
string? musicPath,
int targetDurationSeconds,
string visualEffect,
CancellationToken ct)
{
_logger.LogInformation("🎬 Remotion render başlatılıyor — Project: {Id}", projectId);
@@ -32,6 +33,7 @@ public class RemotionService
var props = new
{
musicPath = musicPath,
visualEffect = visualEffect,
scenes = scenes.Select(s => new
{
imagePath = s.ImagePath,
+47 -5
View File
@@ -24,6 +24,7 @@ public class VideoRenderPipeline
private readonly HiggsFieldService _higgsField;
private readonly TtsService _tts;
private readonly OpenAiTtsService _openAiTts;
private readonly MinimaxTtsService _minimaxTts;
private readonly SunoMusicService _sunoMusic;
private readonly AudioCraftService _audioCraft;
private readonly RemotionService _remotion;
@@ -36,6 +37,7 @@ public class VideoRenderPipeline
HiggsFieldService higgsField,
TtsService tts,
OpenAiTtsService openAiTts,
MinimaxTtsService minimaxTts,
SunoMusicService sunoMusic,
AudioCraftService audioCraft,
RemotionService remotion,
@@ -47,6 +49,7 @@ public class VideoRenderPipeline
_higgsField = higgsField;
_tts = tts;
_openAiTts = openAiTts;
_minimaxTts = minimaxTts;
_sunoMusic = sunoMusic;
_audioCraft = audioCraft;
_remotion = remotion;
@@ -176,13 +179,48 @@ public class VideoRenderPipeline
await progressCallback(70, "AMBIENT_GENERATION");
// ═══════════════════════════════════════
// ADIM 5: REMOTION — Video render (Ken Burns + Audio Merge + Subtitles)
// ADIM 5: REMOTION — Segmented Video render + FFmpeg Merge
// ═══════════════════════════════════════
_logger.LogInformation("🎬 Adım 5/6: Remotion render — Ken Burns + audio merge + subtitle");
await progressCallback(75, "MEDIA_MERGE");
_logger.LogInformation("🎬 Adım 5/6: Remotion Segmented Render — Ken Burns + audio merge + subtitle");
await progressCallback(70, "MEDIA_MERGE");
var finalLocalPath = await _remotion.RenderVideoAsync(
job.ProjectId, projectDir, scenes, allMediaFiles, musicFile?.LocalPath, job.TargetDuration, ct);
int chunkSize = 20; // 20 scenes per chunk to prevent OOM
var chunkPaths = new List<string>();
var chunkIndex = 0;
for (int i = 0; i < scenes.Count; i += chunkSize)
{
var chunkScenes = scenes.Skip(i).Take(chunkSize).ToList();
chunkIndex++;
_logger.LogInformation("Render Chunk {ChunkIndex} (Scenes {Start} to {End})",
chunkIndex, i + 1, i + chunkScenes.Count);
// Pass null for musicPath so Remotion doesn't add music to each chunk
var chunkPath = await _remotion.RenderVideoAsync(
$"{job.ProjectId}_chunk_{chunkIndex}",
projectDir,
chunkScenes,
allMediaFiles,
null, // No music per chunk
0,
job.VisualEffect,
ct);
chunkPaths.Add(chunkPath);
var progress = 70 + (int)(20.0 * (i + chunkScenes.Count) / scenes.Count);
await progressCallback(progress, "MEDIA_MERGE");
}
_logger.LogInformation("🎬 Chunklar birleştiriliyor ve müzik ekleniyor (FFmpeg)");
var finalLocalPath = await _ffmpeg.ConcatenateAndFinalize(
chunkPaths,
musicFile?.LocalPath,
projectDir,
job.ProjectId,
job.TargetDuration,
ct);
allMediaFiles.Add(new GeneratedMediaFile
{
@@ -234,6 +272,10 @@ public class VideoRenderPipeline
{
result = await _openAiTts.GenerateNarrationAsync(scene, outputDir, voiceStyle, ct);
}
else if (!string.IsNullOrEmpty(scene.TtsProvider) && scene.TtsProvider.Equals("minimax", StringComparison.OrdinalIgnoreCase))
{
result = await _minimaxTts.GenerateNarrationAsync(scene, outputDir, voiceStyle, ct);
}
else
{
// Default: ElevenLabs