Files
ContentGen_BE/src/modules/video-ai/video-ai.service.ts
Harun CAN 9486f86cca
Some checks failed
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled
main
2026-04-05 20:37:15 +03:00

1323 lines
75 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { Injectable, Logger, InternalServerErrorException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { GoogleGenAI } from '@google/genai';
/**
 * Caller-supplied parameters for one script-generation request.
 */
export interface ScriptGenerationInput {
  /** Subject the video should be about. */
  topic: string;
  /** Desired total length of the video, in seconds. */
  targetDurationSeconds: number;
  /** Target language for the script (e.g. a short code such as `tr` or `en`). */
  language: string;
  /** Requested visual style identifier. */
  videoStyle: string;
  /** Frame format: PORTRAIT_9_16 | LANDSCAPE_16_9 | SQUARE_1_1. */
  aspectRatio?: string;
  /** Optional reference URL supplied with the request. */
  referenceUrl?: string;
  /** Optional SEO keywords supplied with the request. */
  seoKeywords?: string[];
  /** Content sourced from X/Twitter — the tweet data. */
  sourceTweet?: {
    authorUsername: string;
    text: string;
    media: {
      type: string;
      url: string;
      width: number;
      height: number;
    }[];
    metrics: {
      replies: number;
      retweets: number;
      likes: number;
      views: number;
    };
    isThread: boolean;
  };
}
/**
 * One scene of a generated script.
 */
export interface GeneratedScene {
  /** Position of the scene within the script. */
  order: number;
  /** Optional scene title. */
  title?: string;
  /** Text to be narrated for this scene. */
  narrationText: string;
  /** Prompt describing the visuals of this scene. */
  visualPrompt: string;
  /** Subtitle text shown for this scene. */
  subtitleText: string;
  /** Scene length, in seconds. */
  durationSeconds: number;
  /** Transition name (plain string; not narrowed to a union here). */
  transitionType: string;
  /** Optional voice identifier. NOTE(review): producer/consumer is not visible here — confirm. */
  voiceId?: string;
  /** AudioGen: per-scene sound effect. */
  ambientSoundPrompt?: string;
}
/**
 * Five free-text descriptors that together characterize a visual style.
 */
export interface StyleDNA {
  /** Reference works or creators. */
  reference: string;
  /** Lighting description. */
  lighting: string;
  /** Lens / camera description. */
  lens: string;
  /** Color description. */
  color: string;
  /** Texture / finish description. */
  texture: string;
}
/**
 * All supported video styles — exported so the frontend can render its style picker.
 *
 * Each entry carries a machine `value`, a display `label`, a display `category` and an
 * emoji `icon`. `label` and `category` are Turkish strings used at runtime; do not
 * translate them here. The `as const` assertion keeps every `value` as a string literal.
 */
export const VIDEO_STYLES = [
// === Cinematic & Film ===
{ value: 'CINEMATIC', label: 'Sinematik', category: 'Film & Sinema', icon: '🎬' },
{ value: 'DOCUMENTARY', label: 'Belgesel', category: 'Film & Sinema', icon: '📹' },
{ value: 'STORYTELLING', label: 'Hikâye Anlatımı', category: 'Film & Sinema', icon: '📖' },
{ value: 'NEWS', label: 'Haber', category: 'Film & Sinema', icon: '📰' },
{ value: 'ARTISTIC', label: 'Sanatsal', category: 'Film & Sinema', icon: '🎨' },
{ value: 'NOIR', label: 'Film Noir', category: 'Film & Sinema', icon: '🖤' },
{ value: 'VLOG', label: 'Vlog (Günlük)', category: 'Film & Sinema', icon: '📱' },
// === Animation ===
{ value: 'ANIME', label: 'Anime', category: 'Animasyon', icon: '⛩️' },
{ value: 'ANIMATION_3D', label: '3D Animasyon (Pixar)', category: 'Animasyon', icon: '🧊' },
{ value: 'ANIMATION_2D', label: '2D Animasyon (Klasik)', category: 'Animasyon', icon: '✏️' },
{ value: 'STOP_MOTION', label: 'Stop Motion', category: 'Animasyon', icon: '🧸' },
{ value: 'MOTION_COMIC', label: 'Hareketli Çizgi Roman', category: 'Animasyon', icon: '💥' },
{ value: 'CARTOON', label: 'Karikatür / Çizgi Film', category: 'Animasyon', icon: '🎭' },
{ value: 'CLAYMATION', label: 'Claymation (Kil Animasyon)', category: 'Animasyon', icon: '🏺' },
{ value: 'PIXEL_ART', label: 'Pixel Art (8-bit)', category: 'Animasyon', icon: '👾' },
{ value: 'ISOMETRIC', label: 'İzometrik Animasyon', category: 'Animasyon', icon: '🔷' },
// === Education & Information ===
{ value: 'EDUCATIONAL', label: 'Eğitim', category: 'Eğitim & Bilgi', icon: '🎓' },
{ value: 'INFOGRAPHIC', label: 'İnfografik', category: 'Eğitim & Bilgi', icon: '📊' },
{ value: 'WHITEBOARD', label: 'Whiteboard Animasyon', category: 'Eğitim & Bilgi', icon: '📝' },
{ value: 'EXPLAINER', label: 'Explainer Video', category: 'Eğitim & Bilgi', icon: '💡' },
{ value: 'DATA_VIZ', label: 'Veri Görselleştirme', category: 'Eğitim & Bilgi', icon: '📈' },
// === Retro & Nostalgic ===
{ value: 'RETRO_80S', label: 'Retro 80s Synthwave', category: 'Retro & Nostaljik', icon: '🕹️' },
{ value: 'VINTAGE_FILM', label: 'Vintage Film (Super 8)', category: 'Retro & Nostaljik', icon: '📽️' },
{ value: 'VHS', label: 'VHS Aesthetic', category: 'Retro & Nostaljik', icon: '📼' },
{ value: 'POLAROID', label: 'Polaroid / Analog Fotoğraf', category: 'Retro & Nostaljik', icon: '📸' },
{ value: 'RETRO_90S', label: 'Retro 90s Y2K', category: 'Retro & Nostaljik', icon: '💿' },
// === Art Movements ===
{ value: 'WATERCOLOR', label: 'Suluboya', category: 'Sanat Akımları', icon: '🎨' },
{ value: 'OIL_PAINTING', label: 'Yağlı Boya', category: 'Sanat Akımları', icon: '🖌️' },
{ value: 'IMPRESSIONIST', label: 'Empresyonist', category: 'Sanat Akımları', icon: '🌅' },
{ value: 'POP_ART', label: 'Pop Art (Warhol)', category: 'Sanat Akımları', icon: '🎯' },
{ value: 'UKIYO_E', label: 'Ukiyo-e (Japon Ahşap Baskı)', category: 'Sanat Akımları', icon: '🏯' },
{ value: 'ART_DECO', label: 'Art Deco', category: 'Sanat Akımları', icon: '✨' },
{ value: 'SURREAL', label: 'Sürrealist (Dalí)', category: 'Sanat Akımları', icon: '🌀' },
{ value: 'COMIC_BOOK', label: 'Çizgi Roman (Marvel/DC)', category: 'Sanat Akımları', icon: '💬' },
{ value: 'SKETCH', label: 'Karakalem Çizim', category: 'Sanat Akımları', icon: '✍️' },
// === Modern & Minimal ===
{ value: 'MINIMALIST', label: 'Minimalist (Apple)', category: 'Modern & Minimal', icon: '⚪' },
{ value: 'GLASSMORPHISM', label: 'Glassmorphism / Cam', category: 'Modern & Minimal', icon: '🔮' },
{ value: 'NEON', label: 'Neon Glow', category: 'Modern & Minimal', icon: '💜' },
{ value: 'CYBERPUNK', label: 'Cyberpunk', category: 'Modern & Minimal', icon: '🤖' },
{ value: 'STEAMPUNK', label: 'Steampunk', category: 'Modern & Minimal', icon: '⚙️' },
{ value: 'ABSTRACT', label: 'Soyut / Abstract', category: 'Modern & Minimal', icon: '🔵' },
// === Photographic ===
{ value: 'PRODUCT', label: 'Ürün Fotoğrafçılığı', category: 'Fotoğrafik', icon: '📦' },
{ value: 'FASHION', label: 'Moda Fotoğrafçılığı', category: 'Fotoğrafik', icon: '👗' },
{ value: 'AERIAL', label: 'Havadan (Drone)', category: 'Fotoğrafik', icon: '🚁' },
{ value: 'MACRO', label: 'Makro / Yakın Çekim', category: 'Fotoğrafik', icon: '🔬' },
{ value: 'PORTRAIT', label: 'Portre Fotoğrafçılığı', category: 'Fotoğrafik', icon: '🧑' },
] as const;
/** Union of the `value` string literals of every entry in `VIDEO_STYLES`. */
export type VideoStyleKey = (typeof VIDEO_STYLES)[number]['value'];
/**
 * Search-engine oriented metadata attached to a generated script.
 */
export interface SeoMetadata {
  /** SEO title. */
  title: string;
  /** SEO / meta description. */
  description: string;
  /** Keyword list. */
  keywords: string[];
  /** Hashtag list. */
  hashtags: string[];
  /** Free-form structured-data object (arbitrary string keys, unvalidated values). */
  schemaMarkup: Record<string, unknown>;
}
/**
 * Full script object returned by script generation: descriptive metadata, SEO data,
 * the ordered scenes, audio direction and per-platform social texts.
 */
export interface GeneratedScript {
  /** Descriptive information about the video as a whole. */
  metadata: {
    title: string;
    description: string;
    totalDurationSeconds: number;
    language: string;
    hashtags: string[];
  };
  /** Search-engine oriented metadata. */
  seo: SeoMetadata;
  /** The scenes that make up the video. */
  scenes: GeneratedScene[];
  /** Text prompt describing the background music. */
  musicPrompt: string;
  /** AudioCraft: genre/mood description. */
  musicStyle: string;
  /** AudioCraft: technical parameters. */
  musicTechnical: {
    bpm: number;
    key?: string;
    instruments: string[];
    emotionalArc: string;
  };
  /** AudioGen: project-wide ambient sounds. */
  ambientSoundPrompts: string[];
  /** Description of the narration voice. */
  voiceStyle: string;
  /** Ready-made texts for each social platform. */
  socialContent: {
    youtubeTitle: string;
    youtubeDescription: string;
    tiktokCaption: string;
    instagramCaption: string;
    twitterText: string;
  };
}
// ═══════════════════════════════════════════════════════════════════════
// SYSTEM PROMPT — skill-enriched (16 skill integrations)
// ═══════════════════════════════════════════════════════════════════════
// Integrated skills:
// - seo-optimizer: SEO-friendly title, description, keywords, schema markup
// - content-creator: hook formulas, engagement strategy
// - content-research-writer: verified information, source-based script
// - humanizer: avoiding AI writing patterns, human-sounding language
// - social-content: platform-specific title/description/caption generation
// - voice-ai-development: voice style and TTS optimization
// - multimodal-audiocraft: MusicGen/AudioGen music and sound-effect generation
// NOTE(review): the heading says 16 integrations but only the 7 above are listed — confirm.
// ═══════════════════════════════════════════════════════════════════════
/**
 * System instruction text for script generation (used as `systemInstruction` in
 * `generateVideoScript`).
 *
 * The text covers, in order: writing/SEO/hook principles, the five-layer rules for
 * `visualPrompt`, a per-style "visual DNA" map, narration/subtitle/scene rules, audio and
 * voice direction, social-media texts, and finally the strict JSON output shape.
 *
 * This is runtime data: changing any character changes what the model is told.
 *
 * NOTE(review): the style → visual DNA map below has entries for 14 styles (CINEMATIC …
 * SURREAL); the remaining `VIDEO_STYLES` values have no entry in this text — confirm
 * whether they are handled elsewhere.
 */
const SYSTEM_PROMPT = `You are an elite YouTube Shorts scriptwriter, cinematic video producer, audio designer, and SEO specialist with 15+ years of experience. You create content that ranks #1 on search, hooks viewers in 1 second, sounds genuinely human, and features professionally crafted audio landscapes.
Your mission: Create a production-ready video script as a JSON object. This script feeds directly into an automated AI video pipeline — every field is consumed by a real system. Be precise.
═══════════════════════════════════
CORE PRINCIPLES
═══════════════════════════════════
HUMAN WRITING (anti-AI detection):
- Write narration like a real person talks — not like a textbook
- Vary sentence length. Short. Then longer ones that breathe
- Use "I," "we," personal pronouns when it fits
- Never use: "delve," "tapestry," "landscape" (abstract), "crucial," "moreover," "furthermore," "testament," "underscore," "foster," "garner," "showcase"
- Never use rule-of-three lists ("X, Y, and Z" pattern) repeatedly
- Never use negative parallelisms ("It's not just X, it's Y")
- Avoid em dashes (—) excessively
- Be specific: "47 days" not "a while," "$3,200" not "significant revenue"
- Have opinions. React to facts, don't just report them
- Acknowledge uncertainty: "I'm not sure how to feel about this" is more human than listing pros/cons neutrally
SEO OPTIMIZATION:
- Video title: Primary keyword within first 3 words, under 60 characters
- Description: 2-3 secondary keywords naturally woven in, 150-200 chars
- Keywords: 8-12 LSI keywords related to the main topic
- Hashtags: 5-8 hashtags, mix of broad (#Shorts) and niche-specific
- Schema markup hint for VideoObject structured data
HOOK MASTERY (first 2 seconds):
Use ONE of these proven hook types:
- Curiosity: "Nobody talks about [insider knowledge]"
- Data shock: "[Specific number] — and that changes everything"
- Story: "Last week, [unexpected thing] happened"
- Contrarian: "[Common belief] is wrong. Here's why"
- Question: "What if you could [desirable outcome]?"
DO NOT start with generic phrases like "In this video..." or "Today we'll discuss..."
CONTENT QUALITY:
- Use real, verifiable data points — cite sources when possible
- Structure: Hook → Problem → Evidence → Insight → CTA
- Every scene must create curiosity for the next one
- End with a thought that sticks — not a generic "like and subscribe"
- Make the viewer feel smarter after watching
═══════════════════════════════════════════════════════════════
VISUAL PROMPTS — 5-LAYER ARCHITECTURE™ (ALWAYS IN ENGLISH)
═══════════════════════════════════════════════════════════════
Each scene's "visualPrompt" MUST be written in English. This field feeds directly into AI image/video generation models (Higgsfield, Flux, Kling). A weak, vague prompt produces generic stock-photo results. A layered, specific prompt produces cinematic-grade visuals.
Every single visualPrompt you write MUST contain ALL 5 of these layers. No exceptions. If you skip any layer, the image will look generic and forgettable.
━━━ LAYER 1: SUBJECT SPECIFICITY ━━━
Describe WHAT is in the scene with extreme specificity. Never write vague descriptions.
❌ BAD: "A woman standing outside"
❌ BAD: "A futuristic city at night"
✅ GOOD: "A woman in her late 30s wearing a long charcoal wool overcoat, standing on a rain-wet cobblestone sidewalk outside a dimly lit antiquarian bookstore, her left hand resting on a weathered wooden doorframe, a folded umbrella dripping water at her side"
✅ GOOD: "A sprawling cyberpunk megacity viewed from a rooftop garden 200 stories high, overgrown with glowing bioluminescent vines, holographic billboards in Japanese kanji flickering between the towers, autonomous flying vehicles weaving between glass skyscrapers connected by transparent skywalks"
Rules:
• Name specific materials, textures, and surfaces ("brushed titanium", "cracked leather", "moss-covered stone")
• Include precise spatial relationships ("foreground", "middle-ground", "far background")
• Describe the environment with the same detail as the main subject
• Add small "lived-in" details that make scenes feel real ("coffee stain on the table", "dog-eared book pages", "condensation on glass")
━━━ LAYER 2: MOOD & VISUAL REFERENCE ━━━
Every image belongs to a visual universe. Define that universe with specific references.
❌ BAD: "dark and moody"
❌ BAD: "cinematic look"
✅ GOOD: "Blade Runner 2049 color palette with heavy teal-and-amber contrast, Denis Villeneuve visual language, desolate monumental scale"
✅ GOOD: "Wes Anderson symmetry with pastel Easter-egg color palette, The Grand Budapest Hotel framing, whimsical yet melancholic tone, perfectly centered subjects"
✅ GOOD: "National Geographic documentary realism, Planet Earth II visual texture, intimate close-up wildlife photography, David Attenborough's visual signature"
✅ GOOD: "Studio Ghibli hand-painted watercolor backgrounds with lush green landscapes, Hayao Miyazaki cloudscapes, magical realism atmosphere"
Mood reference options (use 2-3 per scene combined):
• Film references: "Blade Runner", "Interstellar", "Mad Max: Fury Road", "Spirited Away", "2001: A Space Odyssey", "The Grand Budapest Hotel", "Tenet", "Dune"
• Photography references: "Annie Leibovitz portrait lighting", "National Geographic close-up", "Sebastião Salgado black-and-white photojournalism", "Steve McCurry color richness"
• Art movements: "Renaissance chiaroscuro", "Impressionist broken color", "Art Deco geometry", "Japanese ukiyo-e woodblock", "Bauhaus minimalism", "Surrealist Dalí dreamscapes"
• Color systems: "70s Kodachrome warm tones", "Fujifilm Velvia saturated", "muted Scandinavian palette", "cyberpunk neon (magenta/teal/violet)", "earthy terracotta and sage"
CRITICAL RULE: Once you establish a visual universe in Scene 1, ALL subsequent scenes MUST stay within that same visual world. If Scene 1 is Blade Runner, Scene 5 cannot suddenly look like Wes Anderson.
━━━ LAYER 3: LIGHTING (Source, Direction, Quality) ━━━
Lighting is the single most important factor in image quality. Never leave it to chance.
You must specify THREE lighting properties:
A) SOURCE — Where is the light coming from? (sun, neon signs, candle, spotlight, overcast sky, monitor glow)
B) DIRECTION — From which side relative to camera? (from camera-left, backlighting from behind subject, overhead, from below, rim light from behind-right)
C) QUALITY — How does the light feel? (soft and diffused through curtains, harsh and directional, dappled through tree leaves, warm golden, cold blue-white clinical)
❌ BAD: "good lighting" or just "golden hour"
✅ GOOD: "Late golden hour sunlight raking across the scene from camera-right at a low 15-degree angle, casting long dramatic shadows to the left, warm amber (3200K) backlighting creating a bright rim-light halo around the subject's silhouette, fill light bouncing softly off a nearby white wall on camera-left"
✅ GOOD: "Harsh overhead fluorescent tubes casting unflattering blue-white (6500K) light with hard-edged shadows directly below every object, a single warm desk lamp in the foreground creating a small pool of amber light that contrasts with the cold clinical environment"
✅ GOOD: "Diffused overcast daylight filtering through floor-to-ceiling frosted glass panels, creating even, shadowless illumination with a soft pearl-gray quality, punctuated by a single beam of direct sunlight breaking through a gap in the clouds and hitting the subject's hands"
━━━ LAYER 4: COMPOSITION & CAMERA ━━━
Tell exactly where the camera is, what lens is being used, and how the frame is organized.
A) CAMERA POSITION: "eye-level", "low angle looking up (worm's eye)", "high angle looking down (bird's eye)", "overhead flat lay", "Dutch angle 15-degree tilt", "POV through character's eyes"
B) CAMERA DISTANCE: "extreme close-up on eyes", "medium close-up chest and up", "medium shot waist up", "full body shot", "wide establishing shot", "extreme wide showing entire landscape"
C) CAMERA MOVEMENT: "static locked-off tripod", "slow push-in towards subject", "smooth dolly tracking left-to-right", "orbiting 360° around subject", "crane rising up", "handheld with subtle shake"
D) FRAMING: "rule of thirds with subject on left intersect", "perfectly centered symmetrical", "framed through doorway", "leading lines converging to vanishing point", "negative space in upper third for text overlay"
❌ BAD: "wide shot of the city"
✅ GOOD: "Extreme wide establishing shot from a drone at 300 meters altitude, camera slowly descending at 45-degree angle, the ancient temple complex positioned on the lower-right third of frame, leading lines of the river drawing the eye from lower-left foreground to the temple, vast jungle canopy filling the upper two-thirds creating a sense of overwhelming scale, a thin mist layer at the treeline adding depth separation between foreground and background"
ASPECT RATIO COMPOSITION GUIDE:
• 9:16 (PORTRAIT — Shorts/Reels): Vertical framing, subject fills center-frame, use foreground-to-background depth, create visual interest through vertical stacking of elements, leave negative space in top or bottom third for text/subtitles
• 16:9 (LANDSCAPE — YouTube): Classic cinematic horizontal composition, use wide establishing shots, rule of thirds, leading lines across the horizontal plane, anamorphic letterbox feel
• 1:1 (SQUARE — Instagram): Centered symmetrical composition, tight and focused framing, every corner of frame contributes, no wasted space, bold and graphic
━━━ LAYER 5: FINISHING DETAILS (Texture, Film, Post-Processing) ━━━
This is what separates amateur prompts from professional ones. These details define the visual identity.
A) DEPTH OF FIELD: "razor-thin f/1.4 bokeh with only eyes in focus", "deep focus f/11 everything sharp", "medium depth f/4 with soft background", "tilt-shift miniature effect"
B) FILM/SENSOR: "shot on 35mm Kodak Portra 400 film with visible grain", "65mm IMAX large format ultra-sharp", "8mm Super 8 home movie with heavy grain and light leaks", "medium format Hasselblad 6x6", "anamorphic Panavision with oval bokeh and horizontal lens flare"
C) COLOR GRADING: "desaturated teal-and-orange blockbuster grade", "warm nostalgic sepia undertones", "high-contrast crushed blacks", "pastel low-saturation soft", "vivid hyper-saturated pop art", "monochrome with single color accent"
D) TEXTURE & ARTIFACTS: "subtle film grain ISO 800", "clean digital noise-free", "chromatic aberration at frame edges", "lens flare from bright source", "dust motes floating in light beams", "rain droplets on lens surface", "vintage halation around highlights"
E) POST-PROCESSING: "split-toning warm highlights cool shadows", "slight vignette darkening corners", "bloom on bright areas", "mist/haze diffusion"
❌ BAD: "nice looking image"
✅ GOOD: "Shot on vintage Cooke anamorphic lens with characteristic oval bokeh and warm amber flare, shallow depth of field f/2.0 isolating the subject from a dreamy out-of-focus background, subtle Kodak Vision3 500T tungsten film grain, color graded with lifted blacks and desaturated midtones creating a faded cinematic look, gentle halation glow around practical light sources, slight vignette pulling focus to center frame"
━━━ NEGATIVE PROMPT (What to AVOID) ━━━
Every visualPrompt MUST end with a brief negative constraint line starting with "Avoid:" to prevent common AI generation artifacts:
"Avoid: text overlays, watermarks, brand logos, recognizable celebrity faces, distorted anatomy, extra fingers, blurry faces, stock photo aesthetic, oversaturated CGI plastic look, generic clip art style, UI elements"
Adjust the negative prompt per scene as needed, but ALWAYS include it.
━━━ VISUAL CONTINUITY ACROSS ALL SCENES ━━━
This is CRITICAL. All scenes in one project must feel like they belong to the same film/visual world:
• Scene 1 establishes the COLOR PALETTE — all subsequent scenes use the same palette
• Scene 1 establishes the FILM STOCK/TEXTURE — all subsequent scenes match
• Scene 1 establishes the LIGHTING STYLE — all subsequent scenes maintain similar lighting quality
• Scene 1 establishes the CAMERA LANGUAGE — all subsequent scenes follow similar framing rules
• Transitioning between moods within a video is allowed, but must be GRADUAL (e.g. warm→cool over 3 scenes, not a sudden jump)
• Include a "Visual Continuity Anchor" at the start of each prompt after Scene 1: "Continuing the [established reference] visual language from previous scenes:"
━━━ MINIMUM PROMPT LENGTH ━━━
• Hook scene (Scene 1): Minimum 80 words — this establishes the entire visual world
• Middle scenes: Minimum 50 words each
• Closing scene: Minimum 50 words — emotional visual peak
• If any visualPrompt is under these minimums, you are not being specific enough. Add more Layer 1 (subject) and Layer 5 (finishing) details.
━━━ VIDEO STYLE → VISUAL DNA MAP ━━━
Match the "videoStyle" to its corresponding visual DNA. These are your default creative parameters per style:
CINEMATIC:
Reference: Denis Villeneuve, Roger Deakins cinematography, Christopher Nolan IMAX
Lighting: Dramatic key-and-fill, single strong motivated source, deep shadows
Lens: 35mm anamorphic or 65mm IMAX, shallow DOF
Color: Teal-orange grade, desaturated midtones, crushed blacks
Texture: Film grain, anamorphic lens flare, subtle vignette
DOCUMENTARY:
Reference: National Geographic, Planet Earth II, David Attenborough
Lighting: Natural available light, no artificial sources, authentic
Lens: 50mm prime or telephoto for wildlife, deep focus
Color: Natural warm tones, true-to-life, slight warm saturation boost
Texture: Clean digital but not sterile, slight handheld vibration feel
EDUCATIONAL:
Reference: Kurzgesagt, 3Blue1Brown, Vox explainers
Lighting: Flat even illumination, clean and clear
Lens: Overhead/diagram view or isometric angles
Color: Bold saturated primary colors on dark or white backgrounds
Texture: Vector-clean sharp edges, infographic precision, flat design
STORYTELLING:
Reference: Wes Anderson, Studio Ghibli, illustrated storybooks
Lighting: Warm golden soft diffused light, fairy-tale quality
Lens: Medium lens, symmetrical centered framing
Color: Pastel palette, vintage warmth, muted yet colorful
Texture: Painterly soft texture, watercolor wash, gentle
NEWS:
Reference: BBC World, CNN, Al Jazeera graphics packages
Lighting: High-key even broadcast studio or natural location light
Lens: Standard 50mm, eye-level, clean composition
Color: Neutral cool, high contrast, professional
Texture: Clean sharp digital, motion graphics overlays
ARTISTIC:
Reference: Tarkovsky, Wong Kar-wai, Terrence Malick
Lighting: Extreme chiaroscuro, unconventional color temperatures
Lens: Wide angle with distortion or extreme close macro
Color: Surreal color shifts, split-toning, bold unconventional palettes
Texture: Heavy grain, intentional imperfections, analog artifacts
ANIME:
Reference: Makoto Shinkai (Your Name, Weathering With You), Studio Ghibli, Ufotable
Lighting: Ethereal glowing light rays, dramatic cel-shaded lighting, light bloom
Lens: Dynamic manga-inspired angles, dramatic low/high angles, speed lines in action
Color: Vivid saturated anime palette, glowing skies, luminous highlights
Texture: Clean cel-shaded lines, painted backgrounds with photorealistic detail, sparkle particles
ANIMATION_3D:
Reference: Pixar (Soul, WALL-E), DreamWorks, Unreal Engine 5 cinematics
Lighting: Global illumination, subsurface scattering on skin, volumetric god rays
Lens: Virtual cinema camera with realistic DOF, Pixar-style dramatic angles
Color: Rich saturated yet natural, UE5 photorealistic rendering palette
Texture: Smooth subdivision surfaces, micro-detail on materials, photorealistic shaders
ANIMATION_2D:
Reference: Classic Disney (hand-drawn era), Cartoon Saloon (Wolfwalkers), French animation
Lighting: Painted light and shadow, flat but expressive
Lens: Flat 2D composition, layered parallax depth, theatrical staging
Color: Gouache/watercolor palette, limited but expressive color choices
Texture: Visible brushstrokes, hand-drawn line quality, paper texture subtlety
STOP_MOTION:
Reference: Laika Studios (Coraline, Kubo), Wes Anderson (Fantastic Mr. Fox), Aardman
Lighting: Miniature set practical lighting, visible light rigs at small scale, warm
Lens: Macro lens shallow DOF revealing miniature scale, tilt-shift
Color: Handcrafted tactile palette, slightly desaturated warm tones
Texture: Visible material textures (clay, felt, wood, fabric), fingerprints on clay, puppet joints
INFOGRAPHIC:
Reference: Kurzgesagt, Visual Capitalist, Hans Rosling data visualization
Lighting: Flat, no directional light, pure graphic illumination
Lens: Orthographic/isometric projection, no perspective distortion
Color: Data-driven palette — 3-5 semantic colors, dark background with bright accents
Texture: Ultra-clean vector, sharp geometric edges, flat design with subtle shadows
RETRO_80S:
Reference: Synthwave/Outrun aesthetic, Stranger Things, Drive (2011)
Lighting: Neon purple/pink/cyan glow, laser grid lines, chrome reflections
Lens: Wide angle capturing expansive neon landscapes, low angle
Color: Neon magenta, electric cyan, deep purple, chrome silver, hot pink sunset gradients
Texture: CRT scanlines, VHS tracking artifacts, retro pixel grid, chrome reflections
MINIMALIST:
Reference: Apple design language, Dieter Rams, Japanese zen aesthetics
Lighting: Clean soft diffused studio light, seamless white/gray background
Lens: Product photography precision, clean medium shot
Color: Monochrome with single accent color, vast negative space
Texture: Ultra-smooth surfaces, no grain, no artifacts, pristine
SURREAL:
Reference: Salvador Dalí, René Magritte, M.C. Escher, Alex Grey
Lighting: Impossible light sources, multiple conflicting shadows, dreamy glow
Lens: Fish-eye distortion, impossible geometry, recursive perspectives
Color: Hyper-vivid otherworldly palette, colors that don't exist in nature
Texture: Ultra-detailed photo-real rendering of impossible objects, smooth dreamlike surfaces
═══════════════════════════════════
NARRATION TEXT (IN TARGET LANGUAGE)
═══════════════════════════════════
• Short, punchy sentences — max 15 words each
• Scene 1: powerful hook creating instant curiosity
• Build escalating intrigue through middle scenes
• End with a thought-provoking statement
• Word count: targetDuration × 2.5 words/second
• Conversational, not academic — like explaining to a smart friend
• Use rhetorical questions, surprising facts, emotional language
═══════════════════════════════════
SUBTITLE TEXT (IN TARGET LANGUAGE)
═══════════════════════════════════
• Max 8 words per line (mobile readability)
• 1-2 short lines per scene
• Simplify complex narration into punchy visual text
═══════════════════════════════════
SCENE STRUCTURE
═══════════════════════════════════
• Min 4 scenes, max 10 scenes
• Scene 1 (HOOK): 2-4 seconds — instant attention
• Middle scenes: 5-12 seconds each — build the story
• Final scene (CLOSER): 3-6 seconds — memorable conclusion
• Total duration: within ±5 seconds of targetDuration
TRANSITION TYPES:
• CUT — Quick, impactful. Most scene changes
• FADE — Emotional, reflective. Openings/closings
• DISSOLVE — Smooth time transitions
• ZOOM_IN — Focus on detail
• ZOOM_OUT — Reveal scale/context
═══════════════════════════════════
MUSIC & AUDIO DESIGN (AudioCraft)
═══════════════════════════════════
You are also an expert audio designer using Meta AudioCraft (MusicGen + AudioGen).
"musicPrompt" (for MusicGen text-to-music):
- Write detailed, specific English descriptions for AI music generation
- Include: genre, sub-genre, tempo/BPM, key instruments, mood, energy level
- Specify emotional arc: "starts calm, builds to epic climax, resolves softly"
- Good: "Cinematic orchestral trailer music, 90 BPM, minor key, strings and brass building from pianissimo to fortissimo, ethereal choir in background, Hans Zimmer style tension"
- Bad: "Epic music" or "background music"
- Duration hint is NOT needed (handled by system)
"musicStyle" (short genre tag): e.g. "cinematic-orchestral", "lo-fi-hiphop", "electronic-ambient"
"musicTechnical" (structured params):
- bpm: integer (60-180)
- key: optional, e.g. "C minor", "D major"
- instruments: array of 3-6 main instruments
- emotionalArc: describe energy curve, e.g. "low-to-high-to-fade"
PER-SCENE AMBIENT SOUND (for AudioGen text-to-sound):
Each scene can have an "ambientSoundPrompt" — realistic environmental/foley sounds:
- Describe the soundscape naturally: "rain hitting a window with distant thunder"
- Include texture: "wooden footsteps on creaky floor", "bubbling lava with hissing steam"
- Keep it grounded: AudioGen generates realistic sounds, not music
- Scenes without ambient needs: set to null or omit
"ambientSoundPrompts" (project-level): Array of 2-3 reusable ambient sound descriptions for the entire project.
Audio layers in final video (mixed by FFmpeg):
1. Narration (TTS) — loudest, -3dB
2. Background Music (MusicGen) — soft, -18dB under narration
3. Ambient/SFX (AudioGen per scene) — subtle, -22dB
═══════════════════════════════════
VOICE STYLE
═══════════════════════════════════
Describe ideal TTS voice with precision for ElevenLabs:
- Gender, estimated age range
- Tone: warm, authoritative, excited, calm, mysterious
- Pacing: fast for hooks, measured for data, slow for dramatic reveals
- Effects: slight reverb for epic moments, clean for data
═══════════════════════════════════
SOCIAL MEDIA CONTENT
═══════════════════════════════════
Generate platform-specific text:
- youtubeTitle: Primary keyword first, under 60 chars, curiosity-driven
- youtubeDescription: 500+ chars, include CTA, 2-3 secondary keywords, link placeholder
- tiktokCaption: Under 150 chars, trending format, 3-5 hashtags
- instagramCaption: Under 300 chars, emotional hook, 5 hashtags
- twitterText: Under 280 chars, hot take format, 2 hashtags
═══════════════════════════════════
OUTPUT FORMAT — STRICT JSON ONLY
═══════════════════════════════════
Return ONLY valid JSON. No markdown. No backticks. No explanation.
{
"metadata": {
"title": "string",
"description": "string — max 200 chars",
"totalDurationSeconds": number,
"language": "string — ISO 639-1",
"hashtags": ["string"] — 5-8 hashtags WITHOUT #
},
"seo": {
"title": "string — SEO-optimized title, primary keyword first, under 60 chars",
"description": "string — meta description, 150-200 chars, includes secondary keywords",
"keywords": ["string"] — 8-12 LSI keywords,
"hashtags": ["string"] — same as metadata.hashtags,
"schemaMarkup": {
"@type": "VideoObject",
"name": "string",
"description": "string",
"duration": "string — ISO 8601 format PT##S"
}
},
"scenes": [
{
"order": 1,
"title": "string",
"narrationText": "string — in target language, HUMAN-SOUNDING",
"visualPrompt": "string — in English for Higgsfield AI",
"subtitleText": "string — in target language, max 8 words/line",
"durationSeconds": number,
"transitionType": "CUT" | "FADE" | "DISSOLVE" | "ZOOM_IN" | "ZOOM_OUT",
"ambientSoundPrompt": "string | null — English, for AudioGen, realistic environment sound"
}
],
"musicPrompt": "string — detailed English description for MusicGen (genre, BPM, instruments, mood)",
"musicStyle": "string — short genre tag, e.g. cinematic-orchestral",
"musicTechnical": {
"bpm": number,
"key": "string | null",
"instruments": ["string"],
"emotionalArc": "string"
},
"ambientSoundPrompts": ["string"] — 2-3 project-level ambient sound descriptions for AudioGen,
"voiceStyle": "string — TTS characteristics for ElevenLabs",
"socialContent": {
"youtubeTitle": "string — under 60 chars",
"youtubeDescription": "string — 500+ chars with CTA",
"tiktokCaption": "string — under 150 chars",
"instagramCaption": "string — under 300 chars",
"twitterText": "string — under 280 chars"
}
}`;
@Injectable()
export class VideoAiService {
private readonly logger = new Logger(VideoAiService.name);
private readonly genAI: GoogleGenAI;
private readonly modelName: string;
/**
 * Reads the Gemini settings from configuration and creates the GenAI client.
 *
 * `gemini.apiKey` defaults to an empty string and `gemini.model` to `gemini-2.5-flash`.
 * A missing key only produces a warning; the client is still constructed with the
 * empty key.
 */
constructor(private readonly configService: ConfigService) {
  const geminiKey = this.configService.get<string>('gemini.apiKey', '');
  const configuredModel = this.configService.get<string>('gemini.model', 'gemini-2.5-flash');

  if (!geminiKey) {
    this.logger.warn('⚠️ GOOGLE_API_KEY ayarlanmamış — AI servisi devre dışı');
  }

  this.modelName = configuredModel;
  this.genAI = new GoogleGenAI({ apiKey: geminiKey });
}
/**
 * Generates a complete video script for the given input using Gemini.
 *
 * Steps: build the user prompt, call `generateContent` with `SYSTEM_PROMPT` as the
 * system instruction and a JSON response MIME type, parse/validate the returned text,
 * then run the humanizer pass and the visual-prompt enrichment.
 *
 * @param input Topic, target duration, language, style and optional extras.
 * @returns The enriched script.
 * @throws InternalServerErrorException when the model returns an empty response or any
 *   step fails. Exceptions of that type raised inside are rethrown unchanged; any other
 *   error is logged (with its stack) and wrapped.
 */
async generateVideoScript(input: ScriptGenerationInput): Promise<GeneratedScript> {
  this.logger.log(
    `Senaryo üretimi başladı — Konu: "${input.topic}", ` +
      `Süre: ${input.targetDurationSeconds}s, Dil: ${input.language}`,
  );
  const userPrompt = this.buildUserPrompt(input);
  try {
    const response = await this.genAI.models.generateContent({
      model: this.modelName,
      contents: userPrompt,
      config: {
        systemInstruction: SYSTEM_PROMPT,
        temperature: 0.85,
        topP: 0.95,
        topK: 40,
        maxOutputTokens: 8192,
        responseMimeType: 'application/json',
      },
    });
    const rawText = response.text ?? '';
    if (!rawText.trim()) {
      throw new InternalServerErrorException(
        'Gemini API boş yanıt döndü. Lütfen tekrar deneyin.',
      );
    }
    const script = this.parseAndValidateScript(rawText);
    const humanizedScript = this.applyHumanizerPass(script);
    const enrichedScript = this.enrichVisualPrompts(humanizedScript, input.videoStyle, input.aspectRatio);

    // Diagnostics only: this block must never be able to fail a successful generation.
    // Guard against an empty scene list (division by zero → NaN) and against a missing
    // visualPrompt, and count words on any whitespace rather than single spaces.
    const scenes = enrichedScript.scenes ?? [];
    const totalPromptWords = scenes.reduce(
      (sum, scene) => sum + (scene.visualPrompt ?? '').split(/\s+/).filter(Boolean).length,
      0,
    );
    const avgPromptWords = scenes.length > 0 ? Math.round(totalPromptWords / scenes.length) : 0;

    this.logger.log(
      `✅ Senaryo üretildi — "${enrichedScript.metadata.title}", ` +
        `${scenes.length} sahne, ${enrichedScript.metadata.totalDurationSeconds}s, ` +
        `SEO keywords: ${enrichedScript.seo?.keywords?.length ?? 0}, ` +
        `Avg visual prompt words: ${avgPromptWords}`,
    );
    return enrichedScript;
  } catch (error) {
    // Already the exception type callers expect (e.g. the empty-response case above).
    if (error instanceof InternalServerErrorException) throw error;

    if (error instanceof Error) {
      // Pass the stack so the failure is traceable in the logs.
      this.logger.error(`Gemini API hatası: ${error.message}`, error.stack);
      throw new InternalServerErrorException(`Senaryo üretimi başarısız: ${error.message}`);
    }
    this.logger.error('Gemini API hatası: Bilinmeyen');
    throw new InternalServerErrorException('Senaryo üretimi başarısız: API hatası');
  }
}
/**
 * Builds the user prompt sent to the model alongside the system prompt.
 *
 * The prompt always contains the core requirements and the 5-layer visual
 * prompt reminder. SEO keywords, a reference URL and X/Twitter source
 * content are appended only when present on the input.
 *
 * @param input Script generation request.
 * @returns The full prompt text.
 */
private buildUserPrompt(input: ScriptGenerationInput): string {
  // Language code -> English language name; unknown codes are passed through.
  const langMap: Record<string, string> = {
    tr: 'Turkish', en: 'English', es: 'Spanish', de: 'German',
    fr: 'French', it: 'Italian', pt: 'Portuguese', ru: 'Russian',
    ja: 'Japanese', ko: 'Korean', zh: 'Chinese (Simplified)',
    ar: 'Arabic', hi: 'Hindi', nl: 'Dutch', sv: 'Swedish', pl: 'Polish',
  };
  const languageName = langMap[input.language] || input.language;
  // Aspect ratio -> composition guidance
  const aspectRatioGuide = this.getAspectRatioGuide(input.aspectRatio);
  let prompt =
    `Create a YouTube Shorts video script about: "${input.topic}"\n\n` +
    `Requirements:\n` +
    `- Target duration: ${input.targetDurationSeconds} seconds\n` +
    `- Narration and subtitle language: ${languageName} (${input.language})\n` +
    `- Visual prompts: ALWAYS in English (for AI image/video generation)\n` +
    `- Video style: ${input.videoStyle} — STRICTLY follow the Visual DNA Map for this style\n` +
    // Separate the ratio token from its guidance; without the separator the
    // two ran together (e.g. "PORTRAIT_9_16Vertical framing ...").
    `- Aspect ratio: ${input.aspectRatio || 'PORTRAIT_9_16'} — ${aspectRatioGuide}\n` +
    `- Make it viral-worthy, visually stunning, and intellectually captivating\n` +
    `- The first 2 seconds must hook the viewer immediately\n` +
    `- Write narration that sounds HUMAN — avoid AI writing patterns\n` +
    `- Include SEO-optimized metadata with keywords and schema markup\n` +
    `- Generate social media captions for YouTube, TikTok, Instagram, Twitter\n`;
  // Reminder of the 5-layer visual prompt architecture
  prompt += `\n═══ VISUAL PROMPT REQUIREMENTS (CRITICAL) ═══\n`;
  prompt += `Each visualPrompt MUST contain ALL 5 layers:\n`;
  prompt += `1. SUBJECT: Extreme specificity — materials, textures, spatial relationships, lived-in details\n`;
  prompt += `2. MOOD/REFERENCE: Film/art/photography references that define the visual universe\n`;
  prompt += `3. LIGHTING: Source + Direction + Quality (e.g. "golden hour from camera-right at 15°, warm amber 3200K")\n`;
  prompt += `4. COMPOSITION: Camera position + distance + movement + framing rules\n`;
  prompt += `5. FINISHING: DOF, film stock, color grade, texture, post-processing\n`;
  prompt += `Each visualPrompt MUST end with "Avoid: [list of things to avoid]"\n`;
  prompt += `Scene 1 establishes the visual world — all subsequent scenes maintain continuity.\n`;
  prompt += `Scene 1 minimum: 80 words | Other scenes minimum: 50 words\n`;
  prompt += `═══════════════════════════════════════════\n`;
  if (input.seoKeywords?.length) {
    prompt += `\nTarget SEO keywords to incorporate naturally: ${input.seoKeywords.join(', ')}\n`;
  }
  if (input.referenceUrl) {
    prompt += `\nReference video/content for style inspiration: ${input.referenceUrl}\n`;
  }
  // X/Twitter-sourced content: the tweet data is embedded into the prompt
  if (input.sourceTweet) {
    const tw = input.sourceTweet;
    prompt += `\n═══ X/TWITTER SOURCE CONTENT ═══\n`;
    prompt += `This video is based on a viral X/Twitter post by @${tw.authorUsername}.\n`;
    prompt += `Tweet engagement: ${tw.metrics.likes} likes, ${tw.metrics.retweets} retweets, ${tw.metrics.views} views.\n`;
    prompt += `Is thread: ${tw.isThread ? 'YES' : 'NO'}\n`;
    prompt += `\nOriginal tweet text:\n"${tw.text}"\n`;
    if (tw.media.length > 0) {
      // Only photos are listed as visual references; other media types are ignored.
      const photos = tw.media.filter(m => m.type === 'photo');
      if (photos.length > 0) {
        prompt += `\nThe tweet has ${photos.length} photo(s). Use these as VISUAL REFERENCES in your visual prompts.\n`;
        prompt += `Also generate AI-enhanced visuals inspired by these reference images.\n`;
        photos.forEach((p, i) => {
          prompt += `  Reference image ${i + 1}: ${p.url} (${p.width}x${p.height})\n`;
        });
      }
    }
    prompt += `\nIMPORTANT:\n`;
    prompt += `- Analyze WHY this tweet went viral and capture that energy\n`;
    prompt += `- The narration should feel like a reaction/commentary on the tweet content\n`;
    prompt += `- Mention the original tweet author @${tw.authorUsername} naturally in narration\n`;
    prompt += `- Use both the tweet's images as reference AND generate new AI visuals\n`;
    prompt += `═══════════════════════════════\n`;
  }
  prompt += `\nGenerate the complete script now.`;
  return prompt;
}
/**
 * Returns the composition guidance text that matches an aspect ratio.
 *
 * @param aspectRatio `LANDSCAPE_16_9`, `SQUARE_1_1` or `PORTRAIT_9_16`.
 * @returns Guidance for the given ratio. Any other value, including
 *   `undefined`, yields the portrait guidance.
 */
private getAspectRatioGuide(aspectRatio?: string): string {
  if (aspectRatio === 'LANDSCAPE_16_9') {
    return 'Classic cinematic horizontal composition, use wide establishing shots, rule of thirds, leading lines across horizontal plane, anamorphic letterbox feel';
  }

  if (aspectRatio === 'SQUARE_1_1') {
    return 'Centered symmetrical composition, tight focused framing, every corner contributes, no wasted space, bold and graphic';
  }

  // Portrait covers both the explicit 'PORTRAIT_9_16' value and the fallback.
  return 'Vertical framing optimized for mobile, subject fills center-frame, use foreground-to-background depth, vertical stacking of elements, negative space in top/bottom third for text';
}
/**
 * Post-processing: humanizer pass.
 *
 * Scans each scene's narration for words and phrases typical of AI writing
 * and emits a debug log for every hit. The narration itself is NOT
 * rewritten — this pass only reports. It also fills in `seo` and
 * `socialContent` from the script metadata when either is missing.
 *
 * @param script Parsed script; mutated in place when defaults are filled in.
 * @returns The same script instance.
 */
private applyHumanizerPass(script: GeneratedScript): GeneratedScript {
  const aiWords = [
    'delve', 'tapestry', 'landscape', 'crucial', 'moreover', 'furthermore',
    'testament', 'underscore', 'foster', 'garner', 'showcase', 'pivotal',
    'groundbreaking', 'vibrant', 'nestled', 'renowned', 'breathtaking',
    'interplay', 'intricacies', 'endeavor', 'exemplifies', 'comprehensive',
  ];
  const aiPhrases = [
    'in the realm of', 'it is important to note', 'in today\'s world',
    'serves as a testament', 'stands as a', 'it\'s not just',
    'at the end of the day', 'the fact of the matter',
  ];

  // Compile each whole-word matcher once instead of once per word per scene.
  // No 'g' flag: a global regex keeps `lastIndex` between `test` calls, which
  // would make a reused matcher skip hits.
  const wordMatchers = aiWords.map((word) => ({
    word,
    pattern: new RegExp(`\\b${word}\\b`, 'i'),
  }));

  for (const scene of script.scenes) {
    const text = scene.narrationText ?? '';
    const lowered = text.toLowerCase();

    // Flag AI-sounding words (case-insensitive, whole word)
    for (const { word, pattern } of wordMatchers) {
      if (pattern.test(text)) {
        this.logger.debug(`Humanizer: "${word}" kelimesi tespit edildi, sahne ${scene.order}`);
      }
    }
    // Flag AI-sounding phrases (case-insensitive substring)
    for (const phrase of aiPhrases) {
      if (lowered.includes(phrase)) {
        this.logger.debug(`Humanizer: "${phrase}" kalıbı tespit edildi, sahne ${scene.order}`);
      }
    }
  }

  // Fill the SEO section from metadata when the model omitted it
  if (!script.seo) {
    script.seo = {
      title: script.metadata.title,
      description: script.metadata.description,
      keywords: script.metadata.hashtags || [],
      hashtags: script.metadata.hashtags || [],
      schemaMarkup: {
        '@type': 'VideoObject',
        name: script.metadata.title,
        description: script.metadata.description,
        duration: `PT${script.metadata.totalDurationSeconds}S`,
      },
    };
  }
  // Fill the social content section from metadata when the model omitted it
  if (!script.socialContent) {
    script.socialContent = {
      youtubeTitle: script.metadata.title,
      youtubeDescription: script.metadata.description,
      tiktokCaption: script.metadata.title,
      instagramCaption: script.metadata.title,
      twitterText: script.metadata.title,
    };
  }
  return script;
}
/**
 * Post-processing: visual prompt enrichment.
 *
 * For every scene, in order:
 *  1. pads prompts below the minimum word count (80 for the first scene,
 *     50 for the rest) using the style DNA,
 *  2. prefixes a continuity anchor on scenes after the first,
 *  3. appends an aspect-ratio hint when no framing/composition wording exists,
 *  4. ensures the prompt ends with a negative ("Avoid: ...") clause.
 *
 * An existing "Avoid:" clause is detached before steps 1-3 and re-attached
 * last, so appended descriptors never end up inside the negative list.
 *
 * @param script Script whose scenes are updated in place.
 * @param videoStyle Style key used to look up the style DNA.
 * @param aspectRatio Optional aspect ratio; no hint is added when omitted.
 * @returns The same script instance.
 */
private enrichVisualPrompts(
  script: GeneratedScript,
  videoStyle: string,
  aspectRatio?: string,
): GeneratedScript {
  const styleDNA = this.getStyleDNA(videoStyle);
  const defaultNegative = 'Avoid: text overlays, watermarks, brand logos, recognizable celebrity faces, distorted anatomy, extra fingers, blurry faces, stock photo aesthetic, oversaturated CGI plastic look, generic clip art, UI elements';

  script.scenes.forEach((scene, index) => {
    const original = (scene.visualPrompt ?? '').trim();
    const isHook = index === 0;
    const minWords = isHook ? 80 : 50;

    // Detach the negative clause (everything from the first "Avoid:") so the
    // steps below only ever extend the descriptive part of the prompt.
    const negativeAt = original.search(/avoid:/i);
    const negative = negativeAt >= 0 ? original.slice(negativeAt).trim() : defaultNegative;
    let vp = negativeAt >= 0 ? original.slice(0, negativeAt).trim() : original;

    // 1. Minimum word count, measured on the full prompt as received
    const wordCount = original.split(/\s+/).filter(Boolean).length;
    if (wordCount < minWords) {
      this.logger.debug(
        `VisualEnrich: Sahne ${scene.order} — ${wordCount} kelime (min: ${minWords}), zenginleştiriliyor`,
      );
      vp = this.padVisualPrompt(vp, styleDNA, minWords, isHook).trim();
    }

    // 2. Visual continuity anchor for scene 2 onwards
    if (index > 0 && !vp.toLowerCase().includes('continuing')) {
      vp = `Continuing the ${styleDNA.reference} visual language established in previous scenes: ${vp}`;
    }

    // 3. Aspect-ratio hint when the prompt has no framing/composition wording.
    //    Unrecognised values fall back to portrait, matching getAspectRatioGuide.
    if (aspectRatio && !/framing|composition/i.test(vp)) {
      let arHint: string;
      switch (aspectRatio) {
        case 'LANDSCAPE_16_9':
          arHint = 'Wide cinematic horizontal composition.';
          break;
        case 'SQUARE_1_1':
          arHint = 'Square centered symmetrical framing.';
          break;
        default:
          arHint = 'Vertical framing optimized for mobile viewing.';
          break;
      }
      vp = `${vp} ${arHint}`;
    }

    // 4. Negative clause always comes last (the model's own, else the default)
    scene.visualPrompt = `${vp} ${negative}`.trim();
  });

  return script;
}
/**
 * Enriches a short visual prompt with style DNA fragments.
 *
 * A fragment is appended only when the prompt contains none of the keywords
 * associated with that layer (lighting, lens/DOF, color, texture, style
 * reference). For the hook scene an extra opening-shot sentence is added if
 * the prompt plus fragments is still below `targetWords`.
 *
 * Reaching `targetWords` is not guaranteed; the value is only consulted for
 * the hook sentence.
 *
 * @param prompt Visual prompt to extend.
 * @param styleDNA Default descriptors for the selected video style.
 * @param targetWords Minimum word count aimed for.
 * @param isHook Whether this is the first (hook) scene.
 * @returns The prompt followed by the selected fragments, space-separated,
 *   with no trailing whitespace.
 */
private padVisualPrompt(
  prompt: string,
  styleDNA: StyleDNA,
  targetWords: number,
  isHook: boolean,
): string {
  // Counts non-empty tokens, so an empty string is 0 words rather than 1.
  const countWords = (text: string): number => text.split(/\s+/).filter(Boolean).length;

  // Lower-case once; every keyword probe below is case-insensitive.
  const lowered = prompt.toLowerCase();
  const mentionsAny = (...needles: string[]): boolean =>
    needles.some((needle) => lowered.includes(needle));

  const additions: string[] = [];
  // Lighting
  if (!mentionsAny('light', 'shadow')) {
    additions.push(styleDNA.lighting);
  }
  // Lens / depth of field
  if (!mentionsAny('lens', 'depth of field', 'dof', 'f/')) {
    additions.push(styleDNA.lens);
  }
  // Color grade
  if (!mentionsAny('color', 'grade', 'palette')) {
    additions.push(styleDNA.color);
  }
  // Texture
  if (!mentionsAny('grain', 'texture', 'film')) {
    additions.push(styleDNA.texture);
  }
  // Style reference
  if (!mentionsAny('style', 'inspired', 'aesthetic')) {
    additions.push(`Visual style inspired by ${styleDNA.reference}.`);
  }
  // Extra detail for the hook scene when still short of the target
  if (isHook && countWords(prompt) + countWords(additions.join(' ')) < targetWords) {
    additions.push('This is the opening shot — it must immediately capture attention and establish the visual world of the entire video.');
  }

  // Join only non-empty parts so nothing-to-add does not leave a dangling space.
  return [prompt.trim(), ...additions].filter(Boolean).join(' ');
}
/**
* Video stiline göre varsayılan görsel DNA değerlerini döndürür.
*/
private getStyleDNA(videoStyle: string): StyleDNA {
const dnaMap: Record<string, StyleDNA> = {
CINEMATIC: {
reference: 'Denis Villeneuve and Roger Deakins cinematography',
lighting: 'Dramatic key-and-fill lighting with a single strong motivated source casting deep sculpted shadows.',
lens: 'Shot on 35mm anamorphic lens with shallow depth of field f/2.0 and characteristic oval bokeh.',
color: 'Teal-and-orange blockbuster color grade with desaturated midtones and crushed blacks.',
texture: 'Subtle Kodak Vision3 film grain, anamorphic horizontal lens flare, slight vignette darkening corners.',
},
DOCUMENTARY: {
reference: 'National Geographic and Planet Earth II',
lighting: 'Natural available daylight, no artificial sources, authentic and observational.',
lens: 'Shot on 50mm prime lens with deep focus f/8, everything sharp and clear.',
color: 'Natural warm tones, true-to-life rendering with slight warm saturation boost.',
texture: 'Clean digital capture with slight handheld vibration feel, no post-processing artifacts.',
},
EDUCATIONAL: {
reference: 'Kurzgesagt and 3Blue1Brown explainer videos',
lighting: 'Flat even illumination, clean and clear, no directional shadows.',
lens: 'Overhead diagram view or isometric angles, deep focus everything sharp.',
color: 'Bold saturated primary colors on dark background, data-visualization palette.',
texture: 'Vector-clean sharp edges, infographic precision, flat design with subtle drop shadows.',
},
STORYTELLING: {
reference: 'Wes Anderson symmetry and Studio Ghibli warmth',
lighting: 'Warm golden soft diffused light with fairy-tale quality, gentle and inviting.',
lens: 'Medium lens with symmetrical centered framing, storybook composition.',
color: 'Pastel palette with vintage warmth, muted yet colorful, nostalgic.',
texture: 'Painterly soft texture with watercolor wash quality, gentle and dreamy.',
},
NEWS: {
reference: 'BBC World and CNN broadcast graphics',
lighting: 'High-key even broadcast studio lighting or natural location light.',
lens: 'Standard 50mm at eye-level, clean professional composition.',
color: 'Neutral cool tones, high contrast, professional and authoritative.',
texture: 'Clean sharp digital, motion graphics readiness, no grain.',
},
ARTISTIC: {
reference: 'Tarkovsky, Wong Kar-wai, and Terrence Malick',
lighting: 'Extreme chiaroscuro with unconventional color temperatures, moody and atmospheric.',
lens: 'Wide angle with slight distortion or extreme close macro, creative framing.',
color: 'Surreal color shifts with split-toning, bold unconventional palettes.',
texture: 'Heavy analog film grain, intentional imperfections, light leaks, vintage artifacts.',
},
ANIME: {
reference: 'Makoto Shinkai (Your Name, Weathering With You) and Studio Ghibli',
lighting: 'Ethereal glowing light rays with dramatic cel-shaded lighting, luminous bloom effects.',
lens: 'Dynamic manga-inspired angles with dramatic low and high perspectives, speed lines for action.',
color: 'Vivid saturated anime palette with glowing skies and luminous highlights.',
texture: 'Clean cel-shaded lines with painted backgrounds, photorealistic environmental detail, sparkle particles.',
},
ANIMATION_3D: {
reference: 'Pixar (Soul, WALL-E) and Unreal Engine 5 cinematics',
lighting: 'Global illumination with subsurface scattering on skin, volumetric god rays through atmosphere.',
lens: 'Virtual cinema camera with realistic depth of field, Pixar-style dramatic angles.',
color: 'Rich saturated yet natural rendering palette, photorealistic material shaders.',
texture: 'Smooth subdivision surfaces with micro-detail on materials, photorealistic shader quality.',
},
ANIMATION_2D: {
reference: 'Classic Disney hand-drawn era and Cartoon Saloon (Wolfwalkers)',
lighting: 'Painted light and shadow, flat but highly expressive, artistic lighting.',
lens: 'Flat 2D composition with layered parallax depth, theatrical staging.',
color: 'Gouache and watercolor palette, limited but expressive color choices.',
texture: 'Visible brushstrokes, hand-drawn line quality, subtle paper texture.',
},
STOP_MOTION: {
reference: 'Laika Studios (Coraline, Kubo) and Wes Anderson (Fantastic Mr. Fox)',
lighting: 'Miniature set practical lighting with visible small-scale light rigs, warm intimate.',
lens: 'Macro lens with shallow DOF revealing miniature scale, tilt-shift effect.',
color: 'Handcrafted tactile palette, slightly desaturated warm tones.',
texture: 'Visible material textures — clay, felt, wood, fabric, fingerprints on clay, puppet joints visible.',
},
INFOGRAPHIC: {
reference: 'Kurzgesagt, Visual Capitalist, and Hans Rosling data visualization',
lighting: 'Flat pure graphic illumination, no directional light, even.',
lens: 'Orthographic or isometric projection, no perspective distortion.',
color: 'Data-driven palette with 3-5 semantic colors, dark background with bright accent colors.',
texture: 'Ultra-clean vector graphics, sharp geometric edges, flat design with subtle shadows.',
},
RETRO_80S: {
reference: 'Synthwave/Outrun aesthetic, Stranger Things, and Drive (2011)',
lighting: 'Neon purple, pink, and cyan glow with laser grid lines and chrome reflections.',
lens: 'Wide angle capturing expansive neon landscapes from low angle.',
color: 'Neon magenta, electric cyan, deep purple, chrome silver, hot pink sunset gradients.',
texture: 'CRT scanlines, VHS tracking artifacts, retro pixel grid, reflective chrome surfaces.',
},
MINIMALIST: {
reference: 'Apple product design language and Japanese zen aesthetics',
lighting: 'Clean soft diffused studio light, seamless white or gray gradient background.',
lens: 'Product photography precision, clean medium shot with perfect focus.',
color: 'Monochrome palette with single accent color, vast negative space.',
texture: 'Ultra-smooth surfaces, absolutely no grain or artifacts, pristine and clinical.',
},
SURREAL: {
reference: 'Salvador Dalí, René Magritte, and M.C. Escher',
lighting: 'Impossible multiple light sources, conflicting shadow directions, dreamy supernatural glow.',
lens: 'Fish-eye distortion, impossible recursive geometry, Droste effect perspectives.',
color: 'Hyper-vivid otherworldly palette, colors that defy natural physics.',
texture: 'Ultra-detailed photorealistic rendering of impossible and paradoxical objects, smooth dreamlike surfaces.',
},
// === Ek Film & Sinema ===
NOIR: {
reference: 'Classic Film Noir — Double Indemnity, The Third Man, Sin City',
lighting: 'High-contrast chiaroscuro with venetian blind shadow patterns, single hard spotlight from above-left, deep impenetrable blacks.',
lens: 'Wide angle 28mm with Dutch angle tilts, deep focus noir staging, low camera angles.',
color: 'Stark black-and-white or heavily desaturated with single color accent (red lips, neon sign), crushed blacks.',
texture: 'Heavy film grain ISO 1600, scratched celluloid, cigarette smoke diffusion, rain-streaked windows.',
},
VLOG: {
reference: 'Casey Neistat, MrBeast, authentic YouTube creator aesthetic',
lighting: 'Natural mixed lighting — ring light on face, window daylight, practical room lights visible.',
lens: 'Wide angle 16mm GoPro or 24mm vlog lens, slight barrel distortion, close to subject.',
color: 'Punchy saturated colors, slightly lifted shadows, bright and energetic YouTube grade.',
texture: 'Clean digital with slight motion blur from handheld movement, casual and authentic feel.',
},
// === Ek Animasyon ===
MOTION_COMIC: {
reference: 'Marvel Motion Comics, Watchmen Motion Comic, DC animated panels',
lighting: 'Dramatic comic book lighting with bold cast shadows, high-contrast key light.',
lens: 'Panel-framed compositions with zoom-and-pan (Ken Burns effect on comic panels), dramatic angles.',
color: 'Rich saturated comic book palette with bold primaries, inked outlines, Ben-Day dots.',
texture: 'Printed comic texture with halftone dots, speech bubble spaces, panel border lines, ink splatter.',
},
CARTOON: {
reference: 'Looney Tunes, The Simpsons, Adventure Time, modern Cartoon Network',
lighting: 'Flat cartoon lighting with simple cast shadows, bright and even, no complex lighting.',
lens: 'Exaggerated cartoon perspectives, squash and stretch compositions, dynamic action poses.',
color: 'Bold flat colors with thick outlines, limited palette per scene, saturated and cheerful.',
texture: 'Clean vector lines, smooth flat fills, no grain or noise, crisp digital cartoon rendering.',
},
CLAYMATION: {
reference: 'Aardman Animations (Wallace & Gromit), Celebrity Deathmatch, Robot Chicken',
lighting: 'Warm miniature set practical lighting with soft shadows, slightly uneven handmade quality.',
lens: 'Macro lens at miniature scale, moderate depth of field, slightly wobbly stop-motion framing.',
color: 'Earthy clay-like palette, warm skin tones on plasticine, handcrafted color mixing.',
texture: 'Visible clay fingerprints, plasticine surface texture, wire armature hints, handmade imperfections.',
},
PIXEL_ART: {
reference: '8-bit/16-bit retro gaming — Final Fantasy VI, Chrono Trigger, Celeste, Stardew Valley',
lighting: 'Pixel-based dithered lighting, limited shading levels (3-4 tones per color), NES/SNES era.',
lens: 'Flat orthographic top-down or side-scrolling view, tile-based grid composition.',
color: 'Limited retro palette (16-64 colors), vibrant pixel colors, classic hardware palette constraints.',
texture: 'Visible individual pixels, no anti-aliasing, crisp hard pixel edges, scanline optional.',
},
ISOMETRIC: {
reference: 'Monument Valley, SimCity, isometric architectural illustration, Diablo II',
lighting: 'Even isometric lighting from upper-left at 45 degrees, clean predictable shadows.',
lens: 'True isometric projection (30-degree angle), no perspective vanishing points, tiled grid.',
color: 'Clean architectural palette, organized by function/zone, pastel or bold depending on theme.',
texture: 'Clean geometric surfaces, subtle material differentiation, architectural precision.',
},
// === Ek Eğitim & Bilgi ===
WHITEBOARD: {
reference: 'RSA Animate, Khan Academy, TED-Ed whiteboard explainers',
lighting: 'Bright even overhead lighting on white surface, no shadows, pure clarity.',
lens: 'Top-down or slight angle on whiteboard surface, steady and centered.',
color: 'Black ink on white background, limited accent colors (red, blue, green markers).',
texture: 'Whiteboard surface with slight marker texture, hand-drawn line quality, dry-erase aesthetic.',
},
EXPLAINER: {
reference: 'Slack, Dropbox, and Stripe product explainer videos, Lottie animations',
lighting: 'Flat design lighting, no directional shadows, clean and corporate.',
lens: 'Centered framing with smooth transitions, screen-recording-like precision.',
color: 'Brand-consistent palette with 2-3 primary colors, white or light gray background.',
texture: 'Smooth vector animation, Lottie-style motion, rounded corners, friendly and approachable.',
},
DATA_VIZ: {
reference: 'New York Times data journalism, Flourish, D3.js visualizations, Reuters Graphics',
lighting: 'Flat graphic illumination, data-first clarity, no atmospheric effects.',
lens: 'Orthographic or minimal perspective, focus on data readability and visual hierarchy.',
color: 'Sequential and diverging color scales (viridis, plasma), accessible color-blind-safe palettes.',
texture: 'Ultra-clean SVG precision, smooth gradients in charts, minimal grid lines, data-ink ratio optimized.',
},
// === Ek Retro & Nostaljik ===
VINTAGE_FILM: {
reference: 'Super 8 home movies, 1960s-70s amateur filmmaking, Kodachrome slides',
lighting: 'Overexposed daylight with lens flare, warm sunlight washing out highlights.',
lens: 'Vintage Super 8 lens with soft focus and vignette, slight zoom wobble.',
color: 'Faded Kodachrome warm tones, yellowed highlights, shifted reds toward orange.',
texture: 'Heavy film grain, light leaks, sprocket hole marks, dust and scratches, frame jitter.',
},
VHS: {
reference: '1980s-90s VHS home recordings, retro TV aesthetic, analog glitch art',
lighting: 'CRT television glow, slightly blown-out highlights, low dynamic range.',
lens: 'Consumer camcorder wide angle, auto-focus hunting, slight barrel distortion.',
color: 'Washed-out blues and reds, bleeding color channels, oversaturated skin tones.',
texture: 'VHS tracking lines, horizontal noise bands, tape dropout artifacts, CRT scanlines, timecode overlay.',
},
POLAROID: {
reference: 'Instant film photography — Polaroid SX-70, Fujifilm Instax, analog snap aesthetic',
lighting: 'Flash-heavy with harsh direct flash shadows, or warm window light for lifestyle shots.',
lens: 'Fixed focal length instant camera lens, moderate depth of field, square or 3:4 crop.',
color: 'Characteristic Polaroid color shift — green shadows, warm creamy highlights, soft pastels.',
texture: 'Instant film border frame, slightly soft focus, chemical development artifacts, white border.',
},
RETRO_90S: {
reference: 'Y2K aesthetic, early internet, Windows 95, rave culture, Saved by the Bell',
lighting: 'Colorful gelled lights, UV blacklight, early digital camera flash.',
lens: 'Point-and-shoot digital camera look, red-eye flash, auto-everything.',
color: 'Y2K palette — iridescent, holographic, lime green, hot pink, electric blue, chrome.',
texture: 'Early JPEG compression artifacts, low-res pixelation, dial-up era digital, bubble fonts.',
},
// === Ek Sanat Akımları ===
WATERCOLOR: {
reference: 'Traditional watercolor illustration, botanical art, childrens book illustration',
lighting: 'Soft diffused natural light suggesting form through color temperature shifts.',
lens: 'Flat illustration composition, no perspective distortion, art print framing.',
color: 'Transparent layered washes, wet-on-wet bleeding edges, limited palette with visible mixing.',
texture: 'Visible watercolor paper grain (cold-pressed), pigment granulation, paint blooms, white paper showing through.',
},
OIL_PAINTING: {
reference: 'Classical oil painting — Rembrandt, Vermeer, John Singer Sargent, plein-air impressionism',
lighting: 'Rembrandt triangle lighting, warm candle-like illumination, dramatic tonal contrast.',
lens: 'Classical portrait or landscape composition, Renaissance perspective, golden ratio.',
color: 'Rich oil pigment colors — cadmium yellow, burnt sienna, ultramarine blue, titanium white.',
texture: 'Visible impasto brushstrokes, canvas weave texture, palette knife marks, layered glazes.',
},
IMPRESSIONIST: {
reference: 'Claude Monet, Pierre-Auguste Renoir, Edgar Degas, late 19th-century plein-air painting',
lighting: 'Atmospheric outdoor light capturing specific time of day, light as the subject itself.',
lens: 'Plein-air landscape or intimate scene framing, slightly cropped like a snapshot.',
color: 'Broken color technique — short dabs of pure pigment that blend optically, complementary vibrations.',
texture: 'Dense visible brushstrokes, comma and dash marks, scumbled passages, canvas texture beneath paint.',
},
POP_ART: {
reference: 'Andy Warhol, Roy Lichtenstein, Keith Haring, Takashi Murakami',
lighting: 'Flat even lighting, no shadows, silk-screen reproduction aesthetic.',
lens: 'Flat graphic composition, repeated grid patterns, bold iconic framing.',
color: 'CMYK primary colors — bold red, yellow, blue, black outlines, flat color fills.',
texture: 'Ben-Day dots, silk-screen print registration, halftone patterns, bold black outlines.',
},
UKIYO_E: {
reference: 'Hokusai (The Great Wave), Hiroshige, traditional Japanese woodblock prints',
lighting: 'Flat decorative lighting with no cast shadows, atmospheric perspective through color.',
lens: 'Flat 2D composition with layered depth planes, floating world perspective.',
color: 'Traditional Japanese pigments — indigo, vermillion, saffron, black sumi ink, muted earth tones.',
texture: 'Woodblock print grain, visible wood grain texture in flat areas, hand-carved line quality.',
},
ART_DECO: {
reference: '1920s-30s Art Deco — Chrysler Building, Tamara de Lempicka, The Great Gatsby',
lighting: 'Glamorous theatrical lighting with gold reflections, spotlight elegance.',
lens: 'Symmetrical architectural framing, towering vertical compositions, geometric precision.',
color: 'Gold, black, silver, deep emerald, sapphire blue, ivory — luxurious metallic palette.',
texture: 'Geometric patterns, sunburst rays, chevrons, stepped forms, chrome and glass surfaces.',
},
COMIC_BOOK: {
reference: 'Marvel Comics (Jack Kirby), DC Comics, Manga (Akira, Dragon Ball)',
lighting: 'Dynamic action lighting with speed lines, explosive rim lights, dramatic chiaroscuro.',
lens: 'Extreme foreshortening, dynamic action angles, upshot hero poses, panel-based framing.',
color: 'Bold four-color printing palette, flat fills with gradient shading, spot blacks.',
texture: 'Ink line art with cross-hatching, Zip-A-Tone dot patterns, action lines, impact effects.',
},
SKETCH: {
reference: 'Pencil sketching, charcoal drawing, architectural rendering, fashion illustration',
lighting: 'Implied through hatching density and paper-white highlights, no color-based lighting.',
lens: 'Sketchbook page composition, slightly off-center, intimate and personal framing.',
color: 'Monochrome graphite gray scale, or limited sepia/sanguine warm tones, white paper dominant.',
texture: 'Visible pencil strokes, cross-hatching, smudged graphite, eraser marks, paper tooth texture.',
},
// === Ek Modern & Minimal ===
GLASSMORPHISM: {
reference: 'Apple iOS frosted glass, modern UI design, translucent material design',
lighting: 'Soft diffused backlight through frosted glass, colorful blurred background gradients.',
lens: 'Clean UI-like framing, centered elements, clear visual hierarchy.',
color: 'Frosted semi-transparent whites and pastels over vibrant gradient backgrounds.',
texture: 'Frosted glass blur, subtle border glow, drop shadows, backdrop-filter blur effect.',
},
NEON: {
reference: 'Tokyo Shinjuku at night, Las Vegas strip, neon sign art, luminous night photography',
lighting: 'Multiple neon tube light sources casting colored glows, reflections on wet surfaces and glass.',
lens: 'Night photography with wide aperture f/1.4, bokeh circles from background neon, slight motion blur.',
color: 'Electric neon palette — hot pink, electric blue, vivid green, purple, against deep black.',
texture: 'Wet street reflections, glass and chrome reflections, light bloom around neon tubes, night grain.',
},
CYBERPUNK: {
reference: 'Cyberpunk 2077, Ghost in the Shell, Akira, Blade Runner, William Gibson',
lighting: 'Neon-lit rain-soaked scenes, holographic projections, LED screens casting colored light.',
lens: 'Wide angle capturing dense urban environments, low angle looking up at megastructures.',
color: 'Toxic neon green, deep magenta, cyan HUD blue, Chrome silver, against polluted dark skies.',
texture: 'Rain droplets on lens, holographic glitch artifacts, digital noise, chrome and wet surfaces.',
},
STEAMPUNK: {
reference: 'Victorian-era mechanical fantasy, Jules Verne, H.G. Wells, Bioshock Infinite',
lighting: 'Warm gas lamp and candle light, brass reflections, furnace glow, London fog diffusion.',
lens: 'Period-appropriate framing with brass vignette edges, medium shots of intricate machinery.',
color: 'Brass, copper, aged leather brown, dark mahogany, forest green, ivory, sepia warmth.',
texture: 'Victorian ornate metalwork, riveted brass plates, leather straps, exposed gears, steam clouds.',
},
ABSTRACT: {
reference: 'Kandinsky, Mondrian, Jackson Pollock, Rothko, generative art, Processing/p5.js',
lighting: 'Non-representational — light as pure color fields, no physical light source.',
lens: 'No traditional perspective, flat or infinite depth, purely compositional framing.',
color: 'Pure color theory exploration — complementary, analogous, triadic harmonies, bold saturation.',
texture: 'Paint splatter, geometric precision, generative algorithmic patterns, or pure smooth gradients.',
},
// === Fotoğrafik ===
PRODUCT: {
reference: 'Apple product photography, premium e-commerce, studio packshot lighting',
lighting: 'Multi-light studio setup — key light from 45°, fill from opposite, rim light for edge separation, white seamless background.',
lens: 'Macro to medium shot, 100mm macro lens, f/8 deep focus on product, clean background.',
color: 'Neutral white or gradient background, products true colors faithfully rendered, no color cast.',
texture: 'Ultra-sharp material details, visible surface finishes (brushed metal, glass clarity, fabric weave), pristine.',
},
FASHION: {
reference: 'Vogue editorial, Annie Leibovitz portraits, high-fashion runway photography',
lighting: 'Dramatic fashion lighting — beauty dish from above, clamshell setup, or single Profoto strobe with modifier.',
lens: '85mm portrait lens with creamy f/1.8 bokeh, or 35mm editorial wide for environmental fashion.',
color: 'High-fashion color grading — lifted blacks, color-tinted shadows, editorial mood-specific palettes.',
texture: 'Fabric texture detail visible, skin retouching (frequency separation look), magazine-print quality.',
},
AERIAL: {
reference: 'Drone photography, DJI Mavic, National Geographic aerial, Google Earth perspective',
lighting: 'Natural sunlight from above with long shadows (golden hour ideal), atmospheric haze in distance.',
lens: 'Wide angle drone camera (24mm equivalent), deep focus f/5.6, straight-down or 45-degree angle.',
color: 'Vivid natural landscape colors enhanced — deep greens, turquoise water, golden sand, earth tones.',
texture: 'Ultra-sharp aerial detail, visible terrain texture, atmospheric perspective fading distant objects.',
},
MACRO: {
reference: 'Extreme close-up nature photography, Nikon Small World, scientific imaging',
lighting: 'Ring flash or dual macro flash for even close-up illumination, focus stacked lighting.',
lens: 'True macro 1:1 or greater magnification, 100mm macro lens, razor-thin DOF at f/2.8.',
color: 'Vivid detail colors invisible to naked eye, iridescent surfaces, micro-texture color variations.',
texture: 'Extreme detail — visible cell structures, surface micro-textures, water droplets, compound eye facets.',
},
PORTRAIT: {
reference: 'Annie Leibovitz, Peter Lindbergh, classic studio portraiture, environmental portraits',
lighting: 'Rembrandt or loop lighting from 45° camera-right, reflector fill from camera-left, hair light from behind.',
lens: '85mm f/1.4 portrait lens with beautiful circular bokeh, or 50mm for environmental context.',
color: 'Skin-tone-faithful rendering, warm and flattering, slight warm grade on highlights.',
texture: 'Natural skin texture (not over-smoothed), catch-light in eyes, subtle background separation.',
},
};
return dnaMap[videoStyle] || dnaMap['CINEMATIC'];
}
/**
 * Parses the raw model response into a script object, validates its
 * structure and fills in defaults for optional fields.
 *
 * Steps:
 *  1. Strip an optional Markdown code fence around the payload and JSON-parse it.
 *  2. Require an object with `metadata` and a `scenes` array of at least 2 items.
 *  3. Require `narrationText` and `visualPrompt` on every scene; default
 *     `durationSeconds` (5), `subtitleText` (narration) and `transitionType` ('CUT').
 *  4. Default the music / ambient / voice fields when the model omitted them.
 *
 * The parsed object is mutated in place and returned.
 *
 * @param rawText Raw text returned by the model.
 * @returns The validated script with defaults applied.
 * @throws InternalServerErrorException when the text is not valid JSON, the
 *         top-level structure is wrong, fewer than 2 scenes are present, or a
 *         scene lacks its required texts.
 */
private parseAndValidateScript(rawText: string): GeneratedScript {
  let parsed: GeneratedScript;
  try {
    let cleanText = rawText.trim();
    // The model sometimes wraps its JSON in a Markdown fence despite instructions.
    if (cleanText.startsWith('```json')) cleanText = cleanText.slice(7);
    if (cleanText.startsWith('```')) cleanText = cleanText.slice(3);
    if (cleanText.endsWith('```')) cleanText = cleanText.slice(0, -3);
    cleanText = cleanText.trim();
    parsed = JSON.parse(cleanText);
  } catch {
    // Log only a prefix of the response so a huge payload cannot flood the log.
    this.logger.error(`JSON parse hatası: ${rawText.substring(0, 500)}`);
    throw new InternalServerErrorException(
      'AI yanıtı geçerli JSON formatında değil.',
    );
  }

  // JSON.parse can legally yield null or a primitive ("null", "42", ...);
  // guard before touching properties so the caller gets the intended error
  // instead of a raw TypeError.
  if (
    !parsed ||
    typeof parsed !== 'object' ||
    !parsed.metadata ||
    !Array.isArray(parsed.scenes)
  ) {
    throw new InternalServerErrorException('AI yanıtı beklenen yapıda değil.');
  }
  if (parsed.scenes.length < 2) {
    throw new InternalServerErrorException('AI en az 2 sahne üretmelidir.');
  }

  for (let index = 0; index < parsed.scenes.length; index++) {
    const scene = parsed.scenes[index];

    // A null / primitive array entry is rejected the same way as a scene
    // with missing texts.
    if (!scene || !scene.narrationText || !scene.visualPrompt) {
      // Fall back to the 1-based position when the scene carries no `order`.
      const sceneLabel = scene && scene.order != null ? scene.order : index + 1;
      throw new InternalServerErrorException(
        `Sahne ${sceneLabel}: narrationText ve visualPrompt zorunludur.`,
      );
    }

    // Accept numeric strings (e.g. "8") but reject anything that is not a
    // finite number of at least one second.
    const rawDuration: unknown = scene.durationSeconds;
    const duration = typeof rawDuration === 'string' ? Number(rawDuration) : rawDuration;
    scene.durationSeconds =
      typeof duration === 'number' && Number.isFinite(duration) && duration >= 1
        ? duration
        : 5;

    if (!scene.subtitleText) scene.subtitleText = scene.narrationText;
    if (!scene.transitionType) scene.transitionType = 'CUT';
  }

  // Defaults for optional audio / voice fields.
  if (!parsed.musicPrompt) {
    parsed.musicPrompt = 'Cinematic orchestral, mysterious, 80 BPM, minor key, strings and piano, slow ethereal build';
  }
  if (!parsed.musicStyle) {
    parsed.musicStyle = 'cinematic-orchestral';
  }
  if (!parsed.musicTechnical) {
    parsed.musicTechnical = {
      bpm: 80,
      key: 'C minor',
      instruments: ['strings', 'piano', 'brass'],
      emotionalArc: 'calm-to-building-to-resolve',
    };
  }
  if (!parsed.ambientSoundPrompts) {
    parsed.ambientSoundPrompts = [];
  }
  if (!parsed.voiceStyle) {
    parsed.voiceStyle = 'Deep, authoritative male voice, warm tone, measured pacing for data, slight dramatic pauses for reveals';
  }
  return parsed;
}
/**
 * Regenerates a single scene — produces exactly one scene from a limited
 * context prompt.
 *
 * Sends `contextPrompt` to the configured model requesting a JSON response,
 * strips any Markdown code fences from the returned text, parses it and maps
 * the result onto the scene fields, substituting fallbacks for missing values.
 *
 * @param contextPrompt Prompt passed to the model as-is.
 * @returns The regenerated scene's narration, visual prompt, subtitle and duration.
 * @throws InternalServerErrorException when the AI client is not configured,
 *         or when the model call or JSON parsing fails.
 */
async generateSingleScene(contextPrompt: string): Promise<{
  narrationText: string;
  visualPrompt: string;
  subtitleText: string;
  durationSeconds: number;
}> {
  if (!this.genAI) {
    throw new InternalServerErrorException('AI servisi etkin değil — Google API Key gerekli.');
  }
  try {
    const response = await this.genAI.models.generateContent({
      model: this.modelName,
      contents: contextPrompt,
      config: {
        responseMimeType: 'application/json',
        temperature: 0.8,
        maxOutputTokens: 1024,
      },
    });
    const rawText = response.text || '';
    // Remove Markdown fences in case the model wrapped its JSON in them.
    const cleaned = rawText.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
    const parsed = JSON.parse(cleaned);

    // Apply the same rule as full-script validation: a duration must be a
    // finite number of at least one second, otherwise default to 5. Numeric
    // strings are accepted so the declared `number` return type always holds.
    const rawDuration: unknown = parsed.durationSeconds;
    const numericDuration =
      typeof rawDuration === 'string' ? Number(rawDuration) : rawDuration;
    const durationSeconds =
      typeof numericDuration === 'number' &&
      Number.isFinite(numericDuration) &&
      numericDuration >= 1
        ? numericDuration
        : 5;

    return {
      narrationText: parsed.narrationText || 'Yeniden üretilen sahne.',
      visualPrompt: parsed.visualPrompt || 'Cinematic establishing shot.',
      subtitleText: parsed.subtitleText || parsed.narrationText || '',
      durationSeconds,
    };
  } catch (error) {
    // Any failure (API error, empty or malformed JSON, null payload) is
    // logged and surfaced as a single generic exception.
    this.logger.error(`Tekil sahne üretim hatası: ${error}`);
    throw new InternalServerErrorException('Sahne yeniden üretilemedi.');
  }
}
}