main

2026-03-23 14:14:52 +03:00
parent 9bd2b4a2dd
commit c1e081478c
7 changed files with 1371 additions and 126 deletions
--- a/src/modules/content-generation/content-generation.service.ts
+++ b/src/modules/content-generation/content-generation.service.ts
@@ -14,11 +14,13 @@ import { SeoService, FullSeoAnalysis as SeoDTO } from '../seo/seo.service';
 import { NeuroMarketingService } from '../neuro-marketing/neuro-marketing.service';
 import { StorageService } from '../visual-generation/services/storage.service';
 import { VisualGenerationService } from '../visual-generation/visual-generation.service';
+import { WebScraperService, ScrapedContent } from '../trends/services/web-scraper.service';
 import { ContentType as PrismaContentType, ContentStatus as PrismaContentStatus, MasterContentType as PrismaMasterContentType } from '@prisma/client';


 export interface ContentGenerationRequest {
    topic: string;
+    sourceUrl?: string;
    niche?: string;
    platforms: Platform[];
    includeResearch?: boolean;
@@ -76,6 +78,7 @@ export class ContentGenerationService {
        private readonly neuroService: NeuroMarketingService,
        private readonly storageService: StorageService,
        private readonly visualService: VisualGenerationService,
+        private readonly webScraperService: WebScraperService,
    ) { }


@@ -87,6 +90,7 @@ export class ContentGenerationService {
    async generateContent(request: ContentGenerationRequest): Promise<GeneratedContentBundle> {
        const {
            topic,
+            sourceUrl,
            niche,
            platforms,
            includeResearch = true,
@@ -99,6 +103,26 @@ export class ContentGenerationService {

        console.log(`[ContentGenerationService] Starting generation for topic: ${topic}, platforms: ${platforms.join(', ')}`);

+        // ========== STEP 1: Scrape source article if URL provided ==========
+        let scrapedSource: ScrapedContent | null = null;
+        if (sourceUrl) {
+            this.logger.log(`Scraping source article: ${sourceUrl}`);
+            try {
+                scrapedSource = await this.webScraperService.scrapeUrl(sourceUrl, {
+                    extractImages: true,
+                    extractLinks: true,
+                    timeout: 15000,
+                }, topic);
+                if (scrapedSource) {
+                    this.logger.log(`Scraped source: ${scrapedSource.wordCount} words, ${scrapedSource.images.length} images, ${scrapedSource.videoLinks.length} videos`);
+                } else {
+                    this.logger.warn(`Failed to scrape source URL: ${sourceUrl}`);
+                }
+            } catch (err) {
+                this.logger.warn(`Source scraping error: ${err.message}`);
+            }
+        }
+
        // Analyze niche if provided
        let nicheAnalysis: NicheAnalysis | undefined;
        if (niche) {
@@ -116,6 +140,23 @@ export class ContentGenerationService {
            });
        }

+        // ========== Build enriched context from scraped source ==========
+        let sourceContext = '';
+        if (scrapedSource) {
+            const articleText = scrapedSource.content.substring(0, 3000);
+            const videoInfo = scrapedSource.videoLinks.length > 0
+                ? `\nVİDEO LİNKLERİ: ${scrapedSource.videoLinks.join(', ')}`
+                : '';
+            const importantLinks = scrapedSource.links
+                .filter(l => l.isExternal && !l.href.includes('facebook') && !l.href.includes('twitter'))
+                .slice(0, 5)
+                .map(l => `${l.text}: ${l.href}`)
+                .join('\n');
+            const linkInfo = importantLinks ? `\nÖNEMLİ LİNKLER:\n${importantLinks}` : '';
+
+            sourceContext = `\n\n📰 KAYNAK MAKALE İÇERİĞİ (ZORUNLU REFERANS):\n${articleText}${videoInfo}${linkInfo}\n\n⚠️ ÖNEMLİ: Yukarıdaki kaynak makaledeki TÜM özneleri (kişi, ürün, oyun adları, tarihler, fiyatlar, markalar) habere dahil et. Hiçbir önemli bilgiyi atlama. Video linkleri ve önemli dış linkler varsa bunları da içerikte paylaş.`;
+        }
+
        // Generate content for each platform using AI
        const platformContent: GeneratedContent[] = [];
        for (const platform of platforms) {
@@ -127,11 +168,13 @@ export class ContentGenerationService {
                const sanitizedSummary = this.sanitizeResearchSummary(
                    research?.summary || `Everything you need to know about ${topic}`
                );
+                // Append scraped source context to give AI the full article details
+                const enrichedSummary = sanitizedSummary + sourceContext;
                // Normalize platform to lowercase for consistency
                const normalizedPlatform = platform.toLowerCase();
                const aiContent = await this.platformService.generateAIContent(
                    topic,
-                    sanitizedSummary,
+                    enrichedSummary,
                    normalizedPlatform as any, // Cast to any/Platform to resolve type mismatch if Platform is strict union
                    'standard',
                    'tr',
@@ -145,6 +188,9 @@ export class ContentGenerationService {
                    this.logger.warn(`AI Content is empty for ${platform}`);
                }

+                // Use scraped image from source if available
+                const sourceImageUrl = scrapedSource?.images?.[0]?.src || undefined;
+
                const config = this.platformService.getPlatformConfig(platform);
                let content: GeneratedContent = {
                    platform,
@@ -163,10 +209,19 @@ export class ContentGenerationService {
                    content.content = voiceApplied.branded;
                }

-                // Add hashtags if requested
+                // Add hashtags using AI (based on actual generated content)
                if (includeHashtags) {
-                    const hashtagSet = this.hashtagService.generateHashtags(topic, platform);
-                    content.hashtags = hashtagSet.hashtags.map((h) => h.hashtag);
+                    try {
+                        content.hashtags = await this.platformService.generateAIHashtags(
+                            content.content,
+                            topic,
+                            platform as any,
+                            'tr',
+                        );
+                    } catch (hashErr) {
+                        this.logger.warn(`AI hashtag generation failed, skipping: ${hashErr.message}`);
+                        content.hashtags = [];
+                    }
                }

                // Generate image for visual platforms
@@ -180,11 +235,31 @@ export class ContentGenerationService {
                            platform: platformKey,
                            enhancePrompt: true,
                        });
-                        content.imageUrl = image.url;
-                        this.logger.log(`Image generated for ${platform}: ${image.url}`);
+                        
+                        // Check if image is a real image or just a placeholder
+                        const isPlaceholder = image.url?.includes('placehold.co') || image.url?.includes('placeholder');
+                        if (!isPlaceholder) {
+                            content.imageUrl = image.url;
+                            this.logger.log(`Image generated for ${platform}: ${image.url}`);
+                        } else if (sourceImageUrl) {
+                            // Use scraped source image instead of placeholder
+                            content.imageUrl = sourceImageUrl;
+                            this.logger.log(`Using scraped source image instead of placeholder: ${sourceImageUrl}`);
+                        } else {
+                            content.imageUrl = image.url;
+                            this.logger.log(`Image generated for ${platform}: ${image.url} (placeholder, no source image available)`);
+                        }
                    } catch (imgError) {
                        this.logger.warn(`Image generation failed for ${platform}, continuing without image`, imgError);
+                        // Fallback to scraped source image
+                        if (sourceImageUrl) {
+                            content.imageUrl = sourceImageUrl;
+                            this.logger.log(`Using scraped source image as fallback: ${sourceImageUrl}`);
+                        }
                    }
+                } else if (sourceImageUrl && !content.imageUrl) {
+                    // For non-visual platforms, still attach source image if available
+                    content.imageUrl = sourceImageUrl;
                }

                platformContent.push(content);
@@ -358,7 +433,7 @@ export class ContentGenerationService {
                            userId: effectiveUserId!,
                            masterContentId: masterContent.id,
                            type: contentType,
-                            title: `${bundle.topic} - ${platformContent.platform}`,
+                            title: this.sanitizeResearchSummary(`${bundle.topic}`) + ` - ${platformContent.platform}`,
                            body: platformContent.content,
                            hashtags: platformContent.hashtags,
                            status: PrismaContentStatus.DRAFT,
@@ -548,6 +623,8 @@ KURALLAR:
 6. Karakter limitini koru
 7. Platformun tonuna uygun yaz
 8. SADECE yayınlanacak metni yaz
+9. Hiçbir haber sitesi, kaynak, ajans veya web sitesi adı kullanma
+10. "...göre", "...haberlere göre", "...kaynağına göre" gibi atıf ifadeleri ASLA kullanma

 SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;

@@ -589,25 +666,43 @@ SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
        sanitized = sanitized.replace(/https?:\/\/[^\s]+/gi, '');
        sanitized = sanitized.replace(/www\.[^\s]+/gi, '');

-        // Remove common Turkish attribution phrases
+        // Remove common attribution phrases (Turkish and English)
        const attributionPatterns = [
            /\b\w+\.com(\.tr)?\b/gi,
            /\b\w+\.org(\.tr)?\b/gi,
            /\b\w+\.net(\.tr)?\b/gi,
            /\bkaynağına göre\b/gi,
            /\b'e göre\b/gi,
+            /\b'(i|a|e|u|ü|\u0131)n(da|de) (yayınlanan|yer alan|çıkan)\b/gi,
+            /\b(da|de) (çıkan|yayınlanan|yer alan) (haberlere|habere|bilgilere) göre\b/gi,
+            /\bhaberlere göre\b/gi,
+            /\braporuna göre\b/gi,
+            /\bsitesinde yer alan\b/gi,
+            /\baçıklamasına göre\b/gi,
+            /\byazısına göre\b/gi,
+            /\bhaberine göre\b/gi,
+            /\btarafından yapılan\b/gi,
            /\baccording to [^,.]+/gi,
+            /\breported by [^,.]+/gi,
+            /\bas reported in [^,.]+/gi,
            /\bsource:\s*[^,.]+/gi,
            /\breferans:\s*[^,.]+/gi,
            /\bkaynak:\s*[^,.]+/gi,
        ];

-        // Common Turkish tech/news source brands to strip
+        // Comprehensive list of Turkish tech/news source brands to strip
        const sourceNames = [
-            'donanımhaber', 'technopat', 'webtekno', 'shiftdelete',
+            'tamindir', 'donanımhaber', 'technopat', 'webtekno', 'shiftdelete',
            'chip online', 'log.com', 'mediatrend', 'bbc', 'cnn',
            'reuters', 'anadolu ajansı', 'hürriyet', 'milliyet',
            'sabah', 'forbes', 'bloomberg', 'techcrunch',
+            'the verge', 'engadget', 'ars technica', 'wired',
+            'mashable', 'gizmodo', 'tom\'s hardware', 'tom\'s guide',
+            'ntv', 'habertürk', 'sozcu', 'sözcü', 'cumhuriyet', 'star',
+            'posta', 'aksam', 'yeni safak', 'yeni şafak', 'takvim',
+            'mynet', 'ensonhaber', 'haber7', 'internethaber',
+            'ad hoc news', 'finanzen.net', 'der aktionär', 'aktionar',
+            'business insider', 'cnbc', 'financial times', 'wall street journal',
        ];

        for (const pattern of attributionPatterns) {
@@ -615,12 +710,15 @@ SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
        }

        for (const source of sourceNames) {
-            const regex = new RegExp(`\\b${source}\\b`, 'gi');
+            const regex = new RegExp(`\\b${source.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'gi');
            sanitized = sanitized.replace(regex, '');
        }

-        // Clean up multiple spaces and trailing commas
-        sanitized = sanitized.replace(/\s{2,}/g, ' ').replace(/,\s*,/g, ',').trim();
+        // Strip the dangling " -" separator left at the end of a line once a source name has been removed (e.g. "Great News - Tamindir" -> "Great News")
+        sanitized = sanitized.replace(/\s*-\s*$/gm, '');
+
+        // Collapse repeated whitespace and doubled commas, remove whitespace left in front of punctuation, then trim
+        sanitized = sanitized.replace(/\s{2,}/g, ' ').replace(/,\s*,/g, ',').replace(/\s+([.,;:!?])/g, '$1').trim();

        return sanitized;
    }