main
Some checks failed
Backend Deploy 🚀 / build-and-deploy (push) Has been cancelled

This commit is contained in:
Harun CAN
2026-03-23 14:14:52 +03:00
parent 9bd2b4a2dd
commit c1e081478c
7 changed files with 1371 additions and 126 deletions

View File

@@ -14,11 +14,13 @@ import { SeoService, FullSeoAnalysis as SeoDTO } from '../seo/seo.service';
import { NeuroMarketingService } from '../neuro-marketing/neuro-marketing.service';
import { StorageService } from '../visual-generation/services/storage.service';
import { VisualGenerationService } from '../visual-generation/visual-generation.service';
import { WebScraperService, ScrapedContent } from '../trends/services/web-scraper.service';
import { ContentType as PrismaContentType, ContentStatus as PrismaContentStatus, MasterContentType as PrismaMasterContentType } from '@prisma/client';
export interface ContentGenerationRequest {
topic: string;
sourceUrl?: string;
niche?: string;
platforms: Platform[];
includeResearch?: boolean;
@@ -76,6 +78,7 @@ export class ContentGenerationService {
private readonly neuroService: NeuroMarketingService,
private readonly storageService: StorageService,
private readonly visualService: VisualGenerationService,
private readonly webScraperService: WebScraperService,
) { }
@@ -87,6 +90,7 @@ export class ContentGenerationService {
async generateContent(request: ContentGenerationRequest): Promise<GeneratedContentBundle> {
const {
topic,
sourceUrl,
niche,
platforms,
includeResearch = true,
@@ -99,6 +103,26 @@ export class ContentGenerationService {
console.log(`[ContentGenerationService] Starting generation for topic: ${topic}, platforms: ${platforms.join(', ')}`);
// ========== STEP 1: Scrape source article if URL provided ==========
let scrapedSource: ScrapedContent | null = null;
if (sourceUrl) {
this.logger.log(`Scraping source article: ${sourceUrl}`);
try {
scrapedSource = await this.webScraperService.scrapeUrl(sourceUrl, {
extractImages: true,
extractLinks: true,
timeout: 15000,
}, topic);
if (scrapedSource) {
this.logger.log(`Scraped source: ${scrapedSource.wordCount} words, ${scrapedSource.images.length} images, ${scrapedSource.videoLinks.length} videos`);
} else {
this.logger.warn(`Failed to scrape source URL: ${sourceUrl}`);
}
} catch (err) {
this.logger.warn(`Source scraping error: ${err.message}`);
}
}
// Analyze niche if provided
let nicheAnalysis: NicheAnalysis | undefined;
if (niche) {
@@ -116,6 +140,23 @@ export class ContentGenerationService {
});
}
// ========== Build enriched context from scraped source ==========
let sourceContext = '';
if (scrapedSource) {
const articleText = scrapedSource.content.substring(0, 3000);
const videoInfo = scrapedSource.videoLinks.length > 0
? `\nVİDEO LİNKLERİ: ${scrapedSource.videoLinks.join(', ')}`
: '';
const importantLinks = scrapedSource.links
.filter(l => l.isExternal && !l.href.includes('facebook') && !l.href.includes('twitter'))
.slice(0, 5)
.map(l => `${l.text}: ${l.href}`)
.join('\n');
const linkInfo = importantLinks ? `\nÖNEMLİ LİNKLER:\n${importantLinks}` : '';
sourceContext = `\n\n📰 KAYNAK MAKALE İÇERİĞİ (ZORUNLU REFERANS):\n${articleText}${videoInfo}${linkInfo}\n\n⚠ ÖNEMLİ: Yukarıdaki kaynak makaledeki TÜM özneleri (kişi, ürün, oyun adları, tarihler, fiyatlar, markalar) habere dahil et. Hiçbir önemli bilgiyi atlama. Video linkleri ve önemli dış linkler varsa bunları da içerikte paylaş.`;
}
// Generate content for each platform using AI
const platformContent: GeneratedContent[] = [];
for (const platform of platforms) {
@@ -127,11 +168,13 @@ export class ContentGenerationService {
const sanitizedSummary = this.sanitizeResearchSummary(
research?.summary || `Everything you need to know about ${topic}`
);
// Append scraped source context to give AI the full article details
const enrichedSummary = sanitizedSummary + sourceContext;
// Normalize platform to lowercase for consistency
const normalizedPlatform = platform.toLowerCase();
const aiContent = await this.platformService.generateAIContent(
topic,
sanitizedSummary,
enrichedSummary,
normalizedPlatform as any, // Cast to any/Platform to resolve type mismatch if Platform is strict union
'standard',
'tr',
@@ -145,6 +188,9 @@ export class ContentGenerationService {
this.logger.warn(`AI Content is empty for ${platform}`);
}
// Use scraped image from source if available
const sourceImageUrl = scrapedSource?.images?.[0]?.src || undefined;
const config = this.platformService.getPlatformConfig(platform);
let content: GeneratedContent = {
platform,
@@ -163,10 +209,19 @@ export class ContentGenerationService {
content.content = voiceApplied.branded;
}
// Add hashtags if requested
// Add hashtags using AI (based on actual generated content)
if (includeHashtags) {
const hashtagSet = this.hashtagService.generateHashtags(topic, platform);
content.hashtags = hashtagSet.hashtags.map((h) => h.hashtag);
try {
content.hashtags = await this.platformService.generateAIHashtags(
content.content,
topic,
platform as any,
'tr',
);
} catch (hashErr) {
this.logger.warn(`AI hashtag generation failed, skipping: ${hashErr.message}`);
content.hashtags = [];
}
}
// Generate image for visual platforms
@@ -180,11 +235,31 @@ export class ContentGenerationService {
platform: platformKey,
enhancePrompt: true,
});
content.imageUrl = image.url;
this.logger.log(`Image generated for ${platform}: ${image.url}`);
// Check if image is a real image or just a placeholder
const isPlaceholder = image.url?.includes('placehold.co') || image.url?.includes('placeholder');
if (!isPlaceholder) {
content.imageUrl = image.url;
this.logger.log(`Image generated for ${platform}: ${image.url}`);
} else if (sourceImageUrl) {
// Use scraped source image instead of placeholder
content.imageUrl = sourceImageUrl;
this.logger.log(`Using scraped source image instead of placeholder: ${sourceImageUrl}`);
} else {
content.imageUrl = image.url;
this.logger.log(`Image generated for ${platform}: ${image.url} (placeholder, no source image available)`);
}
} catch (imgError) {
this.logger.warn(`Image generation failed for ${platform}, continuing without image`, imgError);
// Fallback to scraped source image
if (sourceImageUrl) {
content.imageUrl = sourceImageUrl;
this.logger.log(`Using scraped source image as fallback: ${sourceImageUrl}`);
}
}
} else if (sourceImageUrl && !content.imageUrl) {
// For non-visual platforms, still attach source image if available
content.imageUrl = sourceImageUrl;
}
platformContent.push(content);
@@ -358,7 +433,7 @@ export class ContentGenerationService {
userId: effectiveUserId!,
masterContentId: masterContent.id,
type: contentType,
title: `${bundle.topic} - ${platformContent.platform}`,
title: this.sanitizeResearchSummary(`${bundle.topic}`) + ` - ${platformContent.platform}`,
body: platformContent.content,
hashtags: platformContent.hashtags,
status: PrismaContentStatus.DRAFT,
@@ -548,6 +623,8 @@ KURALLAR:
6. Karakter limitini koru
7. Platformun tonuna uygun yaz
8. SADECE yayınlanacak metni yaz
9. Hiçbir haber sitesi, kaynak, ajans veya web sitesi adı kullanma
10. "...göre", "...haberlere göre", "...kaynağına göre" gibi atıf ifadeleri ASLA kullanma
SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
@@ -589,25 +666,43 @@ SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
sanitized = sanitized.replace(/https?:\/\/[^\s]+/gi, '');
sanitized = sanitized.replace(/www\.[^\s]+/gi, '');
// Remove common Turkish attribution phrases
// Remove common attribution phrases (Turkish and English)
const attributionPatterns = [
/\b\w+\.com(\.tr)?\b/gi,
/\b\w+\.org(\.tr)?\b/gi,
/\b\w+\.net(\.tr)?\b/gi,
/\bkaynağına göre\b/gi,
/\b'e göre\b/gi,
/\b'(i|a|e|u|ü|\u0131)n(da|de) (yayınlanan|yer alan|çıkan)\b/gi,
/\b(da|de) (çıkan|yayınlanan|yer alan) (haberlere|habere|bilgilere) göre\b/gi,
/\bhaberlere göre\b/gi,
/\braporuna göre\b/gi,
/\bsitesinde yer alan\b/gi,
/\baçıklamasına göre\b/gi,
/\byazısına göre\b/gi,
/\bhaberine göre\b/gi,
/\btarafından yapılan\b/gi,
/\baccording to [^,.]+/gi,
/\breported by [^,.]+/gi,
/\bas reported in [^,.]+/gi,
/\bsource:\s*[^,.]+/gi,
/\breferans:\s*[^,.]+/gi,
/\bkaynak:\s*[^,.]+/gi,
];
// Common Turkish tech/news source brands to strip
// Comprehensive list of Turkish tech/news source brands to strip
const sourceNames = [
'donanımhaber', 'technopat', 'webtekno', 'shiftdelete',
'tamindir', 'donanımhaber', 'technopat', 'webtekno', 'shiftdelete',
'chip online', 'log.com', 'mediatrend', 'bbc', 'cnn',
'reuters', 'anadolu ajansı', 'hürriyet', 'milliyet',
'sabah', 'forbes', 'bloomberg', 'techcrunch',
'the verge', 'engadget', 'ars technica', 'wired',
'mashable', 'gizmodo', 'tom\'s hardware', 'tom\'s guide',
'ntv', 'habertürk', 'sozcu', 'sözcü', 'cumhuriyet', 'star',
'posta', 'aksam', 'yeni safak', 'yeni şafak', 'takvim',
'mynet', 'ensonhaber', 'haber7', 'internethaber',
'ad hoc news', 'finanzen.net', 'der aktionär', 'aktionar',
'business insider', 'cnbc', 'financial times', 'wall street journal',
];
for (const pattern of attributionPatterns) {
@@ -615,12 +710,15 @@ SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
}
for (const source of sourceNames) {
const regex = new RegExp(`\\b${source}\\b`, 'gi');
const regex = new RegExp(`\\b${source.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'gi');
sanitized = sanitized.replace(regex, '');
}
// Clean up multiple spaces and trailing commas
sanitized = sanitized.replace(/\s{2,}/g, ' ').replace(/,\s*,/g, ',').trim();
// Strip the dangling trailing " -" left at the end of a line once a source name has been removed above (e.g. "Great News - Tamindir" becomes "Great News")
sanitized = sanitized.replace(/\s*-\s*$/gm, '');
// Clean up multiple spaces, trailing commas, and orphaned punctuation
sanitized = sanitized.replace(/\s{2,}/g, ' ').replace(/,\s*,/g, ',').replace(/\s+([.,;:!?])/g, '$1').trim();
return sanitized;
}