generated from fahricansecer/boilerplate-be
This commit is contained in:
@@ -14,11 +14,13 @@ import { SeoService, FullSeoAnalysis as SeoDTO } from '../seo/seo.service';
|
||||
import { NeuroMarketingService } from '../neuro-marketing/neuro-marketing.service';
|
||||
import { StorageService } from '../visual-generation/services/storage.service';
|
||||
import { VisualGenerationService } from '../visual-generation/visual-generation.service';
|
||||
import { WebScraperService, ScrapedContent } from '../trends/services/web-scraper.service';
|
||||
import { ContentType as PrismaContentType, ContentStatus as PrismaContentStatus, MasterContentType as PrismaMasterContentType } from '@prisma/client';
|
||||
|
||||
|
||||
export interface ContentGenerationRequest {
|
||||
topic: string;
|
||||
sourceUrl?: string;
|
||||
niche?: string;
|
||||
platforms: Platform[];
|
||||
includeResearch?: boolean;
|
||||
@@ -76,6 +78,7 @@ export class ContentGenerationService {
|
||||
private readonly neuroService: NeuroMarketingService,
|
||||
private readonly storageService: StorageService,
|
||||
private readonly visualService: VisualGenerationService,
|
||||
private readonly webScraperService: WebScraperService,
|
||||
) { }
|
||||
|
||||
|
||||
@@ -87,6 +90,7 @@ export class ContentGenerationService {
|
||||
async generateContent(request: ContentGenerationRequest): Promise<GeneratedContentBundle> {
|
||||
const {
|
||||
topic,
|
||||
sourceUrl,
|
||||
niche,
|
||||
platforms,
|
||||
includeResearch = true,
|
||||
@@ -99,6 +103,26 @@ export class ContentGenerationService {
|
||||
|
||||
console.log(`[ContentGenerationService] Starting generation for topic: ${topic}, platforms: ${platforms.join(', ')}`);
|
||||
|
||||
// ========== STEP 1: Scrape source article if URL provided ==========
|
||||
let scrapedSource: ScrapedContent | null = null;
|
||||
if (sourceUrl) {
|
||||
this.logger.log(`Scraping source article: ${sourceUrl}`);
|
||||
try {
|
||||
scrapedSource = await this.webScraperService.scrapeUrl(sourceUrl, {
|
||||
extractImages: true,
|
||||
extractLinks: true,
|
||||
timeout: 15000,
|
||||
}, topic);
|
||||
if (scrapedSource) {
|
||||
this.logger.log(`Scraped source: ${scrapedSource.wordCount} words, ${scrapedSource.images.length} images, ${scrapedSource.videoLinks.length} videos`);
|
||||
} else {
|
||||
this.logger.warn(`Failed to scrape source URL: ${sourceUrl}`);
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.warn(`Source scraping error: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Analyze niche if provided
|
||||
let nicheAnalysis: NicheAnalysis | undefined;
|
||||
if (niche) {
|
||||
@@ -116,6 +140,23 @@ export class ContentGenerationService {
|
||||
});
|
||||
}
|
||||
|
||||
// ========== Build enriched context from scraped source ==========
|
||||
let sourceContext = '';
|
||||
if (scrapedSource) {
|
||||
const articleText = scrapedSource.content.substring(0, 3000);
|
||||
const videoInfo = scrapedSource.videoLinks.length > 0
|
||||
? `\nVİDEO LİNKLERİ: ${scrapedSource.videoLinks.join(', ')}`
|
||||
: '';
|
||||
const importantLinks = scrapedSource.links
|
||||
.filter(l => l.isExternal && !l.href.includes('facebook') && !l.href.includes('twitter'))
|
||||
.slice(0, 5)
|
||||
.map(l => `${l.text}: ${l.href}`)
|
||||
.join('\n');
|
||||
const linkInfo = importantLinks ? `\nÖNEMLİ LİNKLER:\n${importantLinks}` : '';
|
||||
|
||||
sourceContext = `\n\n📰 KAYNAK MAKALE İÇERİĞİ (ZORUNLU REFERANS):\n${articleText}${videoInfo}${linkInfo}\n\n⚠️ ÖNEMLİ: Yukarıdaki kaynak makaledeki TÜM özneleri (kişi, ürün, oyun adları, tarihler, fiyatlar, markalar) habere dahil et. Hiçbir önemli bilgiyi atlama. Video linkleri ve önemli dış linkler varsa bunları da içerikte paylaş.`;
|
||||
}
|
||||
|
||||
// Generate content for each platform using AI
|
||||
const platformContent: GeneratedContent[] = [];
|
||||
for (const platform of platforms) {
|
||||
@@ -127,11 +168,13 @@ export class ContentGenerationService {
|
||||
const sanitizedSummary = this.sanitizeResearchSummary(
|
||||
research?.summary || `Everything you need to know about ${topic}`
|
||||
);
|
||||
// Append scraped source context to give AI the full article details
|
||||
const enrichedSummary = sanitizedSummary + sourceContext;
|
||||
// Normalize platform to lowercase for consistency
|
||||
const normalizedPlatform = platform.toLowerCase();
|
||||
const aiContent = await this.platformService.generateAIContent(
|
||||
topic,
|
||||
sanitizedSummary,
|
||||
enrichedSummary,
|
||||
normalizedPlatform as any, // Cast to any/Platform to resolve type mismatch if Platform is strict union
|
||||
'standard',
|
||||
'tr',
|
||||
@@ -145,6 +188,9 @@ export class ContentGenerationService {
|
||||
this.logger.warn(`AI Content is empty for ${platform}`);
|
||||
}
|
||||
|
||||
// Use scraped image from source if available
|
||||
const sourceImageUrl = scrapedSource?.images?.[0]?.src || undefined;
|
||||
|
||||
const config = this.platformService.getPlatformConfig(platform);
|
||||
let content: GeneratedContent = {
|
||||
platform,
|
||||
@@ -163,10 +209,19 @@ export class ContentGenerationService {
|
||||
content.content = voiceApplied.branded;
|
||||
}
|
||||
|
||||
// Add hashtags if requested
|
||||
// Add hashtags using AI (based on actual generated content)
|
||||
if (includeHashtags) {
|
||||
const hashtagSet = this.hashtagService.generateHashtags(topic, platform);
|
||||
content.hashtags = hashtagSet.hashtags.map((h) => h.hashtag);
|
||||
try {
|
||||
content.hashtags = await this.platformService.generateAIHashtags(
|
||||
content.content,
|
||||
topic,
|
||||
platform as any,
|
||||
'tr',
|
||||
);
|
||||
} catch (hashErr) {
|
||||
this.logger.warn(`AI hashtag generation failed, skipping: ${hashErr.message}`);
|
||||
content.hashtags = [];
|
||||
}
|
||||
}
|
||||
|
||||
// Generate image for visual platforms
|
||||
@@ -180,11 +235,31 @@ export class ContentGenerationService {
|
||||
platform: platformKey,
|
||||
enhancePrompt: true,
|
||||
});
|
||||
content.imageUrl = image.url;
|
||||
this.logger.log(`Image generated for ${platform}: ${image.url}`);
|
||||
|
||||
// Check if image is a real image or just a placeholder
|
||||
const isPlaceholder = image.url?.includes('placehold.co') || image.url?.includes('placeholder');
|
||||
if (!isPlaceholder) {
|
||||
content.imageUrl = image.url;
|
||||
this.logger.log(`Image generated for ${platform}: ${image.url}`);
|
||||
} else if (sourceImageUrl) {
|
||||
// Use scraped source image instead of placeholder
|
||||
content.imageUrl = sourceImageUrl;
|
||||
this.logger.log(`Using scraped source image instead of placeholder: ${sourceImageUrl}`);
|
||||
} else {
|
||||
content.imageUrl = image.url;
|
||||
this.logger.log(`Image generated for ${platform}: ${image.url} (placeholder, no source image available)`);
|
||||
}
|
||||
} catch (imgError) {
|
||||
this.logger.warn(`Image generation failed for ${platform}, continuing without image`, imgError);
|
||||
// Fallback to scraped source image
|
||||
if (sourceImageUrl) {
|
||||
content.imageUrl = sourceImageUrl;
|
||||
this.logger.log(`Using scraped source image as fallback: ${sourceImageUrl}`);
|
||||
}
|
||||
}
|
||||
} else if (sourceImageUrl && !content.imageUrl) {
|
||||
// For non-visual platforms, still attach source image if available
|
||||
content.imageUrl = sourceImageUrl;
|
||||
}
|
||||
|
||||
platformContent.push(content);
|
||||
@@ -358,7 +433,7 @@ export class ContentGenerationService {
|
||||
userId: effectiveUserId!,
|
||||
masterContentId: masterContent.id,
|
||||
type: contentType,
|
||||
title: `${bundle.topic} - ${platformContent.platform}`,
|
||||
title: this.sanitizeResearchSummary(`${bundle.topic}`) + ` - ${platformContent.platform}`,
|
||||
body: platformContent.content,
|
||||
hashtags: platformContent.hashtags,
|
||||
status: PrismaContentStatus.DRAFT,
|
||||
@@ -548,6 +623,8 @@ KURALLAR:
|
||||
6. Karakter limitini koru
|
||||
7. Platformun tonuna uygun yaz
|
||||
8. SADECE yayınlanacak metni yaz
|
||||
9. Hiçbir haber sitesi, kaynak, ajans veya web sitesi adı kullanma
|
||||
10. "...göre", "...haberlere göre", "...kaynağına göre" gibi atıf ifadeleri ASLA kullanma
|
||||
|
||||
SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
|
||||
|
||||
@@ -589,25 +666,43 @@ SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
|
||||
sanitized = sanitized.replace(/https?:\/\/[^\s]+/gi, '');
|
||||
sanitized = sanitized.replace(/www\.[^\s]+/gi, '');
|
||||
|
||||
// Remove common Turkish attribution phrases
|
||||
// Remove common attribution phrases (Turkish and English)
|
||||
const attributionPatterns = [
|
||||
/\b\w+\.com(\.tr)?\b/gi,
|
||||
/\b\w+\.org(\.tr)?\b/gi,
|
||||
/\b\w+\.net(\.tr)?\b/gi,
|
||||
/\bkaynağına göre\b/gi,
|
||||
/\b'e göre\b/gi,
|
||||
/\b'(i|a|e|u|ü|\u0131)n(da|de) (yayınlanan|yer alan|çıkan)\b/gi,
|
||||
/\b(da|de) (çıkan|yayınlanan|yer alan) (haberlere|habere|bilgilere) göre\b/gi,
|
||||
/\bhaberlere göre\b/gi,
|
||||
/\braporuna göre\b/gi,
|
||||
/\bsitesinde yer alan\b/gi,
|
||||
/\baçıklamasına göre\b/gi,
|
||||
/\byazısına göre\b/gi,
|
||||
/\bhaberine göre\b/gi,
|
||||
/\btarafından yapılan\b/gi,
|
||||
/\baccording to [^,.]+/gi,
|
||||
/\breported by [^,.]+/gi,
|
||||
/\bas reported in [^,.]+/gi,
|
||||
/\bsource:\s*[^,.]+/gi,
|
||||
/\breferans:\s*[^,.]+/gi,
|
||||
/\bkaynak:\s*[^,.]+/gi,
|
||||
];
|
||||
|
||||
// Common Turkish tech/news source brands to strip
|
||||
// List of tech/news source brands (Turkish and international) to strip
|
||||
const sourceNames = [
|
||||
'donanımhaber', 'technopat', 'webtekno', 'shiftdelete',
|
||||
'tamindir', 'donanımhaber', 'technopat', 'webtekno', 'shiftdelete',
|
||||
'chip online', 'log.com', 'mediatrend', 'bbc', 'cnn',
|
||||
'reuters', 'anadolu ajansı', 'hürriyet', 'milliyet',
|
||||
'sabah', 'forbes', 'bloomberg', 'techcrunch',
|
||||
'the verge', 'engadget', 'ars technica', 'wired',
|
||||
'mashable', 'gizmodo', 'tom\'s hardware', 'tom\'s guide',
|
||||
'ntv', 'habertürk', 'sozcu', 'sözcü', 'cumhuriyet', 'star',
|
||||
'posta', 'aksam', 'yeni safak', 'yeni şafak', 'takvim',
|
||||
'mynet', 'ensonhaber', 'haber7', 'internethaber',
|
||||
'ad hoc news', 'finanzen.net', 'der aktionär', 'aktionar',
|
||||
'business insider', 'cnbc', 'financial times', 'wall street journal',
|
||||
];
|
||||
|
||||
for (const pattern of attributionPatterns) {
|
||||
@@ -615,12 +710,15 @@ SADECE yeniden yazılmış metni döndür, açıklama ekleme.`;
|
||||
}
|
||||
|
||||
for (const source of sourceNames) {
|
||||
const regex = new RegExp(`\\b${source}\\b`, 'gi');
|
||||
const regex = new RegExp(`\\b${source.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'gi');
|
||||
sanitized = sanitized.replace(regex, '');
|
||||
}
|
||||
|
||||
// Clean up multiple spaces and trailing commas
|
||||
sanitized = sanitized.replace(/\s{2,}/g, ' ').replace(/,\s*,/g, ',').trim();
|
||||
// Remove the dangling " -" left at the end of a line once a trailing source name has been stripped above (e.g. "Great News - Tamindir" -> "Great News")
|
||||
sanitized = sanitized.replace(/\s*-\s*$/gm, '');
|
||||
|
||||
// Clean up multiple spaces, trailing commas, and orphaned punctuation
|
||||
sanitized = sanitized.replace(/\s{2,}/g, ' ').replace(/,\s*,/g, ',').replace(/\s+([.,;:!?])/g, '$1').trim();
|
||||
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user