diff --git a/src/modules/content-generation/services/x-to-markdown.service.ts b/src/modules/content-generation/services/x-to-markdown.service.ts index 812ef7a..b7836fc 100644 --- a/src/modules/content-generation/services/x-to-markdown.service.ts +++ b/src/modules/content-generation/services/x-to-markdown.service.ts @@ -1,4 +1,5 @@ -// X to Markdown Service - Converts X/Twitter posts to markdown using baoyu skill +// X to Markdown Service - Converts X/Twitter posts to markdown +// Uses baoyu skill as primary, FXTwitter API as fallback // Path: src/modules/content-generation/services/x-to-markdown.service.ts import { Injectable, Logger } from '@nestjs/common'; @@ -23,14 +24,6 @@ export interface XToMarkdownResult { export class XToMarkdownService { private readonly logger = new Logger(XToMarkdownService.name); - // Path to the skill scripts directory — resolve relative to project root - private readonly SKILL_DIR = path.resolve( - process.cwd(), - '../.agent/skills/baoyu-danger-x-to-markdown/scripts', - ); - - private readonly MAIN_SCRIPT = path.join(this.SKILL_DIR, 'main.ts'); - /** * Check if a URL is an X/Twitter URL */ @@ -52,7 +45,20 @@ export class XToMarkdownService { } /** - * Convert an X/Twitter URL to markdown using the baoyu skill + * Extract tweet ID and username from X URL + */ + private parseTweetUrl(url: string): { username: string; tweetId: string } | null { + // Match https://x.com//status/ or https://twitter.com//status/ + const match = url.match(/(?:x\.com|twitter\.com)\/([^\/]+)\/status\/(\d+)/i); + if (match) { + return { username: match[1], tweetId: match[2] }; + } + return null; + } + + /** + * Convert an X/Twitter URL to markdown + * Strategy: Try baoyu skill first, then FXTwitter API fallback */ async convertToMarkdown(url: string): Promise { if (!this.isXUrl(url)) { @@ -64,53 +70,74 @@ export class XToMarkdownService { }; } - // Check if skill script exists - if (!fs.existsSync(this.MAIN_SCRIPT)) { - this.logger.warn(`X-to-Markdown skill script not found at: ${this.MAIN_SCRIPT}`); - // Try alternative paths - const altPaths = [ - path.resolve(process.cwd(), '.agent/skills/baoyu-danger-x-to-markdown/scripts/main.ts'), - path.resolve(process.cwd(), '../../.agent/skills/baoyu-danger-x-to-markdown/scripts/main.ts'), - ]; - - let found = false; - for (const altPath of altPaths) { - if (fs.existsSync(altPath)) { - this.logger.log(`Found script at alternative path: ${altPath}`); - return this.runScript(altPath, url); - } - } - - if (!found) { - return { - success: false, - markdown: '', - url, - error: `Skill script not found. Searched: ${this.MAIN_SCRIPT}, ${altPaths.join(', ')}`, - }; - } + // Try baoyu skill script first + const scriptResult = await this.tryBaoyuScript(url); + if (scriptResult.success && scriptResult.markdown) { + return scriptResult; } - return this.runScript(this.MAIN_SCRIPT, url); + this.logger.warn(`Baoyu script failed or returned empty: ${scriptResult.error}. Trying FXTwitter fallback...`); + + // Fallback: FXTwitter API (public, no auth needed) + const fxResult = await this.tryFxTwitter(url); + if (fxResult.success) { + return fxResult; + } + + this.logger.warn(`FXTwitter also failed: ${fxResult.error}. Trying syndication API...`); + + // Fallback 2: Twitter syndication API + const synResult = await this.trySyndication(url); + if (synResult.success) { + return synResult; + } + + return { + success: false, + markdown: '', + url, + error: `All methods failed. Baoyu: ${scriptResult.error}. FXTwitter: ${fxResult.error}. Syndication: ${synResult.error}`, + }; } /** - * Run the baoyu skill script to convert a URL to markdown + * Try baoyu skill script */ - private async runScript(scriptPath: string, url: string): Promise { - try { - this.logger.log(`Converting X URL to markdown: ${url}`); + private async tryBaoyuScript(url: string): Promise { + // Find the skill script + const possiblePaths = [ + path.resolve(process.cwd(), '../.agent/skills/baoyu-danger-x-to-markdown/scripts/main.ts'), + path.resolve(process.cwd(), '.agent/skills/baoyu-danger-x-to-markdown/scripts/main.ts'), + path.resolve(process.cwd(), '../../.agent/skills/baoyu-danger-x-to-markdown/scripts/main.ts'), + ]; - // Use npx -y bun to run the TypeScript script + let scriptPath: string | null = null; + for (const p of possiblePaths) { + if (fs.existsSync(p)) { + scriptPath = p; + break; + } + } + + if (!scriptPath) { + return { + success: false, + markdown: '', + url, + error: 'Baoyu skill script not found', + }; + } + + try { + this.logger.log(`Running baoyu script: ${scriptPath}`); const { stdout, stderr } = await execFileAsync( 'npx', ['-y', 'bun', scriptPath, url, '--json'], { - timeout: 30000, // 30 second timeout - maxBuffer: 1024 * 1024 * 5, // 5MB + timeout: 30000, + maxBuffer: 1024 * 1024 * 5, env: { ...process.env, - // Pass X auth tokens if set in environment X_AUTH_TOKEN: process.env.X_AUTH_TOKEN || '', X_CT0: process.env.X_CT0 || '', }, @@ -118,50 +145,244 @@ export class XToMarkdownService { ); if (stderr) { - this.logger.warn(`X-to-Markdown stderr: ${stderr.substring(0, 500)}`); + this.logger.warn(`Baoyu stderr: ${stderr.substring(0, 300)}`); } - // Try to parse JSON output - try { - const result = JSON.parse(stdout.trim()); - this.logger.log(`Successfully converted X URL: ${url} (${result.markdown?.length || 0} chars)`); + // Check for "Failed to fetch thread" in stdout + if (stdout.includes('Failed to fetch thread') || stdout.includes('Failed to fetch')) { return { - success: true, - markdown: result.markdown || result.content || stdout, - url, - author: result.author, - tweetCount: result.tweetCount, - coverImage: result.coverImage, - }; - } catch { - // If not valid JSON, the output is the markdown itself - this.logger.log(`X-to-Markdown returned plain text (${stdout.length} chars)`); - return { - success: true, - markdown: stdout.trim(), + success: false, + markdown: '', url, + error: 'Baoyu script: Failed to fetch thread (API returned empty tweet_results)', }; } + + try { + const result = JSON.parse(stdout.trim()); + if (result.markdown || result.content) { + return { + success: true, + markdown: result.markdown || result.content || stdout, + url, + author: result.author, + tweetCount: result.tweetCount, + coverImage: result.coverImage, + }; + } + } catch { + // Check if output is actually markdown content + if (stdout.trim().length > 50 && !stdout.includes('Failed')) { + return { + success: true, + markdown: stdout.trim(), + url, + }; + } + } + + return { + success: false, + markdown: '', + url, + error: `Baoyu script returned no content: ${stdout.substring(0, 200)}`, + }; } catch (error: any) { - this.logger.error(`Failed to convert X URL: ${url}`, error.message); - - // If the script fails, try a simpler fallback approach - return this.fallbackScrape(url); + return { + success: false, + markdown: '', + url, + error: `Baoyu script error: ${error.message}`, + }; } } /** - * Fallback: If the baoyu script fails, try a basic scrape - * using the existing WebScraperService pattern + * Fallback: Use FXTwitter API (public, no auth required) + * https://github.com/FixTweet/FxTwitter */ - private async fallbackScrape(url: string): Promise { - this.logger.warn(`Using fallback scrape for X URL: ${url}`); - return { - success: false, - markdown: '', - url, - error: 'Script execution failed. X auth tokens may be needed. Set X_AUTH_TOKEN and X_CT0 environment variables.', - }; + private async tryFxTwitter(url: string): Promise { + const parsed = this.parseTweetUrl(url); + if (!parsed) { + return { success: false, markdown: '', url, error: 'Cannot parse tweet URL' }; + } + + try { + const apiUrl = `https://api.fxtwitter.com/${parsed.username}/status/${parsed.tweetId}`; + this.logger.log(`Trying FXTwitter API: ${apiUrl}`); + + const response = await fetch(apiUrl, { + headers: { + 'User-Agent': 'ContentHunter/1.0', + 'Accept': 'application/json', + }, + signal: AbortSignal.timeout(15000), + }); + + if (!response.ok) { + return { + success: false, + markdown: '', + url, + error: `FXTwitter API returned ${response.status}`, + }; + } + + const data = await response.json(); + const tweet = data?.tweet; + + if (!tweet || !tweet.text) { + return { + success: false, + markdown: '', + url, + error: 'FXTwitter returned no tweet content', + }; + } + + // Build markdown from FXTwitter response + const markdown = this.buildMarkdownFromFxTweet(tweet, url); + + return { + success: true, + markdown, + url, + author: tweet.author?.name + ? `${tweet.author.name} (@${tweet.author.screen_name})` + : `@${parsed.username}`, + tweetCount: 1, + coverImage: tweet.media?.photos?.[0]?.url || tweet.author?.avatar_url, + }; + } catch (error: any) { + return { + success: false, + markdown: '', + url, + error: `FXTwitter error: ${error.message}`, + }; + } + } + + /** + * Fallback 2: Use Twitter syndication API (embed data) + */ + private async trySyndication(url: string): Promise { + const parsed = this.parseTweetUrl(url); + if (!parsed) { + return { success: false, markdown: '', url, error: 'Cannot parse tweet URL' }; + } + + try { + const apiUrl = `https://cdn.syndication.twimg.com/tweet-result?id=${parsed.tweetId}&lang=en&token=x`; + this.logger.log(`Trying syndication API: ${apiUrl}`); + + const response = await fetch(apiUrl, { + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; ContentHunter/1.0)', + 'Accept': 'application/json', + }, + signal: AbortSignal.timeout(10000), + }); + + if (!response.ok) { + return { + success: false, + markdown: '', + url, + error: `Syndication API returned ${response.status}`, + }; + } + + const data = await response.json(); + if (!data?.text) { + return { + success: false, + markdown: '', + url, + error: 'Syndication API returned no text', + }; + } + + // Build markdown from syndication data + const author = data.user?.name + ? `${data.user.name} (@${data.user.screen_name})` + : `@${parsed.username}`; + + let markdown = `---\nurl: "${url}"\nauthor: "${author}"\n---\n\n`; + markdown += data.text; + + if (data.mediaDetails) { + markdown += '\n\n'; + for (const media of data.mediaDetails) { + if (media.type === 'photo') { + markdown += `![${media.ext_alt_text || 'image'}](${media.media_url_https})\n`; + } else if (media.type === 'video') { + const bestVariant = media.video_info?.variants + ?.filter((v: any) => v.content_type === 'video/mp4') + ?.sort((a: any, b: any) => (b.bitrate || 0) - (a.bitrate || 0))?.[0]; + if (bestVariant) { + markdown += `[Video](${bestVariant.url})\n`; + } + } + } + } + + return { + success: true, + markdown, + url, + author, + tweetCount: 1, + coverImage: data.mediaDetails?.[0]?.media_url_https || data.user?.profile_image_url_https, + }; + } catch (error: any) { + return { + success: false, + markdown: '', + url, + error: `Syndication error: ${error.message}`, + }; + } + } + + /** + * Build markdown from FXTwitter API response + */ + private buildMarkdownFromFxTweet(tweet: any, originalUrl: string): string { + const author = tweet.author?.name + ? `${tweet.author.name} (@${tweet.author.screen_name})` + : 'Unknown'; + + let md = `---\nurl: "${originalUrl}"\nauthor: "${author}"\ncreated_at: "${tweet.created_at || ''}"\nlikes: ${tweet.likes || 0}\nretweets: ${tweet.retweets || 0}\nreplies: ${tweet.replies || 0}\n---\n\n`; + + // Main text + md += tweet.text + '\n'; + + // Media + if (tweet.media?.photos?.length) { + md += '\n'; + for (const photo of tweet.media.photos) { + md += `![${photo.altText || 'image'}](${photo.url})\n`; + } + } + + if (tweet.media?.videos?.length) { + md += '\n'; + for (const video of tweet.media.videos) { + md += `[Video](${video.url})\n`; + } + } + + // Quote tweet + if (tweet.quote) { + const quoteAuthor = tweet.quote.author?.name + ? `${tweet.quote.author.name} (@${tweet.quote.author.screen_name})` + : 'Unknown'; + md += `\n> **${quoteAuthor}:**\n`; + md += `> ${tweet.quote.text?.replace(/\n/g, '\n> ')}\n`; + } + + return md; } /** @@ -169,7 +390,7 @@ export class XToMarkdownService { */ getMarkdownExcerpt(markdown: string, maxLength: number = 3000): string { if (!markdown) return ''; - + // Remove YAML front matter const fmMatch = markdown.match(/^---\n[\s\S]*?\n---\n/); let content = fmMatch ? markdown.slice(fmMatch[0].length) : markdown; diff --git a/x-to-markdown/NVIDIAAIDev/2036928009366540789/1.md b/x-to-markdown/NVIDIAAIDev/2036928009366540789/1.md new file mode 100644 index 0000000..fe7cf0f --- /dev/null +++ b/x-to-markdown/NVIDIAAIDev/2036928009366540789/1.md @@ -0,0 +1,62 @@ +--- +url: "https://x.com/NVIDIAAIDev/status/2036928009366540789" +requestedUrl: "https://x.com/NVIDIAAIDev/status/2036928009366540789" +author: "NVIDIA AI Developer (@NVIDIAAIDev)" +authorName: "NVIDIA AI Developer" +authorUsername: "NVIDIAAIDev" +authorUrl: "https://x.com/NVIDIAAIDev" +tweetCount: 6 +--- + +## 1 +https://x.com/NVIDIAAIDev/status/2036928009366540789 + +🧵 We are expanding our open model families—including Nemotron, Cosmos, and BioNeMo—to advance development in agentic AI, physical AI, healthcare, and more. 👇 https://t.co/I5mjQN2fft + +![video](https://pbs.twimg.com/media/HESfOeyWIAAqGF3.jpg) +[video](https://video.twimg.com/amplify_video/2036927885873684480/vid/avc1/1280x720/Swv4g6GGU6Mf7dnB.mp4?tag=14) + +## 2 +https://x.com/NVIDIAAIDev/status/2036928019747467631 + +Access six frontier open model families on @HuggingFace to build your next specialized AI applications. + +🤗 https://t.co/x6tMllfkzD https://t.co/FNWdDX3uAp + +![](https://pbs.twimg.com/media/HESfLUjWUAAtRIN.jpg) + +## 3 +https://x.com/NVIDIAAIDev/status/2036928025304936619 + +@huggingface Nemotron 3 Super is now a leading reasoning foundation for @OpenClaw 🦞 and complex agentic workflows, with 1.5M+ downloads in its first two weeks. + +🤗 https://t.co/DSSLIXYIDz https://t.co/CMn1nxLvcH + +![](https://pbs.twimg.com/media/HESfLh2WcAA7KM4.jpg) + +## 4 +https://x.com/NVIDIAAIDev/status/2036928028970807406 + +@huggingface @openclaw For top reasoning accuracy across understanding, coding, and math benchmarks, Nemotron 3 Ultra (coming soon) sets a new efficiency standard for open models. + +📰 https://t.co/zlDwhgcBY0 https://t.co/B8TigCuNp4 + +![](https://pbs.twimg.com/media/HESfLtcXEAEgnlW.jpg) + +## 5 +https://x.com/NVIDIAAIDev/status/2036928032565313694 + +@huggingface @openclaw You can leverage Nemotron frameworks and persona data to build sovereign AI models tailored to regional languages and local cultural values. + +🤗 https://t.co/RDC4Arh0JV https://t.co/aNbkg0GyCY + +![](https://pbs.twimg.com/media/HESfL5pWcAAYrp9.jpg) + +## 6 +https://x.com/NVIDIAAIDev/status/2036928036742762698 + +@huggingface @openclaw Access the full family of our open model weights and technical documentation on @HuggingFace to accelerate your specialized domain research. + +🤗 https://t.co/x6tMllfkzD https://t.co/VJaL0NgRSj + +![](https://pbs.twimg.com/media/HESfMIUW4AAgPuF.jpg) \ No newline at end of file