/**
 * Feeder Scraper Service
 *
 * Thin HTTP + HTML-parsing layer over mackolik.com endpoints. All requests go
 * through a single axios instance created with DEFAULT_HEADERS and
 * DEFAULT_TIMEOUT; HTML fragments are parsed with cheerio.
 */
import { Injectable, Logger } from "@nestjs/common";
import axios, { AxiosInstance, AxiosRequestConfig } from "axios";
import * as cheerio from "cheerio";
import {
  Sport,
  SPORTS_CONFIG,
  DEFAULT_HEADERS,
  DEFAULT_TIMEOUT,
  KeyEventsResponse,
  MatchStatsResponse,
  GameStatsResponse,
  ManagerResponse,
  IddaaMarketsHtmlResponse,
  BasketballBoxScoreResponse,
  ParsedMatchHeader,
  ParsedMarket,
  ParsedSelection,
  BasketballPlayerStats,
  LivescoresApiResponse,
  SidelinedResponse,
  SidelinedTeamData,
  SidelinedPlayer,
} from "./feeder.types";

@Injectable()
export class FeederScraperService {
  private readonly logger = new Logger(FeederScraperService.name);
  private readonly axios: AxiosInstance;

  /**
   * Builds the shared axios instance and installs a response interceptor that
   * logs every outcome (query string stripped from the URL) and re-throws
   * failures unchanged.
   */
  constructor() {
    this.axios = axios.create({
      headers: DEFAULT_HEADERS,
      timeout: DEFAULT_TIMEOUT,
    });

    this.axios.interceptors.response.use(
      (response) => {
        this.logger.debug(
          `✅ [${response.config.url?.split("?")[0]}] Status: ${response.status}`,
        );
        return response;
      },
      (error) => {
        const status = error.response?.status || "N/A";
        const url = error.config?.url?.split("?")[0] || "Unknown";
        this.logger.error(`❌ [${url}] Status: ${status} - ${error.message}`);
        throw error;
      },
    );
  }

  // ============================================
  // Historical source endpoint (match list)
  // ============================================

  /**
   * Fetches the livescores JSON for one sport and one date.
   *
   * @param dateString Value sent as the `matchDate` query parameter.
   * @param sport Key into SPORTS_CONFIG; its `sportParam` is sent as `sports[]`.
   * @returns The raw payload, after checking it is an object with `status` and `data` keys.
   * @throws Error when the payload does not have that minimal shape; request errors propagate.
   */
  async fetchLivescores(
    dateString: string,
    sport: Sport,
  ): Promise<LivescoresApiResponse> {
    const { sportParam } = SPORTS_CONFIG[sport];
    const url =
      "https://www.mackolik.com/perform/p0/ajax/components/competition/livescores/json";

    this.logger.log(
      `📡 [${sport}] Fetching historical source snapshot for ${dateString}`,
    );

    const response = await this.axios.get(url, {
      params: {
        "sports[]": sportParam,
        matchDate: dateString,
      },
    });

    // Only the presence of the two top-level keys is verified; nested fields are trusted.
    const payload = response.data as unknown;
    const hasExpectedShape =
      !!payload &&
      typeof payload === "object" &&
      "status" in payload &&
      "data" in payload;
    if (!hasExpectedShape) {
      throw new Error("Historical source payload has invalid shape");
    }

    return payload as LivescoresApiResponse;
  }

  // ============================================
  // MATCH HEADER (Score, Status, HT Score)
  // ============================================

  /**
   * Fetches the match-header HTML fragment and parses status and scores from it.
   * A missing `data.html` is parsed as an empty document. Request errors propagate.
   */
  async fetchMatchHeader(matchId: string): Promise<ParsedMatchHeader> {
    const url =
      "https://www.mackolik.com/perform/p0/ajax/components/match/matchHeader";

    this.logger.debug(`📡 [${matchId}] Fetching match header`);

    const response = await this.axios.get(url, {
      params: {
        matchId,
        sdapiLanguageCode: "tr-mk",
        ajaxViewName: "match-details",
        ajaxPartialViewName: "match-details-status",
        displayMode: "all",
      },
    });

    return this.parseMatchHeader(response.data.data?.html || "");
  }

  /**
   * Extracts match status, full-time score and half-time score from header HTML.
   * Status falls back to "postGame" when the attribute is absent or empty;
   * scores are null when the corresponding text is not an integer.
   */
  private parseMatchHeader(html: string): ParsedMatchHeader {
    const $ = cheerio.load(html);

    // The status type is declared in feeder.types and not visible here, so the
    // raw attribute is passed through untyped.
    const matchStatus =
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      ($("[data-match-status]").attr("data-match-status") as any) ||
      "postGame";

    const scoreHome = this.safeInt($('[data-slot="score-home"]').text().trim());
    const scoreAway = this.safeInt($('[data-slot="score-away"]').text().trim());

    // Half-time score appears inside the detailed score as "(İY X - Y)".
    const detailedScore = $(".p0c-soccer-match-details-header__detailed-score")
      .text()
      .trim();
    const htMatch = detailedScore.match(/\(İY\s*(\d+)\s*-\s*(\d+)\)/);
    const htScoreHome = htMatch ? this.safeInt(htMatch[1]) : null;
    const htScoreAway = htMatch ? this.safeInt(htMatch[2]) : null;

    return { matchStatus, scoreHome, scoreAway, htScoreHome, htScoreAway };
  }

  // ============================================
  // KEY EVENTS (Goals, Cards, Substitutes)
  // ============================================

  /** Fetches key events for a match; resolves to null on HTTP 404. */
  async fetchKeyEvents(matchId: string): Promise<KeyEventsResponse | null> {
    this.logger.debug(`📡 [${matchId}] Fetching key events`);

    return this.getDataOrNullOn404<KeyEventsResponse>(
      "https://www.mackolik.com/ajax/football/key-events",
      // The endpoint is called with seasonId set to the match id.
      { params: { ajaxViewName: "events", matchId, seasonId: matchId } },
      matchId,
      "Key events",
    );
  }

  // ============================================
  // MATCH STATS - STARTING FORMATION (İlk 11)
  // ============================================

  /** Fetches the "starting-formation" match-stats view; resolves to null on HTTP 404. */
  async fetchStartingFormation(
    matchId: string,
  ): Promise<MatchStatsResponse | null> {
    this.logger.debug(`📡 [${matchId}] Fetching starting formation`);

    return this.getDataOrNullOn404<MatchStatsResponse>(
      "https://www.mackolik.com/ajax/football/match-stats",
      {
        params: {
          ajaxViewName: "starting-formation",
          matchId,
          seasonId: matchId,
        },
      },
      matchId,
      "Starting formation",
    );
  }

  // ============================================
  // MATCH STATS - SUBSTITUTIONS (Yedekler)
  // ============================================

  /** Fetches the "substitutions" match-stats view; resolves to null on HTTP 404. */
  async fetchSubstitutions(
    matchId: string,
  ): Promise<MatchStatsResponse | null> {
    this.logger.debug(`📡 [${matchId}] Fetching substitutions`);

    return this.getDataOrNullOn404<MatchStatsResponse>(
      "https://www.mackolik.com/ajax/football/match-stats",
      { params: { ajaxViewName: "substitutions", matchId, seasonId: matchId } },
      matchId,
      "Substitutions",
    );
  }

  // ============================================
  // GAME STATS (Possession, Shots, Passes)
  // ============================================

  /** Fetches game stats for a match; resolves to null on HTTP 404. */
  async fetchGameStats(matchId: string): Promise<GameStatsResponse | null> {
    this.logger.debug(`📡 [${matchId}] Fetching game stats`);

    return this.getDataOrNullOn404<GameStatsResponse>(
      "https://www.mackolik.com/ajax/soccer/match/gameStats",
      { params: { matchId } },
      matchId,
      "Game stats",
    );
  }

  // ============================================
  // MANAGER
  // ============================================

  /** Fetches the "manager" match-stats view; resolves to null on HTTP 404. */
  async fetchManager(matchId: string): Promise<ManagerResponse | null> {
    this.logger.debug(`📡 [${matchId}] Fetching manager`);

    return this.getDataOrNullOn404<ManagerResponse>(
      "https://www.mackolik.com/ajax/football/match-stats",
      { params: { ajaxViewName: "manager", matchId, seasonId: matchId } },
      matchId,
      "Manager",
    );
  }

  // ============================================
  // IDDAA MARKETS (HTML with odds + names)
  // ============================================

  /**
   * Fetches and parses the soccer iddaa markets HTML for a match.
   *
   * @returns Parsed markets; an empty array on HTTP 404 or when no HTML is present.
   */
  async fetchIddaaMarkets(matchId: string): Promise<ParsedMarket[]> {
    const url = `https://www.mackolik.com/ajax/iddaa/markets/soccer/all/${matchId}`;

    this.logger.debug(`📡 [${matchId}] Fetching iddaa markets`);

    try {
      const response = await this.axios.get(url, {
        params: { template: "all" },
      });
      return this.parseIddaaMarketsHtml(response.data.data?.html || "");
    } catch (error: unknown) {
      if (this.isNotFound(error)) {
        this.logger.warn(`[${matchId}] Iddaa markets not found (404)`);
        return [];
      }
      throw error;
    }
  }

  /**
   * Parses iddaa market widgets out of an HTML fragment.
   * Markets lacking either a `data-market` id or a header text are dropped.
   */
  private parseIddaaMarketsHtml(html: string): ParsedMarket[] {
    if (!html) return [];

    const $ = cheerio.load(html);
    const markets: ParsedMarket[] = [];

    $(".widget-iddaa-markets__market-item").each((_, marketEl) => {
      const $market = $(marketEl);
      const textOf = (selector: string): string =>
        $market.find(selector).text().trim();

      const marketId = $market.attr("data-market") || "";
      const marketName = textOf(".widget-iddaa-markets__header-text");
      const iddaaCode = textOf(".widget-iddaa-markets__iddaa-code");
      const mbc = textOf(".widget-iddaa-markets__mbc");

      const selections: ParsedSelection[] = [];
      $market.find(".widget-iddaa-markets__option").each((__, optionEl) => {
        const $option = $(optionEl);
        selections.push({
          shortcode: $option.attr("data-shortcode") || "",
          outcomeNo: $option.attr("data-outcome-no") || "",
          label: $option.find(".widget-iddaa-markets__label").text().trim(),
          value: $option.find(".widget-iddaa-markets__value").text().trim(),
        });
      });

      if (marketId && marketName) {
        markets.push({ marketId, marketName, iddaaCode, mbc, selections });
      }
    });

    this.logger.debug(`Parsed ${markets.length} iddaa markets`);
    return markets;
  }

  // ============================================
  // BASKETBALL BOX SCORE
  // ============================================

  /** Fetches the basketball box score payload; resolves to null on HTTP 404. */
  async fetchBasketballBoxScore(
    matchId: string,
  ): Promise<BasketballBoxScoreResponse | null> {
    this.logger.debug(`📡 [${matchId}] Fetching basketball box score`);

    return this.getDataOrNullOn404<BasketballBoxScoreResponse>(
      "https://www.mackolik.com/ajax/basketball/match/box-score",
      { params: { matchId }, headers: this.ajaxHeaders() },
      matchId,
      "Basketball box score",
    );
  }

  /**
   * Parses player rows and the footer totals row from box-score HTML.
   *
   * Rows without a player-name element or with fewer than 10 cells are skipped.
   * `id` is the last path segment of the player link, or "" when there is no
   * usable link. Totals stay `{}` unless the footer has more than 5 cells.
   */
  parseBasketballBoxScore(html: string): {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    teamTotals: any;
    players: Partial<BasketballPlayerStats>[];
  } {
    if (!html) return { teamTotals: {}, players: [] };

    const $ = cheerio.load(html);
    const players: Partial<BasketballPlayerStats>[] = [];

    $(".widget-basketball-match-box-score__row").each((_, rowEl) => {
      const row = $(rowEl);

      const nameElem = row.find(".widget-basketball-match-box-score__player");
      if (!nameElem.length) return;

      const cells = row.find("td");
      if (cells.length < 10) return;

      const link = nameElem.find("a").attr("href");

      players.push({
        id: link ? this.extractPlayerIdFromUrl(link) || "" : "",
        name: nameElem.text().trim(),
        minutes: cells.eq(1).text().trim(),
        ...this.parseBoxScoreStats(cells),
      });
    });

    // The footer row is read with the same column indices as player rows.
    const footerCells = $(".widget-basketball-match-box-score__footer td");
    const teamTotals =
      footerCells.length > 5 ? this.parseBoxScoreStats(footerCells) : {};

    return { teamTotals, players };
  }

  // ============================================
  // MATCH PAGE (Main page for officials parsing)
  // ============================================

  /**
   * Fetches the full HTML match page.
   *
   * Browser-style headers are sent and X-Requested-With is deliberately left
   * out of the per-request headers, because this is a page load rather than an
   * AJAX call. Request errors (including 404) propagate.
   *
   * @returns The response body as returned by axios.
   */
  async fetchMatchPage(
    matchId: string,
    matchSlug: string,
    sport: Sport,
  ): Promise<string> {
    const { iddaaUrlPath } = SPORTS_CONFIG[sport];
    const url = `https://www.mackolik.com/${iddaaUrlPath}/${matchSlug}/${matchId}`;

    this.logger.debug(`📡 [${matchId}] Fetching match page`);

    const response = await this.axios.get(url, {
      headers: {
        "User-Agent": DEFAULT_HEADERS["User-Agent"],
        Referer: DEFAULT_HEADERS["Referer"],
        "Accept-Language": DEFAULT_HEADERS["Accept-Language"],
        Accept:
          "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      },
    });

    return response.data;
  }

  // ============================================
  // BASKETBALL DETAILS HEADER (Quarter Scores)
  // ============================================

  /**
   * Fetches the basketball details header and parses per-quarter scores.
   *
   * @returns `{ home, away }` quarter scores, or null on HTTP 404, when the
   * score-details HTML is missing, or when fewer than two rows are found.
   */
  async fetchBasketballDetailsHeader(
    matchId: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  ): Promise<{ home: any; away: any } | null> {
    const url = "https://www.mackolik.com/ajax/basketball/match/details-header";

    this.logger.debug(`📡 [${matchId}] Fetching basketball details header`);

    try {
      const response = await this.axios.get(url, {
        params: { matchId },
        headers: this.ajaxHeaders(),
      });

      const html = response.data?.data?.views?.scoreDetails?.html;
      return html ? this.parseBasketballDetailsHeader(html) : null;
    } catch (error: unknown) {
      // A 404 is an expected outcome here and is not logged by this method.
      if (this.isNotFound(error)) return null;
      throw error;
    }
  }

  /**
   * Parses Q1-Q4 scores for the first two rows of the score-details table.
   * Row 0 is returned as `home`, row 1 as `away`.
   */
  private parseBasketballDetailsHeader(
    html: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  ): { home: any; away: any } | null {
    if (!html) return null;

    const $ = cheerio.load(html);
    const rows = $(
      ".widget-basketball-match-details-header__score-details tbody tr",
    );
    if (rows.length < 2) return null;

    const parseRow = (row: typeof rows) => {
      const cols = row.find("td");

      // Column 0 is the team name and columns 1-4 are the quarters. A score is
      // read from the nested score-part element when present, otherwise from
      // the cell text itself.
      const getScore = (index: number): number | null => {
        const cell = cols.eq(index);
        const part = cell.find(
          ".widget-basketball-match-details-header__score-part",
        );
        const raw = part.length ? part.text() : cell.text();
        return this.safeInt(raw.trim());
      };

      // NOTE(review): columns after Q4 (final score, possible overtime) are
      // not read; only the standard four-quarter layout is handled.
      return {
        q1: getScore(1),
        q2: getScore(2),
        q3: getScore(3),
        q4: getScore(4),
      };
    };

    return {
      home: parseRow(rows.eq(0)),
      away: parseRow(rows.eq(1)),
    };
  }

  // ============================================
  // BASKETBALL MARKETS (Odds)
  // ============================================

  /**
   * Fetches and parses the basketball iddaa markets HTML for a match.
   *
   * @returns Parsed markets; an empty array on HTTP 404 or when no HTML is present.
   */
  async fetchBasketballMarkets(matchId: string): Promise<ParsedMarket[]> {
    const url = `https://www.mackolik.com/ajax/iddaa/markets/basketball/all/${matchId}`;

    this.logger.debug(`📡 [${matchId}] Fetching basketball markets`);

    try {
      const response = await this.axios.get(url, {
        params: { template: "all" },
        headers: this.ajaxHeaders(),
      });

      const html = response.data?.data?.html;
      return html ? this.parseIddaaMarketsHtml(html) : [];
    } catch (error: unknown) {
      if (this.isNotFound(error)) {
        this.logger.warn(`[${matchId}] Basketball markets not found (404)`);
        return [];
      }
      throw error;
    }
  }

  /**
   * Returns the last "/"-separated segment of a URL.
   *
   * @returns null when the URL is missing/empty or the last segment is empty
   * (for example a URL with a trailing slash).
   */
  extractPlayerIdFromUrl(url: string | undefined): string | null {
    if (!url) return null;
    const parts = url.split("/");
    return parts[parts.length - 1] || null;
  }

  // ============================================
  // SIDELINED PLAYERS (Injuries & Suspensions)
  // ============================================

  /**
   * Fetches the match page and parses the sidelined-player widgets.
   *
   * This is best-effort: any failure, not just a 404, is logged as a warning
   * and resolves to null instead of throwing.
   *
   * @returns The first widget as `homeTeam` and the second as `awayTeam`, or null on failure.
   */
  async fetchSidelinedPlayers(
    matchId: string,
    matchSlug: string,
  ): Promise<SidelinedResponse | null> {
    const url = `https://www.mackolik.com/mac/${matchSlug}/${matchId}`;

    this.logger.debug(`📡 [${matchId}] Fetching sidelined players`);

    try {
      // NOTE(review): this request uses its own literal headers and a 10s
      // timeout rather than DEFAULT_HEADERS/DEFAULT_TIMEOUT — confirm intended.
      const response = await this.axios.get(url, {
        headers: {
          "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
          Accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
          "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
          Referer: "https://www.mackolik.com",
        },
        timeout: 10000,
      });

      const $ = cheerio.load(response.data);

      return {
        homeTeam: this._parseSidelinedSection($, 0),
        awayTeam: this._parseSidelinedSection($, 1),
      };
    } catch (error: unknown) {
      if (this.isNotFound(error)) {
        this.logger.warn(`[${matchId}] Match page not found (404)`);
        return null;
      }
      this.logger.warn(
        `[${matchId}] Sidelined fetch warning: ${this.describeError(error)}`,
      );
      return null;
    }
  }

  /**
   * Parses the sidelined widget at position `teamIndex`.
   * Returns an empty team record when the page has no widget at that index.
   * `teamId` is the last path segment of the crest image `src`.
   */
  private _parseSidelinedSection(
    $: cheerio.CheerioAPI,
    teamIndex: number,
  ): SidelinedTeamData {
    const sidelinedWidgets = $(".widget-sidelined-players");
    if (sidelinedWidgets.length <= teamIndex) {
      return { teamName: "", teamId: "", totalSidelined: 0, players: [] };
    }

    const widget = sidelinedWidgets.eq(teamIndex);

    const crestSrc =
      widget.find(".widget-sidelined-players__header-crest").attr("src") || "";
    const teamId = crestSrc.split("/").pop() || "";
    const teamName = widget
      .find(".widget-sidelined-players__header-text")
      .text()
      .trim();

    const players: SidelinedPlayer[] = [];
    widget.find(".widget-sidelined-players__item").each((_, element) => {
      const player = this._parsePlayerItem($, $(element));
      if (player) {
        players.push(player);
      }
    });

    return { teamName, teamId, totalSidelined: players.length, players };
  }

  /**
   * Parses a single sidelined-player list item.
   *
   * `type` is derived from the reason icon path: "injury" when it contains
   * "shortage_1.png", "suspension" when it contains "suspension", otherwise
   * "other". Relative player and icon URLs are prefixed with the site origin.
   *
   * @param $ Unused; kept so the signature stays unchanged.
   * @returns The parsed player, or null if parsing throws.
   */
  private _parsePlayerItem(
    $: cheerio.CheerioAPI,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    $item: cheerio.Cheerio<any>,
  ): SidelinedPlayer | null {
    const origin = "https://www.mackolik.com";
    const absolute = (path: string): string =>
      path.startsWith("http") ? path : `${origin}${path}`;

    try {
      const nameElem = $item.find(".widget-sidelined-players__name");
      const playerName = nameElem.text().trim();
      const playerUrl = nameElem.attr("href") || "";
      const playerId = playerUrl.split("/").pop() || "";

      const positionElem = $item.find(".widget-sidelined-players__position");
      const position = positionElem.attr("title") || "";
      const positionShort = positionElem.text().trim();

      const reasonIcon =
        $item.find(".widget-sidelined-players__reason img").attr("src") || "";

      // First number cell is matches missed, second is the average; a missing
      // cell reads as "" and therefore parses to null.
      const numbers = $item.find(".widget-sidelined-players__number");
      const numberAt = (index: number): number | null =>
        this.safeInt(
          numbers.length > index ? numbers.eq(index).text().trim() : "",
        );
      const matchesMissed = numberAt(0);
      // NOTE(review): parsed as an integer, so a fractional average would be
      // truncated — confirm against the page markup.
      const average = numberAt(1);

      const description = $item
        .find(".widget-sidelined-players__value")
        .text()
        .trim();

      const type = reasonIcon.includes("shortage_1.png")
        ? "injury"
        : reasonIcon.includes("suspension")
          ? "suspension"
          : "other";

      return {
        playerId,
        playerName,
        playerUrl: absolute(playerUrl),
        position,
        positionShort,
        type,
        description,
        matchesMissed,
        average,
        reasonIcon: absolute(reasonIcon),
      };
    } catch {
      return null;
    }
  }

  // ============================================
  // HELPER FUNCTIONS
  // ============================================

  /** Parses a base-10 integer; returns null for empty/undefined or non-numeric text. */
  private safeInt(value: string | undefined): number | null {
    if (!value) return null;
    const num = parseInt(value, 10);
    return isNaN(num) ? null : num;
  }

  /** Per-request headers used for the basketball AJAX endpoints. */
  private ajaxHeaders(): Record<string, string> {
    return {
      "X-Requested-With": "XMLHttpRequest",
      "User-Agent": DEFAULT_HEADERS["User-Agent"],
    };
  }

  /** True when a thrown value carries `response.status === 404`; never throws. */
  private isNotFound(error: unknown): boolean {
    const response = (
      error as { response?: { status?: unknown } } | null | undefined
    )?.response;
    return response?.status === 404;
  }

  /** Best-effort message for a thrown value of unknown type. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * GETs `url` and returns `response.data.data`, mapping HTTP 404 to null.
   *
   * @param label Human-readable resource name used in the 404 warning.
   * @returns The payload typed as `T` (an unchecked assertion), or null on 404.
   * @throws Any non-404 request error, unchanged.
   */
  private async getDataOrNullOn404<T>(
    url: string,
    config: AxiosRequestConfig,
    matchId: string,
    label: string,
  ): Promise<T | null> {
    try {
      const response = await this.axios.get(url, config);
      return response.data.data;
    } catch (error: unknown) {
      if (this.isNotFound(error)) {
        this.logger.warn(`[${matchId}] ${label} not found (404)`);
        return null;
      }
      throw error;
    }
  }

  /** Splits "made/attempted" cell text into two integers, defaulting each to 0. */
  private parseMadeAttempted(text: string): [number, number] {
    const [made, attempted] = text.split("/");
    return [this.safeInt(made) ?? 0, this.safeInt(attempted) ?? 0];
  }

  /**
   * Reads the numeric stat columns of one box-score row.
   *
   * Column indices: 2 points, 3 rebounds, 4 assists, 5/6/7 made/attempted
   * pairs (field goals, three-pointers, free throws), 8 fouls, 9 blocks,
   * 10 steals, 11 turnovers. Missing or non-numeric cells become 0.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private parseBoxScoreStats(cells: cheerio.Cheerio<any>) {
    const cellText = (index: number): string => cells.eq(index).text().trim();
    const count = (index: number): number =>
      this.safeInt(cellText(index)) ?? 0;

    const [fgMade, fgAttempted] = this.parseMadeAttempted(cellText(5));
    const [threePtMade, threePtAttempted] = this.parseMadeAttempted(
      cellText(6),
    );
    const [ftMade, ftAttempted] = this.parseMadeAttempted(cellText(7));

    return {
      points: count(2),
      rebounds: count(3),
      assists: count(4),
      fgMade,
      fgAttempted,
      threePtMade,
      threePtAttempted,
      ftMade,
      ftAttempted,
      fouls: count(8),
      blocks: count(9),
      steals: count(10),
      turnovers: count(11),
    };
  }
}