Files
iddaai-be/src/modules/feeder/feeder-scraper.service.ts
T
2026-04-16 17:21:48 +03:00

747 lines
24 KiB
TypeScript
Executable File

/**
* Feeder Scraper Service
* Sends HTTP requests to mackolik.com endpoints, using headers copied from
* known-working curl commands, and parses the JSON/HTML responses.
*/
import { Injectable, Logger } from "@nestjs/common";
import axios, { AxiosInstance } from "axios";
import * as cheerio from "cheerio";
import {
Sport,
SPORTS_CONFIG,
DEFAULT_HEADERS,
DEFAULT_TIMEOUT,
KeyEventsResponse,
MatchStatsResponse,
GameStatsResponse,
ManagerResponse,
IddaaMarketsHtmlResponse,
BasketballBoxScoreResponse,
ParsedMatchHeader,
ParsedMarket,
ParsedSelection,
BasketballPlayerStats,
LivescoresApiResponse,
SidelinedResponse,
SidelinedTeamData,
SidelinedPlayer,
} from "./feeder.types";
/**
 * Injectable NestJS service that fetches match data from mackolik.com
 * endpoints over HTTP (axios) and parses the HTML fragments they return
 * (cheerio) into typed structures.
 */
@Injectable()
export class FeederScraperService {
// Logger labelled with this class's name.
private readonly logger = new Logger(FeederScraperService.name);
// Shared HTTP client; assigned in the constructor.
private readonly axios: AxiosInstance;
/**
 * Creates the shared axios client (DEFAULT_HEADERS + DEFAULT_TIMEOUT) and
 * registers a response interceptor that logs every outcome.
 *
 * Successful responses are logged at debug level; failures are logged at
 * error level and then re-thrown so callers still observe the rejection.
 */
constructor() {
  // Drops everything from the first "?" onward; undefined stays undefined.
  const withoutQuery = (target?: string) => target?.split("?")[0];

  this.axios = axios.create({
    timeout: DEFAULT_TIMEOUT,
    headers: DEFAULT_HEADERS,
  });

  this.axios.interceptors.response.use(
    (res) => {
      const endpoint = withoutQuery(res.config.url);
      this.logger.debug(`✅ [${endpoint}] Status: ${res.status}`);
      return res;
    },
    (err) => {
      // Network-level failures have no response/config, hence the fallbacks.
      const httpStatus = err.response?.status || "N/A";
      const endpoint = withoutQuery(err.config?.url) || "Unknown";
      this.logger.error(
        `❌ [${endpoint}] Status: ${httpStatus} - ${err.message}`,
      );
      throw err;
    },
  );
}
// ============================================
// Historical source endpoint (match list)
// ============================================
/**
 * Fetches the livescores JSON for one sport on one date.
 *
 * @param dateString Value sent as the `matchDate` query parameter.
 * @param sport Key into SPORTS_CONFIG; its `sportParam` is sent as `sports[]`.
 * @returns The response body, after checking it is an object that has both
 *          a `status` and a `data` key.
 * @throws Error when the body does not have that shape; request errors from
 *         axios propagate unchanged.
 */
async fetchLivescores(
  dateString: string,
  sport: Sport,
): Promise<LivescoresApiResponse> {
  const sportParam = SPORTS_CONFIG[sport].sportParam;
  const endpoint =
    "https://www.mackolik.com/perform/p0/ajax/components/competition/livescores/json";

  this.logger.log(
    `📡 [${sport}] Fetching historical source snapshot for ${dateString}`,
  );

  const { data: body } = await this.axios.get(endpoint, {
    params: {
      "sports[]": sportParam,
      matchDate: dateString,
    },
  });

  // Treat the body as unknown until the minimal shape has been verified.
  const payload: unknown = body;
  const hasExpectedShape =
    typeof payload === "object" &&
    payload !== null &&
    "status" in payload &&
    "data" in payload;

  if (!hasExpectedShape) {
    throw new Error("Historical source payload has invalid shape");
  }
  return payload as LivescoresApiResponse;
}
// ============================================
// MATCH HEADER (Score, Status, HT Score)
// ============================================
/**
 * Requests the match-header fragment for a match and parses it.
 *
 * @param matchId Match identifier sent as the `matchId` query parameter.
 * @returns The parsed header; an absent `data.html` in the response is
 *          parsed as an empty string.
 */
async fetchMatchHeader(matchId: string): Promise<ParsedMatchHeader> {
  const endpoint =
    "https://www.mackolik.com/perform/p0/ajax/components/match/matchHeader";
  const query = {
    matchId,
    sdapiLanguageCode: "tr-mk",
    ajaxViewName: "match-details",
    ajaxPartialViewName: "match-details-status",
    displayMode: "all",
  };

  this.logger.debug(`📡 [${matchId}] Fetching match header`);

  const { data: body } = await this.axios.get(endpoint, { params: query });
  const fragment = body.data?.html || "";
  return this.parseMatchHeader(fragment);
}
/**
 * Extracts status, current score and half-time score from a match-header
 * HTML fragment.
 *
 * - `matchStatus` comes from the first `data-match-status` attribute and
 *   falls back to "postGame" when the attribute is missing or empty.
 * - `scoreHome` / `scoreAway` come from the `score-home` / `score-away`
 *   slots; null when the text is not an integer.
 * - `htScoreHome` / `htScoreAway` come from a "(İY x - y)" marker in the
 *   detailed-score element; null when the marker is absent.
 */
private parseMatchHeader(html: string): ParsedMatchHeader {
  const $ = cheerio.load(html);

  // Trimmed text content of whatever the selector matches.
  const textOf = (selector: string): string => $(selector).text().trim();

  const statusAttr = $("[data-match-status]").attr("data-match-status") as any;
  const halfTime = textOf(
    ".p0c-soccer-match-details-header__detailed-score",
  ).match(/\(İY\s*(\d+)\s*-\s*(\d+)\)/);

  return {
    matchStatus: statusAttr || "postGame",
    scoreHome: this.safeInt(textOf('[data-slot="score-home"]')),
    scoreAway: this.safeInt(textOf('[data-slot="score-away"]')),
    htScoreHome: halfTime ? parseInt(halfTime[1], 10) : null,
    htScoreAway: halfTime ? parseInt(halfTime[2], 10) : null,
  };
}
// ============================================
// KEY EVENTS (Goals, Cards, Substitutes)
// ============================================
/**
 * Fetches the key-events payload for a match.
 *
 * @param matchId Sent as both `matchId` and `seasonId`.
 * @returns The `data` field of the response, or null when the endpoint
 *          answers 404. Any other failure is re-thrown.
 */
async fetchKeyEvents(
  matchId: string,
): Promise<KeyEventsResponse["data"] | null> {
  const endpoint = "https://www.mackolik.com/ajax/football/key-events";
  const query = {
    ajaxViewName: "events",
    matchId,
    seasonId: matchId, // the endpoint is called with the match id here too
  };

  this.logger.debug(`📡 [${matchId}] Fetching key events`);

  try {
    const { data: body } = await this.axios.get<KeyEventsResponse>(endpoint, {
      params: query,
    });
    return body.data;
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Key events not found (404)`);
    return null;
  }
}
// ============================================
// MATCH STATS - STARTING FORMATION (İlk 11)
// ============================================
/**
 * Fetches the "starting-formation" view of the match-stats endpoint.
 *
 * @param matchId Sent as both `matchId` and `seasonId`.
 * @returns The `data` field of the response, or null when the endpoint
 *          answers 404. Any other failure is re-thrown.
 */
async fetchStartingFormation(
  matchId: string,
): Promise<MatchStatsResponse["data"] | null> {
  const endpoint = "https://www.mackolik.com/ajax/football/match-stats";
  const query = {
    ajaxViewName: "starting-formation",
    matchId,
    seasonId: matchId,
  };

  this.logger.debug(`📡 [${matchId}] Fetching starting formation`);

  try {
    const { data: body } = await this.axios.get<MatchStatsResponse>(endpoint, {
      params: query,
    });
    return body.data;
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Starting formation not found (404)`);
    return null;
  }
}
// ============================================
// MATCH STATS - SUBSTITUTIONS (Yedekler)
// ============================================
/**
 * Fetches the "substitutions" view of the match-stats endpoint.
 *
 * @param matchId Sent as both `matchId` and `seasonId`.
 * @returns The `data` field of the response, or null when the endpoint
 *          answers 404. Any other failure is re-thrown.
 */
async fetchSubstitutions(
  matchId: string,
): Promise<MatchStatsResponse["data"] | null> {
  const endpoint = "https://www.mackolik.com/ajax/football/match-stats";
  const query = {
    ajaxViewName: "substitutions",
    matchId,
    seasonId: matchId,
  };

  this.logger.debug(`📡 [${matchId}] Fetching substitutions`);

  try {
    const { data: body } = await this.axios.get<MatchStatsResponse>(endpoint, {
      params: query,
    });
    return body.data;
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Substitutions not found (404)`);
    return null;
  }
}
// ============================================
// GAME STATS (Possession, Shots, Passes)
// ============================================
/**
 * Fetches the game-stats payload for a match.
 *
 * @param matchId Sent as the `matchId` query parameter.
 * @returns The `data` field of the response, or null when the endpoint
 *          answers 404. Any other failure is re-thrown.
 */
async fetchGameStats(
  matchId: string,
): Promise<GameStatsResponse["data"] | null> {
  const endpoint = "https://www.mackolik.com/ajax/soccer/match/gameStats";

  this.logger.debug(`📡 [${matchId}] Fetching game stats`);

  try {
    const { data: body } = await this.axios.get<GameStatsResponse>(endpoint, {
      params: { matchId },
    });
    return body.data;
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Game stats not found (404)`);
    return null;
  }
}
// ============================================
// MANAGER
// ============================================
/**
 * Fetches the "manager" view of the match-stats endpoint.
 *
 * @param matchId Sent as both `matchId` and `seasonId`.
 * @returns The `data` field of the response, or null when the endpoint
 *          answers 404. Any other failure is re-thrown.
 */
async fetchManager(matchId: string): Promise<ManagerResponse["data"] | null> {
  const endpoint = "https://www.mackolik.com/ajax/football/match-stats";
  const query = {
    ajaxViewName: "manager",
    matchId,
    seasonId: matchId,
  };

  this.logger.debug(`📡 [${matchId}] Fetching manager`);

  try {
    const { data: body } = await this.axios.get<ManagerResponse>(endpoint, {
      params: query,
    });
    return body.data;
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Manager not found (404)`);
    return null;
  }
}
// ============================================
// IDDAA MARKETS (HTML with odds + names)
// ============================================
/**
 * Fetches the soccer iddaa markets HTML for a match and parses it.
 *
 * @param matchId Interpolated into the request path.
 * @returns The parsed markets. An empty list is returned when the endpoint
 *          answers 404 or when the response carries no `data.html`
 *          (including a null/empty body), matching fetchBasketballMarkets.
 *          Any other failure is re-thrown.
 */
async fetchIddaaMarkets(matchId: string): Promise<ParsedMarket[]> {
  const url = `https://www.mackolik.com/ajax/iddaa/markets/soccer/all/${matchId}`;
  this.logger.debug(`📡 [${matchId}] Fetching iddaa markets`);
  try {
    const response = await this.axios.get<IddaaMarketsHtmlResponse>(url, {
      params: { template: "all" },
    });
    // Guard every level: a null body must yield "no markets", not a TypeError.
    const html = response.data?.data?.html;
    return html ? this.parseIddaaMarketsHtml(html) : [];
  } catch (error: any) {
    if (error.response?.status === 404) {
      this.logger.warn(`[${matchId}] Iddaa markets not found (404)`);
      return [];
    }
    throw error;
  }
}
/**
 * Parses an iddaa markets HTML fragment into a list of markets.
 *
 * Each `.widget-iddaa-markets__market-item` becomes one market, built from
 * its `data-market` attribute, header text, iddaa code, mbc text and its
 * option elements. Items lacking either a market id or a market name are
 * skipped. Empty input yields an empty list.
 */
private parseIddaaMarketsHtml(html: string): ParsedMarket[] {
  if (!html) return [];

  const $ = cheerio.load(html);
  const markets: ParsedMarket[] = [];

  for (const marketEl of $(".widget-iddaa-markets__market-item").toArray()) {
    const $market = $(marketEl);
    // Trimmed text of a descendant of the current market element.
    const childText = (selector: string): string =>
      $market.find(selector).text().trim();

    const marketId = $market.attr("data-market") || "";
    const marketName = childText(".widget-iddaa-markets__header-text");
    if (!marketId || !marketName) continue;

    const selections: ParsedSelection[] = $market
      .find(".widget-iddaa-markets__option")
      .toArray()
      .map((optionEl) => {
        const $option = $(optionEl);
        return {
          shortcode: $option.attr("data-shortcode") || "",
          outcomeNo: $option.attr("data-outcome-no") || "",
          label: $option.find(".widget-iddaa-markets__label").text().trim(),
          value: $option.find(".widget-iddaa-markets__value").text().trim(),
        };
      });

    markets.push({
      marketId,
      marketName,
      iddaaCode: childText(".widget-iddaa-markets__iddaa-code"),
      mbc: childText(".widget-iddaa-markets__mbc"),
      selections,
    });
  }

  this.logger.debug(`Parsed ${markets.length} iddaa markets`);
  return markets;
}
// ============================================
// BASKETBALL BOX SCORE
// ============================================
/**
 * Fetches the basketball box-score payload for a match.
 *
 * The request explicitly sets `X-Requested-With: XMLHttpRequest` and the
 * default User-Agent.
 *
 * @param matchId Sent as the `matchId` query parameter.
 * @returns The `data` field of the response, or null when the endpoint
 *          answers 404. Any other failure is re-thrown.
 */
async fetchBasketballBoxScore(
  matchId: string,
): Promise<BasketballBoxScoreResponse["data"] | null> {
  const endpoint = "https://www.mackolik.com/ajax/basketball/match/box-score";
  const requestHeaders = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": DEFAULT_HEADERS["User-Agent"],
  };

  this.logger.debug(`📡 [${matchId}] Fetching basketball box score`);

  try {
    const { data: body } = await this.axios.get<BasketballBoxScoreResponse>(
      endpoint,
      { params: { matchId }, headers: requestHeaders },
    );
    return body.data;
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Basketball box score not found (404)`);
    return null;
  }
}
/**
 * Parses a basketball box-score HTML fragment into player stat lines and
 * team totals.
 *
 * Column layout read from each row's `td` cells:
 * 0 name, 1 minutes, 2 points, 3 rebounds, 4 assists, 5 field goals
 * ("made/attempted"), 6 three-pointers, 7 free throws, 8 fouls, 9 blocks,
 * 10 steals, 11 turnovers. Missing or non-numeric cells count as 0.
 *
 * @param html Box-score fragment; empty input yields empty results.
 * @returns `players` — one entry per row that has a player cell and at least
 *          10 `td` cells (id is "" when no link id can be extracted);
 *          `teamTotals` — the same stat columns read from the footer cells,
 *          or `{}` when the footer has 5 or fewer cells.
 */
parseBasketballBoxScore(html: string): {
  teamTotals: any;
  players: Partial<BasketballPlayerStats>[];
} {
  if (!html) return { teamTotals: {}, players: [] };

  const $ = cheerio.load(html);

  // Trimmed text of the cell at the given column.
  const textAt = (cells: cheerio.Cheerio<any>, col: number): string =>
    cells.eq(col).text().trim();

  // Integer value of a cell, 0 when missing or not a number.
  const countAt = (cells: cheerio.Cheerio<any>, col: number): number =>
    this.safeInt(textAt(cells, col)) || 0;

  // Splits a "made/attempted" cell into its two integer halves.
  const ratioAt = (cells: cheerio.Cheerio<any>, col: number) => {
    const pieces = textAt(cells, col).split("/");
    return {
      made: this.safeInt(pieces[0]) || 0,
      attempted: this.safeInt(pieces[1]) || 0,
    };
  };

  // Stat columns shared by player rows and the totals footer.
  const statLine = (cells: cheerio.Cheerio<any>) => {
    const fieldGoals = ratioAt(cells, 5);
    const threes = ratioAt(cells, 6);
    const freeThrows = ratioAt(cells, 7);
    return {
      points: countAt(cells, 2),
      rebounds: countAt(cells, 3),
      assists: countAt(cells, 4),
      fgMade: fieldGoals.made,
      fgAttempted: fieldGoals.attempted,
      threePtMade: threes.made,
      threePtAttempted: threes.attempted,
      ftMade: freeThrows.made,
      ftAttempted: freeThrows.attempted,
      fouls: countAt(cells, 8),
      blocks: countAt(cells, 9),
      steals: countAt(cells, 10),
      turnovers: countAt(cells, 11),
    };
  };

  const players: Partial<BasketballPlayerStats>[] = [];
  for (const rowEl of $(".widget-basketball-match-box-score__row").toArray()) {
    const row = $(rowEl);

    // Rows without a player cell are not player rows.
    const nameCell = row.find(".widget-basketball-match-box-score__player");
    if (nameCell.length === 0) continue;

    // Rows with too few columns are not treated as stat rows.
    const cells = row.find("td");
    if (cells.length < 10) continue;

    const profileLink = nameCell.find("a").attr("href");
    players.push({
      id: this.extractPlayerIdFromUrl(profileLink) || "",
      name: nameCell.text().trim(),
      minutes: textAt(cells, 1),
      ...statLine(cells),
    });
  }

  const footerCells = $(".widget-basketball-match-box-score__footer td");
  const teamTotals: any = footerCells.length > 5 ? statLine(footerCells) : {};

  return { teamTotals, players };
}
// ============================================
// MATCH PAGE (Main page for officials parsing)
// ============================================
/**
 * Fetches the full HTML match page.
 *
 * @param matchId Last path segment of the page URL.
 * @param matchSlug Path segment placed before the match id.
 * @param sport Key into SPORTS_CONFIG; its `iddaaUrlPath` is the first path
 *              segment.
 * @returns The response body as returned by axios.
 */
async fetchMatchPage(
  matchId: string,
  matchSlug: string,
  sport: Sport,
): Promise<string> {
  const { iddaaUrlPath } = SPORTS_CONFIG[sport];
  const url = `https://www.mackolik.com/${iddaaUrlPath}/${matchSlug}/${matchId}`;
  this.logger.debug(`📡 [${matchId}] Fetching match page`);
  const response = await this.axios.get(url, {
    headers: {
      "User-Agent": DEFAULT_HEADERS["User-Agent"],
      Referer: DEFAULT_HEADERS["Referer"],
      "Accept-Language": DEFAULT_HEADERS["Accept-Language"],
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      // HTML pages must be requested WITHOUT X-Requested-With. Per-request
      // headers are merged on top of the instance defaults (DEFAULT_HEADERS),
      // so simply omitting the key would still send the default value; a
      // null value drops the header for this request.
      // NOTE(review): assumes DEFAULT_HEADERS defines X-Requested-With and
      // that axios 1.x is installed (null-valued headers are omitted there)
      // — confirm against feeder.types and package.json.
      "X-Requested-With": null,
    },
  });
  return response.data;
}
// ============================================
// HELPER FUNCTIONS
// ============================================
/**
 * Parses a base-10 integer from text.
 *
 * @param value Text to parse.
 * @returns The parsed integer, or null when the value is empty/undefined or
 *          does not start with a number.
 */
private safeInt(value: string | undefined): number | null {
  const parsed = value ? parseInt(value, 10) : NaN;
  return Number.isNaN(parsed) ? null : parsed;
}
// ============================================
// BASKETBALL DETAILS HEADER (Quarter Scores)
// ============================================
/**
 * Fetches the basketball details header for a match and parses the quarter
 * scores out of its `scoreDetails` view.
 *
 * @param matchId Sent as the `matchId` query parameter.
 * @returns The parsed quarter scores, or null when the response carries no
 *          `data.views.scoreDetails.html` or the endpoint answers 404.
 *          Any other failure is re-thrown.
 */
async fetchBasketballDetailsHeader(matchId: string): Promise<any> {
  const endpoint =
    "https://www.mackolik.com/ajax/basketball/match/details-header";
  const requestHeaders = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": DEFAULT_HEADERS["User-Agent"],
  };

  this.logger.debug(`📡 [${matchId}] Fetching basketball details header`);

  try {
    const { data: body } = await this.axios.get(endpoint, {
      params: { matchId },
      headers: requestHeaders,
    });
    const scoreHtml = body?.data?.views?.scoreDetails?.html;
    return scoreHtml ? this.parseBasketballDetailsHeader(scoreHtml) : null;
  } catch (err: any) {
    // A missing header (404) is an expected outcome, not a failure.
    if (err.response?.status !== 404) throw err;
    return null;
  }
}
/**
 * Reads per-quarter scores for both teams from the score-details table.
 *
 * The first body row is treated as the home team and the second as the away
 * team. In each row, cells 1-4 are read as Q1-Q4; cell 0 and any cells after
 * the fourth quarter are not read. A cell's value is taken from its
 * `.widget-basketball-match-details-header__score-part` child when present,
 * otherwise from the cell text.
 *
 * @returns `{ home, away }` with `q1`..`q4` (null when not numeric), or null
 *          for empty input or a table with fewer than two body rows.
 */
private parseBasketballDetailsHeader(
  html: string,
): { home: any; away: any } | null {
  if (!html) return null;

  const $ = cheerio.load(html);
  const teamRows = $(
    ".widget-basketball-match-details-header__score-details tbody tr",
  );
  if (teamRows.length < 2) return null;

  // Score held in one cell: prefer the dedicated score-part element.
  const scoreAt = (cells: cheerio.Cheerio<any>, column: number) => {
    const cell = cells.eq(column);
    const scorePart = cell.find(
      ".widget-basketball-match-details-header__score-part",
    );
    const raw = scorePart.length ? scorePart.text() : cell.text();
    return this.safeInt(raw.trim());
  };

  const quartersOf = (rowIndex: number) => {
    const cells = teamRows.eq(rowIndex).find("td");
    return {
      q1: scoreAt(cells, 1),
      q2: scoreAt(cells, 2),
      q3: scoreAt(cells, 3),
      q4: scoreAt(cells, 4),
    };
  };

  return {
    home: quartersOf(0),
    away: quartersOf(1),
  };
}
// ============================================
// BASKETBALL MARKETS (Odds)
// ============================================
/**
 * Fetches the basketball iddaa markets HTML for a match and parses it.
 *
 * Request shape: `/ajax/iddaa/markets/basketball/all/{matchId}?template=all`
 * with `X-Requested-With: XMLHttpRequest` and the default User-Agent.
 *
 * @returns The parsed markets; an empty list when the response carries no
 *          `data.html` or the endpoint answers 404. Any other failure is
 *          re-thrown.
 */
async fetchBasketballMarkets(matchId: string): Promise<ParsedMarket[]> {
  const endpoint = `https://www.mackolik.com/ajax/iddaa/markets/basketball/all/${matchId}`;
  const requestHeaders = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": DEFAULT_HEADERS["User-Agent"],
  };

  this.logger.debug(`📡 [${matchId}] Fetching basketball markets`);

  try {
    const { data: body } = await this.axios.get<IddaaMarketsHtmlResponse>(
      endpoint,
      { params: { template: "all" }, headers: requestHeaders },
    );
    const marketsHtml = body?.data?.html;
    return marketsHtml ? this.parseIddaaMarketsHtml(marketsHtml) : [];
  } catch (err: any) {
    const isNotFound = err.response?.status === 404;
    if (!isNotFound) throw err;
    this.logger.warn(`[${matchId}] Basketball markets not found (404)`);
    return [];
  }
}
/**
 * Returns the last non-empty path segment of a URL or path.
 *
 * A query string or fragment is ignored and trailing slashes do not hide the
 * id, so "/player/name/abc123/?tab=stats" yields "abc123".
 *
 * @param url Absolute or relative URL; may be undefined.
 * @returns The last non-empty segment, or null when there is none.
 */
extractPlayerIdFromUrl(url: string | undefined): string | null {
  if (!url) return null;
  // Keep only the part before the first "?" or "#".
  const pathOnly = url.split(/[?#]/, 1)[0] ?? "";
  const segments = pathOnly.split("/").filter((segment) => segment !== "");
  return segments.length > 0 ? segments[segments.length - 1] : null;
}
// ============================================
// SIDELINED PLAYERS (Injuries & Suspensions)
// ============================================
/**
 * Fetches the match page under `/mac/{matchSlug}/{matchId}` and parses the
 * sidelined-players widgets out of it.
 *
 * The first widget on the page is reported as the home team and the second
 * as the away team. The request uses its own header set and a 10 second
 * timeout.
 *
 * @returns Parsed data for both teams, or null on any failure; failures are
 *          logged as warnings and never thrown.
 */
async fetchSidelinedPlayers(
  matchId: string,
  matchSlug: string,
): Promise<SidelinedResponse | null> {
  const pageUrl = `https://www.mackolik.com/mac/${matchSlug}/${matchId}`;
  const requestHeaders = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    Accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
    Referer: "https://www.mackolik.com",
  };

  this.logger.debug(`📡 [${matchId}] Fetching sidelined players`);

  try {
    const { data: pageHtml } = await this.axios.get(pageUrl, {
      headers: requestHeaders,
      timeout: 10000,
    });
    const $ = cheerio.load(pageHtml);
    const homeTeam = this._parseSidelinedSection($, 0);
    const awayTeam = this._parseSidelinedSection($, 1);
    return { homeTeam, awayTeam };
  } catch (err: any) {
    // Best-effort data: every failure degrades to null with a warning.
    const warning =
      err.response?.status === 404
        ? `[${matchId}] Match page not found (404)`
        : `[${matchId}] Sidelined fetch warning: ${err.message}`;
    this.logger.warn(warning);
    return null;
  }
}
/**
 * Parses one `.widget-sidelined-players` widget from a loaded page.
 *
 * @param $ Cheerio handle for the whole page.
 * @param teamIndex Zero-based position of the widget on the page.
 * @returns Team name (header text), team id (last path segment of the crest
 *          image `src`), the parsed players and their count. When the page
 *          has no widget at that position, an empty result is returned.
 */
private _parseSidelinedSection(
  $: cheerio.CheerioAPI,
  teamIndex: number,
): SidelinedTeamData {
  const allWidgets = $(".widget-sidelined-players");
  if (teamIndex >= allWidgets.length) {
    return { teamName: "", teamId: "", totalSidelined: 0, players: [] };
  }

  const teamWidget = allWidgets.eq(teamIndex);

  const crestSrc =
    teamWidget.find(".widget-sidelined-players__header-crest").attr("src") ||
    "";
  const teamId = crestSrc.split("/").pop() || "";
  const teamName = teamWidget
    .find(".widget-sidelined-players__header-text")
    .text()
    .trim();

  // Items that fail to parse come back as null and are dropped.
  const players = teamWidget
    .find(".widget-sidelined-players__item")
    .toArray()
    .map((itemEl) => this._parsePlayerItem($, $(itemEl)))
    .filter((player): player is SidelinedPlayer => player !== null);

  return {
    teamName,
    teamId,
    totalSidelined: players.length,
    players,
  };
}
/**
 * Parses one sidelined-player list item.
 *
 * @param $ Cheerio handle for the page (kept for signature compatibility;
 *          not used by the parser itself).
 * @param $item The `.widget-sidelined-players__item` element.
 * @returns The parsed player, or null when parsing throws.
 *
 * Field notes:
 * - `playerId` is the last path segment of the name link's `href`.
 * - `playerUrl` / `reasonIcon` are made absolute against the site origin;
 *   a missing attribute yields "" instead of a bare origin URL.
 * - `type` is "injury" when the reason icon contains "shortage_1.png",
 *   "suspension" when it contains "suspension", otherwise "other".
 * - `matchesMissed` / `average` are the first and second number elements,
 *   parsed as integers; null when absent or not numeric.
 *   NOTE(review): `average` is parsed as an integer, so a decimal value
 *   would be truncated — confirm what the site renders in that column.
 */
private _parsePlayerItem(
  $: cheerio.CheerioAPI,
  $item: cheerio.Cheerio<any>,
): SidelinedPlayer | null {
  const siteOrigin = "https://www.mackolik.com";

  // Resolves a site reference to an absolute URL without re-encoding it.
  const toAbsoluteUrl = (ref: string): string => {
    if (!ref) return ""; // nothing to link to; do not fabricate a URL
    if (ref.startsWith("http")) return ref;
    if (ref.startsWith("//")) return `https:${ref}`; // protocol-relative
    return ref.startsWith("/")
      ? `${siteOrigin}${ref}`
      : `${siteOrigin}/${ref}`;
  };

  try {
    const nameElem = $item.find(".widget-sidelined-players__name");
    const playerName = nameElem.text().trim();
    const playerHref = nameElem.attr("href") || "";
    const playerId = playerHref.split("/").pop() || "";

    const positionElem = $item.find(".widget-sidelined-players__position");
    const position = positionElem.attr("title") || "";
    const positionShort = positionElem.text().trim();

    const reasonIcon =
      $item.find(".widget-sidelined-players__reason img").attr("src") || "";

    // eq() on a missing index yields empty text, which safeInt maps to null.
    const numbers = $item.find(".widget-sidelined-players__number");
    const matchesMissed = this.safeInt(numbers.eq(0).text().trim());
    const average = this.safeInt(numbers.eq(1).text().trim());

    const description = $item
      .find(".widget-sidelined-players__value")
      .text()
      .trim();

    const type = reasonIcon.includes("shortage_1.png")
      ? "injury"
      : reasonIcon.includes("suspension")
        ? "suspension"
        : "other";

    return {
      playerId,
      playerName,
      playerUrl: toAbsoluteUrl(playerHref),
      position,
      positionShort,
      type,
      description,
      matchesMissed,
      average,
      reasonIcon: toAbsoluteUrl(reasonIcon),
    };
  } catch (error: unknown) {
    // Still best-effort, but leave a trace instead of failing silently.
    const reason = error instanceof Error ? error.message : String(error);
    this.logger.debug(`Skipping sidelined player item: ${reason}`);
    return null;
  }
}
}