# Source code for vlrdevapi.events.matches

"""Event matches functionality."""

from __future__ import annotations

import datetime
from urllib import parse
from bs4 import BeautifulSoup

from .models import Match, MatchTeam
from ..config import get_config
from ..countries import map_country_code
from ..fetcher import fetch_html, batch_fetch_html
from ..exceptions import NetworkError
from ..utils import (
    extract_text,
    extract_id_from_url,
    extract_country_code,
    parse_date,
    parse_int,
)

# Library configuration, resolved once when this module is imported.
# This module reads `vlr_base` (to build page URLs) and `default_timeout`
# (used when a caller does not pass an explicit timeout) from it.
_config = get_config()


def _get_match_team_ids_batch(match_ids: list[int], timeout: float, max_workers: int = 4) -> dict[int, tuple[int | None, int | None]]:
    """Resolve the two team IDs of each match by scraping its match page.

    All match pages are requested in one ``batch_fetch_html`` call. A match
    whose page could not be fetched, came back empty, or failed to parse is
    still present in the result, mapped to ``(None, None)``.

    Args:
        match_ids: Match IDs to look up.
        timeout: Request timeout forwarded to ``batch_fetch_html``.
        max_workers: Worker count forwarded to ``batch_fetch_html``.

    Returns:
        Dictionary mapping each match ID to ``(team1_id, team2_id)``; either
        element is ``None`` when the corresponding team link was not found.
    """
    if not match_ids:
        return {}

    page_urls = [f"{_config.vlr_base}/{mid}" for mid in match_ids]
    pages = batch_fetch_html(page_urls, timeout=timeout, max_workers=max_workers)

    unresolved: tuple[int | None, int | None] = (None, None)
    ids_by_match: dict[int, tuple[int | None, int | None]] = {}

    for mid, page_url in zip(match_ids, page_urls):
        page = pages.get(page_url)
        # The batch result may hold an exception object (or nothing) for a URL.
        if isinstance(page, Exception) or not page:
            ids_by_match[mid] = unresolved
            continue

        try:
            document = BeautifulSoup(page, "lxml")

            # Most specific selector first; widen step by step when the
            # header does not expose two team links.
            anchors = document.select(".match-header-link-name a[href*='/team/']")
            if len(anchors) < 2:
                anchors = document.select(".match-header a[href*='/team/']")
                if not anchors:
                    anchors = document.select("a[href*='/team/']")

            # Keep the first two distinct team hrefs in document order.
            team_hrefs: list[str] = []
            for anchor in anchors:
                raw_href = anchor.get("href")
                if not isinstance(raw_href, str):
                    continue
                if "/team/" not in raw_href or raw_href in team_hrefs:
                    continue
                team_hrefs.append(raw_href)
                if len(team_hrefs) >= 2:
                    break

            first_id = extract_id_from_url(team_hrefs[0], "team") if team_hrefs else None
            second_id = extract_id_from_url(team_hrefs[1], "team") if len(team_hrefs) > 1 else None
            ids_by_match[mid] = (first_id, second_id)
        except Exception:
            # Best effort: a page that cannot be parsed yields no IDs
            # instead of failing the whole batch.
            ids_by_match[mid] = unresolved

    return ids_by_match



# [docs] -- documentation back-link left over from the rendered source page; not part of the module.
def matches(event_id: int, stage: str | None = None, limit: int | None = None, timeout: float | None = None) -> list[Match]:
    """
    Get event matches with team IDs.

    The event's match listing is fetched and parsed, then every returned
    match page is fetched as well (via ``_get_match_team_ids_batch``) to
    resolve the two team IDs.

    Args:
        event_id: Event ID
        stage: Stage filter (optional). Compared case-insensitively, after
            stripping whitespace, with the labels in the listing's stage
            dropdown. If no label matches, the unfiltered listing is used.
        limit: Maximum number of matches to return (optional). A value of
            zero or less returns an empty list without any network request.
        timeout: Request timeout in seconds. Falls back to the configured
            default timeout when omitted.

    Returns:
        List of event matches with team IDs extracted from match pages.
        An empty list is returned when the listing (or the selected stage
        page) cannot be fetched because of a ``NetworkError``.

    Example:
        >>> import vlrdevapi as vlr
        >>> matches = vlr.events.matches(event_id=123, limit=20)
        >>> for match in matches:
        ...     print(f"{match.teams[0].name} (ID: {match.teams[0].id}) vs {match.teams[1].name} (ID: {match.teams[1].id})")
    """
    # A non-positive limit can never yield a match, so skip all requests.
    if limit is not None and limit <= 0:
        return []

    url = f"{_config.vlr_base}/event/matches/{event_id}"
    effective_timeout = timeout if timeout is not None else _config.default_timeout
    try:
        html = fetch_html(url, effective_timeout)
    except NetworkError:
        return []

    soup = BeautifulSoup(html, "lxml")

    # If a stage is provided, find the corresponding stage link and refetch the page
    if stage:
        stage_url = _find_stage_url(soup, stage)
        if stage_url:
            try:
                html = fetch_html(stage_url, effective_timeout)
            except NetworkError:
                return []
            soup = BeautifulSoup(html, "lxml")

    match_data: list[tuple[int, str, list[MatchTeam], str, str, str, datetime.date | None, str | None]] = []

    for card in soup.select("a.match-item"):
        # Stop parsing as soon as enough matches are collected so no
        # unnecessary match pages are fetched afterwards.
        if limit is not None and len(match_data) >= limit:
            break

        href = card.get("href")
        href_str = href if isinstance(href, str) else None
        match_id = _parse_match_id(href_str)
        if not match_id:
            continue

        # Cards without two named teams are skipped entirely.
        teams = _parse_card_teams(card)
        if len(teams) != 2:
            continue

        match_status = _parse_match_status(card)

        # Parse stage/phase: the event element's text also contains the
        # series (phase) text, so the phase is removed to leave the stage.
        event_el = card.select_one(".match-item-event")
        series_el = card.select_one(".match-item-event-series")
        phase = extract_text(series_el) or None
        stage_name = extract_text(event_el) or None
        if phase and stage_name:
            stage_name = stage_name.replace(phase, "").strip()

        match_date = _parse_card_date(card)
        time_text = extract_text(card.select_one(".match-item-time")) or None
        match_url = parse.urljoin(f"{_config.vlr_base}/", href_str.lstrip("/")) if href_str else ""

        match_data.append((match_id, match_url, teams, match_status, stage_name or "", phase or "", match_date, time_text))

    # Fetch team IDs concurrently using batch fetching (only for collected matches)
    match_ids = [entry[0] for entry in match_data]
    team_ids_map = _get_match_team_ids_batch(match_ids, effective_timeout, max_workers=4)

    results: list[Match] = []
    for match_id, match_url, teams, match_status, stage_name, phase, match_date, time_text in match_data:
        team1_id, team2_id = team_ids_map.get(match_id, (None, None))
        results.append(Match(
            match_id=match_id,
            event_id=event_id,
            stage=stage_name,
            phase=phase,
            status=match_status,
            date=match_date,
            time=time_text,
            teams=(_with_team_id(teams[0], team1_id), _with_team_id(teams[1], team2_id)),
            url=match_url,
        ))

    return results


def _find_stage_url(soup, stage: str) -> str | None:
    """Return the absolute URL of the stage dropdown entry labelled ``stage``, or None."""
    dropdown = soup.select_one("span.wf-dropdown.mod-all")
    options: list = dropdown.select("a") if dropdown else []
    stage_map: dict[str, str] = {}
    for option in options:
        label = (extract_text(option) or "").strip()
        href = option.get("href")
        if not href or not isinstance(href, str):
            continue
        # Labels are lower-cased so the lookup below is case-insensitive.
        stage_map[label.lower()] = parse.urljoin(f"{_config.vlr_base}/", href.lstrip("/"))
    return stage_map.get(stage.strip().lower())


def _parse_match_id(href_str: str | None) -> int | None:
    """Parse the match ID from an href shaped like "/match/<id>/..." or "/<id>/..."."""
    parts = href_str.strip("/").split("/") if href_str else []
    if parts and parts[0] == "match" and len(parts) >= 2:
        return parse_int(parts[1])
    return parse_int(parts[0]) if parts else None


def _parse_card_teams(card) -> list[MatchTeam]:
    """Build the (at most two) teams shown on a match card; team IDs are left as None."""
    teams: list[MatchTeam] = []
    for team_el in card.select(".match-item-vs-team")[:2]:
        name_el = team_el.select_one(".match-item-vs-team-name .text-of") or team_el.select_one(".match-item-vs-team-name")
        name = extract_text(name_el)
        if not name:
            continue

        score_el = team_el.select_one(".match-item-vs-team-score")
        score = parse_int(extract_text(score_el)) if score_el else None

        code = extract_country_code(team_el)
        country = map_country_code(code) if code else None

        teams.append(MatchTeam(
            id=None,
            name=name,
            country=country,
            score=score,
            is_winner="mod-winner" in (team_el.get("class") or []),
        ))
    return teams


def _parse_match_status(card) -> str:
    """Derive "completed", "ongoing" or "upcoming" from the card's ETA marker classes."""
    ml = card.select_one(".match-item-eta .ml")
    if not ml:
        return "upcoming"

    classes_raw = ml.get("class")
    classes: list[str] = []
    if isinstance(classes_raw, list):
        classes = classes_raw
    elif isinstance(classes_raw, str):
        classes = [classes_raw]

    if any("mod-completed" in str(c) for c in classes):
        return "completed"
    if any("mod-live" in str(c) or "mod-ongoing" in str(c) for c in classes):
        return "ongoing"
    return "upcoming"


def _parse_card_date(card) -> datetime.date | None:
    """Parse the date from the nearest preceding date label of a match card, if any."""
    label = card.find_previous("div", class_="wf-label mod-large")
    if not label:
        return None
    # Only the label's own text nodes are used; text of nested elements is ignored.
    fragments = [frag.strip() for frag in label.find_all(string=True, recursive=False)]
    text = " ".join(frag for frag in fragments if frag)
    return parse_date(text, ["%a, %B %d, %Y", "%A, %B %d, %Y", "%B %d, %Y"])


def _with_team_id(team: MatchTeam, team_id: int | None) -> MatchTeam:
    """Return a new MatchTeam with the same fields as ``team`` but the given ID."""
    return MatchTeam(
        id=team_id,
        name=team.name,
        country=team.country,
        score=team.score,
        is_winner=team.is_winner,
    )