# Source code for vlrdevapi.teams.matches

"""Team matches retrieval."""

from __future__ import annotations

from datetime import datetime
from typing import TypedDict

from bs4 import BeautifulSoup

from ..config import get_config
from ..fetcher import fetch_html, batch_fetch_html
from ..exceptions import NetworkError
from ..utils import extract_text, absolute_url, extract_id_from_url

from .models import TeamMatch, MatchTeam

# Library configuration, obtained once when this module is imported. The
# functions below read its `vlr_base` (site root URL) and `default_timeout`
# (fallback request timeout) attributes.
_config = get_config()

def _parse_match_datetime(date_str: str | None, time_str: str | None) -> datetime | None:
    """
    Parse match date and time strings into a datetime object.
    
    Args:
        date_str: Date string (e.g., "2025/10/14", "October 15", "Oct 15")
        time_str: Time string (e.g., "5:50 pm", "2:30 PM PDT", "14:30")
    
    Returns:
        datetime object or None if parsing fails
    """
    if not date_str:
        return None
    
    try:
        date_str_clean = date_str.strip()
        
        # Try multiple date formats
        date_formats = [
            "%Y/%m/%d",  # 2025/10/14 (most common on VLR)
            "%Y-%m-%d",  # 2025-10-14
            "%B %d",     # October 15
            "%b %d",     # Oct 15
            "%d/%m",     # 15/10
            "%m/%d",     # 10/15
        ]
        
        parsed_date = None
        for fmt in date_formats:
            try:
                parsed_date = datetime.strptime(date_str_clean, fmt)
                # Add current year if not included in format
                if "%Y" not in fmt:
                    from datetime import datetime as dt
                    current_year = dt.now().year
                    parsed_date = parsed_date.replace(year=current_year)
                break
            except ValueError:
                continue
        
        if not parsed_date:
            return None
        
        # Parse time if available
        if time_str:
            time_str_clean = time_str.strip()
            
            time_formats = [
                "%I:%M %p",  # 5:50 PM or 5:50 pm
                "%I:%M%p",   # 5:50PM (no space)
                "%H:%M",     # 14:30
            ]
            
            for fmt in time_formats:
                try:
                    time_obj = datetime.strptime(time_str_clean, fmt).time()
                    parsed_date = datetime.combine(parsed_date.date(), time_obj)
                    break
                except ValueError:
                    continue
        
        return parsed_date
    except Exception:
        return None


def _extract_match_id_from_url(url: str) -> int | None:
    """
    Extract match ID from match URL.
    
    Args:
        url: Match URL (e.g., "/511536/velocity-gaming-vs-s8ul-esports...")
    
    Returns:
        Match ID or None
    """
    if not url:
        return None
    
    # Remove leading slash
    url = url.lstrip("/")
    
    # Split by slash and get first part
    parts = url.split("/")
    if parts:
        try:
            return int(parts[0])
        except (ValueError, IndexError):
            pass
    
    return None


# pyright: reportUnusedFunction=false
def _get_team_ids_from_match(match_url: str, timeout: float | None = None) -> tuple[int | None, int | None]:
    """
    Get team IDs by fetching the match page.
    
    Args:
        match_url: Full match URL
        timeout: Request timeout
    
    Returns:
        Tuple of (team1_id, team2_id)
    """
    try:
        effective_timeout = timeout if timeout is not None else _config.default_timeout
        html = fetch_html(match_url, effective_timeout)
        soup = BeautifulSoup(html, "lxml")
        
        # Find team links in the match header
        team_links = soup.select(".match-header-link")
        
        team1_id = None
        team2_id = None
        
        if len(team_links) >= 2:
            # Extract team IDs from the links
            t1_val = team_links[0].get("href")
            t2_val = team_links[1].get("href")
            team1_href = t1_val if isinstance(t1_val, str) else None
            team2_href = t2_val if isinstance(t2_val, str) else None
            team1_id = extract_id_from_url(team1_href, "team")
            team2_id = extract_id_from_url(team2_href, "team")
        
        return team1_id, team2_id
    except:
        return None, None


def _get_team_ids_batch(match_urls: list[str], timeout: float | None = None) -> dict[str, tuple[int | None, int | None]]:
    """Get team IDs for multiple matches concurrently.
    
    Args:
        match_urls: List of full match URLs
        timeout: Request timeout
    
    Returns:
        Dictionary mapping match_url to (team1_id, team2_id)
    """
    if not match_urls:
        return {}
    
    # Batch fetch all match pages concurrently
    effective_timeout = timeout if timeout is not None else _config.default_timeout
    batch_results = batch_fetch_html(match_urls, timeout=effective_timeout, max_workers=min(4, len(match_urls)))
    
    # Parse team IDs from each page
    results: dict[str, tuple[int | None, int | None]] = {}
    
    for match_url in match_urls:
        html = batch_results.get(match_url)
        
        if isinstance(html, Exception) or not html:
            results[match_url] = (None, None)
            continue
        
        try:
            soup = BeautifulSoup(html, "lxml")
            
            # Find team links in the match header
            team_links = soup.select(".match-header-link")
            
            team1_id = None
            team2_id = None
            
            if len(team_links) >= 2:
                # Extract team IDs from the links
                t1_val = team_links[0].get("href")
                t2_val = team_links[1].get("href")
                team1_href = t1_val if isinstance(t1_val, str) else None
                team2_href = t2_val if isinstance(t2_val, str) else None
                team1_id = extract_id_from_url(team1_href, "team")
                team2_id = extract_id_from_url(team2_href, "team")
            
            results[match_url] = (team1_id, team2_id)
        except:
            results[match_url] = (None, None)
    
    return results



# [docs]
def upcoming_matches(team_id: int, limit: int | None = None, timeout: float | None = None) -> list[TeamMatch]:
    """
    Get upcoming matches for a team.

    Pages are fetched one at a time until a page yields no matches, a
    network error occurs, the limit is reached, or 100 pages have been read.
    A network error ends pagination quietly; matches collected so far are
    still returned.

    Args:
        team_id: Team ID
        limit: Maximum number of matches to return (fetches across pages if needed).
            A value of zero or less returns an empty list without any request.
        timeout: Request timeout in seconds; the configured default is used when None

    Returns:
        List of upcoming matches

    Example:
        >>> import vlrdevapi as vlr
        >>> matches = vlr.teams.upcoming_matches(team_id=799, limit=10)
        >>> for match in matches:
        ...     if match.match_datetime:
        ...         print(f"{match.team1.name} vs {match.team2.name} - {match.match_datetime.strftime('%B %d, %Y')}")
        ...     else:
        ...         print(f"{match.team1.name} vs {match.team2.name}")
    """
    # Nothing can satisfy a non-positive limit, so skip the network round-trip.
    if limit is not None and limit <= 0:
        return []

    effective_timeout = timeout if timeout is not None else _config.default_timeout
    base_url = f"{_config.vlr_base}/team/matches/{team_id}/?group=upcoming"

    all_matches: list[TeamMatch] = []

    # Hard cap of 100 pages as a safety net against endless pagination.
    for page in range(1, 101):
        url = base_url if page == 1 else f"{base_url}&page={page}"

        try:
            html = fetch_html(url, effective_timeout)
        except NetworkError:
            break

        # Only ask the parser for as many matches as are still needed.
        remaining = None if limit is None else limit - len(all_matches)
        matches = _parse_matches(html, effective_timeout, limit=remaining)

        # An empty page means there is nothing further to read.
        if not matches:
            break

        all_matches.extend(matches)

        if limit is not None and len(all_matches) >= limit:
            return all_matches[:limit]

    return all_matches




# [docs]
def completed_matches(team_id: int, limit: int | None = None, timeout: float | None = None) -> list[TeamMatch]:
    """
    Get completed matches for a team.

    Pages are fetched one at a time until a page yields no matches, a
    network error occurs, the limit is reached, or 100 pages have been read.
    A network error ends pagination quietly; matches collected so far are
    still returned.

    Args:
        team_id: Team ID
        limit: Maximum number of matches to return (fetches across pages if needed).
            A value of zero or less returns an empty list without any request.
        timeout: Request timeout in seconds; the configured default is used when None

    Returns:
        List of completed matches

    Example:
        >>> import vlrdevapi as vlr
        >>> matches = vlr.teams.completed_matches(team_id=799, limit=20)
        >>> for match in matches:
        ...     print(f"{match.team1.name} {match.team1.score}:{match.team2.score} {match.team2.name}")
        ...     if match.match_datetime:
        ...         print(f"  Date: {match.match_datetime.strftime('%B %d, %Y')}")
    """
    # Nothing can satisfy a non-positive limit, so skip the network round-trip.
    if limit is not None and limit <= 0:
        return []

    effective_timeout = timeout if timeout is not None else _config.default_timeout
    base_url = f"{_config.vlr_base}/team/matches/{team_id}/?group=completed"

    all_matches: list[TeamMatch] = []

    # Hard cap of 100 pages as a safety net against endless pagination.
    for page in range(1, 101):
        url = base_url if page == 1 else f"{base_url}&page={page}"

        try:
            # Use the resolved timeout (not the raw, possibly-None argument)
            # so the configured default actually applies.
            html = fetch_html(url, effective_timeout)
        except NetworkError:
            break

        # Only ask the parser for as many matches as are still needed.
        remaining = None if limit is None else limit - len(all_matches)
        matches = _parse_matches(html, effective_timeout, limit=remaining)

        # An empty page means there is nothing further to read.
        if not matches:
            break

        all_matches.extend(matches)

        if limit is not None and len(all_matches) >= limit:
            return all_matches[:limit]

    return all_matches



class _MatchData(TypedDict):
    """Intermediate record for one match row scraped from a team matches page.

    `_parse_matches` fills one of these per match item in its first pass and
    turns them into `TeamMatch` objects once team IDs have been looked up.
    Every field is None when the corresponding element was not found.
    """

    # Numeric match ID taken from the match link, and the absolute match URL.
    match_id: int | None
    match_url: str | None
    # Event information: tournament name plus the phase / series labels
    # (e.g. "Playoffs" / "GF").
    tournament_name: str | None
    phase: str | None
    series: str | None
    # Left-hand team: display name, tag, and logo URL.
    team1_name: str | None
    team1_tag: str | None
    team1_logo: str | None
    # Right-hand team: display name, tag, and logo URL.
    team2_name: str | None
    team2_tag: str | None
    team2_logo: str | None
    # Scores for the left-hand and right-hand team, when shown.
    score_team1: int | None
    score_team2: int | None
    # Parsed date/time of the match.
    match_datetime: datetime | None

def _str_attr(element, attr_name: str) -> str | None:
    """Return the attribute value of a parsed element if it is a plain string, else None."""
    value = element.get(attr_name)
    return value if isinstance(value, str) else None


def _parse_event_details(item) -> tuple[str | None, str | None, str | None]:
    """Return (tournament_name, phase, series) read from a match item's event cell."""
    event_el = item.select_one(".m-item-event")
    if event_el is None:
        return None, None, None

    # The tournament name lives in the bold div inside the event cell.
    tournament_name = None
    tournament_div = event_el.select_one("div[style*='font-weight: 700']")
    if tournament_div is not None:
        tournament_name = extract_text(tournament_div)

    # Whatever follows the tournament name is the "phase ⋅ series" label.
    event_text = extract_text(event_el)
    if not tournament_name or not event_text.startswith(tournament_name):
        return tournament_name, None, None

    remainder = event_text[len(tournament_name):].strip()

    phase = None
    series = None
    if "⋅" in remainder:
        parts = remainder.split("⋅")
        phase = parts[0].strip()
        series = parts[1].strip()
    elif remainder:
        # No separator: treat the entire remainder as the series.
        series = remainder

    return tournament_name, phase, series


def _parse_team_side(item, right: bool) -> tuple[str | None, str | None, str | None]:
    """Return (name, tag, logo_url) for the left team, or the right team when *right* is true."""
    if right:
        team_selector = ".m-item-team.mod-right"
        logo_selector = ".m-item-logo.mod-right img"
    else:
        team_selector = ".m-item-team:not(.mod-right)"
        logo_selector = ".m-item-logo:not(.mod-right) img"

    name = None
    tag = None
    logo = None

    team_el = item.select_one(team_selector)
    if team_el is not None:
        name_el = team_el.select_one(".m-item-team-name")
        if name_el is not None:
            name = extract_text(name_el)

        tag_el = team_el.select_one(".m-item-team-tag")
        if tag_el is not None:
            tag = extract_text(tag_el)

    logo_el = item.select_one(logo_selector)
    if logo_el is not None:
        src = _str_attr(logo_el, "src")
        # Skip default/placeholder logos.
        if src and "vlr.png" not in src and "tmp/" not in src:
            logo = absolute_url(src)

    return name, tag, logo


def _parse_scores(item) -> tuple[int | None, int | None]:
    """Return (score_team1, score_team2) from a match item's result cell, when present."""
    score_team1 = None
    score_team2 = None

    result_el = item.select_one(".m-item-result")
    if result_el is not None:
        score_spans = result_el.select("span")
        if len(score_spans) >= 2:
            try:
                score_team1 = int(extract_text(score_spans[0]))
                score_team2 = int(extract_text(score_spans[1]))
            except (ValueError, AttributeError):
                # Non-numeric score text: keep whatever was parsed so far.
                pass

    return score_team1, score_team2


def _parse_date_and_time_text(item) -> tuple[str | None, str | None]:
    """Return the raw (date, time) strings from a match item's date cell."""
    date_el = item.select_one(".m-item-date")
    if date_el is None:
        return None, None

    date_str = None
    time_str = None

    date_div = date_el.select_one("div")
    if date_div is not None:
        date_str = extract_text(date_div)

    # The time is the text that follows the date div inside the same cell.
    full_date_text = extract_text(date_el)
    if date_str and full_date_text.startswith(date_str):
        time_str = full_date_text[len(date_str):].strip()

    return date_str, time_str


def _parse_matches(html: str, timeout: float | None = None, limit: int | None = None) -> list[TeamMatch]:
    """Parse matches from HTML with batch fetching for team IDs.

    Works in two passes: first every match item on the page is scraped into
    a `_MatchData` record, then the team IDs (which are looked up from the
    individual match pages) are fetched in one concurrent batch and the final
    `TeamMatch` objects are built.

    Args:
        html: HTML content
        timeout: Request timeout for fetching team IDs
        limit: Maximum number of matches to parse (stops early to avoid wasted parsing)

    Returns:
        List of parsed matches
    """
    soup = BeautifulSoup(html, "lxml")

    # First pass: collect all match data and URLs.
    match_data_list: list[_MatchData] = []
    match_urls_to_fetch: list[str] = []

    for item in soup.select("a.m-item"):
        # Early stop if we've reached the limit.
        if limit is not None and len(match_data_list) >= limit:
            break

        match_url_raw = _str_attr(item, "href")
        match_id = _extract_match_id_from_url(match_url_raw) if match_url_raw is not None else None
        match_url = absolute_url(match_url_raw) if match_url_raw else None

        tournament_name, phase, series = _parse_event_details(item)
        team1_name, team1_tag, team1_logo = _parse_team_side(item, right=False)
        team2_name, team2_tag, team2_logo = _parse_team_side(item, right=True)
        score_team1, score_team2 = _parse_scores(item)
        date_str, time_str = _parse_date_and_time_text(item)

        match_data: _MatchData = {
            'match_id': match_id,
            'match_url': match_url,
            'tournament_name': tournament_name,
            'phase': phase,
            'series': series,
            'team1_name': team1_name,
            'team1_tag': team1_tag,
            'team1_logo': team1_logo,
            'team2_name': team2_name,
            'team2_tag': team2_tag,
            'team2_logo': team2_logo,
            'score_team1': score_team1,
            'score_team2': score_team2,
            'match_datetime': _parse_match_datetime(date_str, time_str),
        }
        match_data_list.append(match_data)

        if match_url:
            match_urls_to_fetch.append(match_url)

    # Batch fetch team IDs for all matches concurrently.
    team_ids_map = _get_team_ids_batch(match_urls_to_fetch, timeout)

    # Second pass: build TeamMatch objects with team IDs.
    matches: list[TeamMatch] = []

    for data in match_data_list:
        match_url = data['match_url']
        if isinstance(match_url, str):
            team1_id, team2_id = team_ids_map.get(match_url, (None, None))
        else:
            team1_id, team2_id = (None, None)

        team1_obj = MatchTeam(
            team_id=team1_id,
            name=data['team1_name'],
            tag=data['team1_tag'],
            logo=data['team1_logo'],
            score=data['score_team1'],
        )

        team2_obj = MatchTeam(
            team_id=team2_id,
            name=data['team2_name'],
            tag=data['team2_tag'],
            logo=data['team2_logo'],
            score=data['score_team2'],
        )

        matches.append(TeamMatch(
            match_id=data['match_id'],
            match_url=data['match_url'],
            tournament_name=data['tournament_name'],
            phase=data['phase'],
            series=data['series'],
            team1=team1_obj,
            team2=team2_obj,
            match_datetime=data['match_datetime'],
        ))

    return matches