"""Team matches retrieval."""
from __future__ import annotations
from datetime import datetime
from typing import TypedDict
from bs4 import BeautifulSoup
from ..config import get_config
from ..fetcher import fetch_html, batch_fetch_html
from ..exceptions import NetworkError
from ..utils import extract_text, absolute_url, extract_id_from_url
from .models import TeamMatch, MatchTeam
# Package configuration, obtained once when this module is imported; the
# functions below read `default_timeout` and `vlr_base` from it.
_config = get_config()
def _parse_match_datetime(date_str: str | None, time_str: str | None) -> datetime | None:
    """
    Parse match date and time strings into a naive datetime object.

    Dates without a year are assumed to belong to the current year. A trailing
    alphabetic timezone abbreviation on the time (e.g. "PDT") is ignored; the
    returned datetime carries no timezone information.

    Args:
        date_str: Date string (e.g., "2025/10/14", "October 15", "Oct 15")
        time_str: Time string (e.g., "5:50 pm", "2:30 PM PDT", "14:30")

    Returns:
        datetime object, or None if the date cannot be parsed. When the date
        parses but the time does not, the date at midnight is returned.
    """
    if not isinstance(date_str, str):
        return None
    date_text = date_str.strip()
    if not date_text:
        return None

    date_formats = (
        "%Y/%m/%d",  # 2025/10/14 (most common on VLR)
        "%Y-%m-%d",  # 2025-10-14
        "%B %d",     # October 15
        "%b %d",     # Oct 15
        "%d/%m",     # 15/10
        "%m/%d",     # 10/15
    )
    current_year = datetime.now().year

    parsed_date = None
    for fmt in date_formats:
        try:
            if "%Y" in fmt:
                parsed_date = datetime.strptime(date_text, fmt)
            else:
                # Parse the year together with the day/month instead of patching
                # it in afterwards: strptime otherwise validates against 1900,
                # which rejects "February 29" even in a leap year.
                parsed_date = datetime.strptime(f"{date_text} {current_year}", f"{fmt} %Y")
            break
        except ValueError:
            continue

    if parsed_date is None:
        return None

    if not isinstance(time_str, str):
        return parsed_date
    time_text = time_str.strip()

    # Try the text as-is first, then without a trailing timezone abbreviation
    # ("2:30 PM PDT" -> "2:30 PM"). AM/PM markers are never stripped, otherwise
    # "5:50 pm" could be misread as 05:50 by the 24-hour format.
    candidates = [time_text]
    pieces = time_text.rsplit(None, 1)
    if len(pieces) == 2 and pieces[1].isalpha() and pieces[1].upper() not in ("AM", "PM"):
        candidates.append(pieces[0])

    time_formats = (
        "%I:%M %p",  # 5:50 PM or 5:50 pm
        "%I:%M%p",   # 5:50PM (no space)
        "%H:%M",     # 14:30
    )
    for candidate in candidates:
        for fmt in time_formats:
            try:
                parsed_time = datetime.strptime(candidate, fmt).time()
            except ValueError:
                continue
            return datetime.combine(parsed_date.date(), parsed_time)

    # Time was present but unparseable: fall back to the date alone.
    return parsed_date
def _extract_match_id_from_url(url: str) -> int | None:
    """
    Extract match ID from match URL.

    The ID is taken from the first path segment. Both relative and absolute
    URLs are accepted.

    Args:
        url: Match URL (e.g., "/511536/velocity-gaming-vs-s8ul-esports..." or
            "https://<host>/511536/velocity-gaming-vs-s8ul-esports...")

    Returns:
        Match ID or None
    """
    if not url:
        return None

    # For an absolute URL, drop "<scheme>://<host>" so only the path remains.
    # The scheme check keeps a "://" that appears later (e.g. inside a query
    # string of a relative URL) from being mistaken for a scheme separator.
    scheme, separator, remainder = url.partition("://")
    if separator and scheme.isalnum():
        url = remainder.partition("/")[2]

    first_segment = url.lstrip("/").split("/", 1)[0]
    try:
        return int(first_segment)
    except ValueError:
        return None
# pyright: reportUnusedFunction=false
def _get_team_ids_from_match(match_url: str, timeout: float | None = None) -> tuple[int | None, int | None]:
    """
    Get team IDs by fetching the match page.

    This is a best-effort lookup: any error while fetching or parsing the page
    yields ``(None, None)`` instead of propagating.

    Args:
        match_url: Full match URL
        timeout: Request timeout; falls back to the configured default when None

    Returns:
        Tuple of (team1_id, team2_id); both are None when the page does not
        contain at least two ".match-header-link" elements or on failure.
    """
    try:
        effective_timeout = timeout if timeout is not None else _config.default_timeout
        html = fetch_html(match_url, effective_timeout)
        soup = BeautifulSoup(html, "lxml")

        # The first two header links are read as team 1 and team 2, in that order.
        team_links = soup.select(".match-header-link")
        if len(team_links) < 2:
            return None, None

        t1_val = team_links[0].get("href")
        t2_val = team_links[1].get("href")
        # An attribute value may not be a plain string; only strings are usable as URLs.
        team1_href = t1_val if isinstance(t1_val, str) else None
        team2_href = t2_val if isinstance(t2_val, str) else None
        return (
            extract_id_from_url(team1_href, "team"),
            extract_id_from_url(team2_href, "team"),
        )
    except Exception:
        # Deliberately broad, but not bare: KeyboardInterrupt and SystemExit
        # must still be able to stop the program.
        return None, None
def _get_team_ids_batch(match_urls: list[str], timeout: float | None = None) -> dict[str, tuple[int | None, int | None]]:
    """Get team IDs for multiple matches concurrently.

    Every requested URL gets an entry in the result; pages that could not be
    fetched or parsed map to ``(None, None)``.

    Args:
        match_urls: List of full match URLs
        timeout: Request timeout; falls back to the configured default when None

    Returns:
        Dictionary mapping match_url to (team1_id, team2_id)
    """
    if not match_urls:
        return {}

    # Batch fetch all match pages concurrently (at most 4 workers).
    effective_timeout = timeout if timeout is not None else _config.default_timeout
    batch_results = batch_fetch_html(
        match_urls,
        timeout=effective_timeout,
        max_workers=min(4, len(match_urls)),
    )

    results: dict[str, tuple[int | None, int | None]] = {}
    for match_url in match_urls:
        html = batch_results.get(match_url)
        # A failed fetch is reported as an Exception object or an empty/missing value.
        if isinstance(html, Exception) or not html:
            results[match_url] = (None, None)
            continue

        try:
            soup = BeautifulSoup(html, "lxml")
            # The first two header links are read as team 1 and team 2, in that order.
            team_links = soup.select(".match-header-link")
            team1_id = None
            team2_id = None
            if len(team_links) >= 2:
                t1_val = team_links[0].get("href")
                t2_val = team_links[1].get("href")
                team1_href = t1_val if isinstance(t1_val, str) else None
                team2_href = t2_val if isinstance(t2_val, str) else None
                team1_id = extract_id_from_url(team1_href, "team")
                team2_id = extract_id_from_url(team2_href, "team")
            results[match_url] = (team1_id, team2_id)
        except Exception:
            # Best-effort per page, but not a bare except: KeyboardInterrupt and
            # SystemExit must still propagate.
            results[match_url] = (None, None)

    return results
def upcoming_matches(team_id: int, limit: int | None = None, timeout: float | None = None) -> list[TeamMatch]:
    """
    Get upcoming matches for a team.

    Pages are fetched one after another until a page yields no matches, the
    limit is reached, or 100 pages have been read. A NetworkError while
    fetching a page ends pagination; matches collected so far are returned.

    Args:
        team_id: Team ID
        limit: Maximum number of matches to return (fetches across pages if needed)
        timeout: Request timeout in seconds

    Returns:
        List of upcoming matches

    Example:
        >>> import vlrdevapi as vlr
        >>> matches = vlr.teams.upcoming_matches(team_id=799, limit=10)
        >>> for match in matches:
        ...     if match.match_datetime:
        ...         print(f"{match.team1.name} vs {match.team2.name} - {match.match_datetime.strftime('%B %d, %Y')}")
        ...     else:
        ...         print(f"{match.team1.name} vs {match.team2.name}")
    """
    # A non-positive limit can only ever produce an empty result, so do not
    # touch the network for it.
    if limit is not None and limit <= 0:
        return []

    effective_timeout = timeout if timeout is not None else _config.default_timeout
    all_matches: list[TeamMatch] = []

    # Safety cap of 100 pages to prevent an endless pagination loop.
    for page in range(1, 101):
        url = f"{_config.vlr_base}/team/matches/{team_id}/?group=upcoming"
        if page > 1:
            url += f"&page={page}"

        try:
            html = fetch_html(url, effective_timeout)
        except NetworkError:
            break

        # Only ask the parser for as many matches as are still needed, so it
        # does not resolve team IDs for rows that would be discarded.
        remaining = None if limit is None else limit - len(all_matches)
        matches = _parse_matches(html, effective_timeout, limit=remaining)
        if not matches:
            break

        all_matches.extend(matches)
        if limit is not None and len(all_matches) >= limit:
            return all_matches[:limit]

    return all_matches
def completed_matches(team_id: int, limit: int | None = None, timeout: float | None = None) -> list[TeamMatch]:
    """
    Get completed matches for a team.

    Pages are fetched one after another until a page yields no matches, the
    limit is reached, or 100 pages have been read. A NetworkError while
    fetching a page ends pagination; matches collected so far are returned.

    Args:
        team_id: Team ID
        limit: Maximum number of matches to return (fetches across pages if needed)
        timeout: Request timeout in seconds

    Returns:
        List of completed matches

    Example:
        >>> import vlrdevapi as vlr
        >>> matches = vlr.teams.completed_matches(team_id=799, limit=20)
        >>> for match in matches:
        ...     print(f"{match.team1.name} {match.team1.score}:{match.team2.score} {match.team2.name}")
        ...     if match.match_datetime:
        ...         print(f"  Date: {match.match_datetime.strftime('%B %d, %Y')}")
    """
    # A non-positive limit can only ever produce an empty result, so do not
    # touch the network for it.
    if limit is not None and limit <= 0:
        return []

    # Resolve the timeout once and use it for every request below, including
    # the team-ID lookups performed by the parser.
    effective_timeout = timeout if timeout is not None else _config.default_timeout
    all_matches: list[TeamMatch] = []

    # Safety cap of 100 pages to prevent an endless pagination loop.
    for page in range(1, 101):
        url = f"{_config.vlr_base}/team/matches/{team_id}/?group=completed"
        if page > 1:
            url += f"&page={page}"

        try:
            html = fetch_html(url, effective_timeout)
        except NetworkError:
            break

        # Only ask the parser for as many matches as are still needed, so it
        # does not resolve team IDs for rows that would be discarded.
        remaining = None if limit is None else limit - len(all_matches)
        matches = _parse_matches(html, effective_timeout, limit=remaining)
        if not matches:
            break

        all_matches.extend(matches)
        if limit is not None and len(all_matches) >= limit:
            return all_matches[:limit]

    return all_matches
class _MatchData(TypedDict):
    """Intermediate record for one match row, held before team IDs are resolved.

    Every field may be None when the corresponding piece of the row could not
    be found or parsed.
    """

    # Match identity
    match_id: int | None
    match_url: str | None

    # Event context
    tournament_name: str | None
    phase: str | None
    series: str | None

    # First team
    team1_name: str | None
    team1_tag: str | None
    team1_logo: str | None

    # Second team
    team2_name: str | None
    team2_tag: str | None
    team2_logo: str | None

    # Result and schedule
    score_team1: int | None
    score_team2: int | None
    match_datetime: datetime | None
def _parse_matches(html: str, timeout: float | None = None, limit: int | None = None) -> list[TeamMatch]:
    """Parse matches from HTML with batch fetching for team IDs.

    Works in two passes: the first collects the data available in each
    "a.m-item" row, the second attaches team IDs, which are looked up in one
    batch from the individual match pages.

    Args:
        html: HTML content
        timeout: Request timeout for fetching team IDs
        limit: Maximum number of matches to parse (stops early to avoid wasted parsing)

    Returns:
        List of parsed matches
    """
    soup = BeautifulSoup(html, "lxml")

    # (team selector, logo selector) for the left side, then the right side.
    side_selectors = (
        (".m-item-team:not(.mod-right)", ".m-item-logo:not(.mod-right) img"),
        (".m-item-team.mod-right", ".m-item-logo.mod-right img"),
    )

    # First pass: collect all match data and URLs
    match_data_list: list[_MatchData] = []
    match_urls_to_fetch: list[str] = []

    for item in soup.select("a.m-item"):
        # Early stop if we've reached the limit
        if limit is not None and len(match_data_list) >= limit:
            break

        # Match URL and ID
        href_val = item.get("href")
        match_url_raw = href_val if isinstance(href_val, str) else None
        match_id = _extract_match_id_from_url(match_url_raw) if match_url_raw else None
        match_url = absolute_url(match_url_raw) if match_url_raw else None

        # Tournament name plus phase/series (e.g., "Playoffs ⋅ GF")
        tournament_name = None
        phase = None
        series = None
        event_el = item.select_one(".m-item-event")
        if event_el:
            # The tournament name sits in the bold div
            tournament_div = event_el.select_one("div[style*='font-weight: 700']")
            if tournament_div:
                tournament_name = extract_text(tournament_div)

            # Whatever follows the tournament name is the phase/series text
            event_text = extract_text(event_el)
            if tournament_name and event_text.startswith(tournament_name):
                series_text = event_text[len(tournament_name):].strip()
                if "⋅" in series_text:
                    parts = series_text.split("⋅")
                    phase = parts[0].strip()
                    series = parts[1].strip()
                elif series_text:
                    # No separator: treat the entire text as the series
                    series = series_text

        # Team name, tag and logo for the left side, then the right side
        sides: list[tuple[str | None, str | None, str | None]] = []
        for team_selector, logo_selector in side_selectors:
            name = None
            tag = None
            logo = None

            team_el = item.select_one(team_selector)
            if team_el:
                name_el = team_el.select_one(".m-item-team-name")
                if name_el:
                    name = extract_text(name_el)
                tag_el = team_el.select_one(".m-item-team-tag")
                if tag_el:
                    tag = extract_text(tag_el)

            logo_el = item.select_one(logo_selector)
            if logo_el:
                src_val = logo_el.get("src")
                src = src_val if isinstance(src_val, str) else None
                # Skip default/placeholder logos
                if src and "vlr.png" not in src and "tmp/" not in src:
                    logo = absolute_url(src)

            sides.append((name, tag, logo))

        (team1_name, team1_tag, team1_logo), (team2_name, team2_tag, team2_logo) = sides

        # Scores (if available)
        score_team1 = None
        score_team2 = None
        result_el = item.select_one(".m-item-result")
        if result_el:
            score_spans = result_el.select("span")
            if len(score_spans) >= 2:
                try:
                    score_team1 = int(extract_text(score_spans[0]))
                    score_team2 = int(extract_text(score_spans[1]))
                except (ValueError, AttributeError):
                    pass

        # Date and time
        date_str = None
        time_str = None
        date_el = item.select_one(".m-item-date")
        if date_el:
            date_div = date_el.select_one("div")
            if date_div:
                date_str = extract_text(date_div)
            # The time is the text that follows the date div
            full_date_text = extract_text(date_el)
            if date_str and full_date_text.startswith(date_str):
                time_str = full_date_text[len(date_str):].strip()

        match_datetime = _parse_match_datetime(date_str, time_str)

        # Store match data for the second pass
        match_data: _MatchData = {
            'match_id': match_id,
            'match_url': match_url,
            'tournament_name': tournament_name,
            'phase': phase,
            'series': series,
            'team1_name': team1_name,
            'team1_tag': team1_tag,
            'team1_logo': team1_logo,
            'team2_name': team2_name,
            'team2_tag': team2_tag,
            'team2_logo': team2_logo,
            'score_team1': score_team1,
            'score_team2': score_team2,
            'match_datetime': match_datetime,
        }
        match_data_list.append(match_data)
        if match_url:
            match_urls_to_fetch.append(match_url)

    # Batch fetch team IDs for all matches concurrently
    team_ids_map = _get_team_ids_batch(match_urls_to_fetch, timeout)

    # Second pass: build TeamMatch objects with team IDs
    matches: list[TeamMatch] = []
    for data in match_data_list:
        match_url = data['match_url']
        if isinstance(match_url, str):
            team1_id, team2_id = team_ids_map.get(match_url, (None, None))
        else:
            team1_id, team2_id = (None, None)

        team1_obj = MatchTeam(
            team_id=team1_id,
            name=data['team1_name'],
            tag=data['team1_tag'],
            logo=data['team1_logo'],
            score=data['score_team1'],
        )
        team2_obj = MatchTeam(
            team_id=team2_id,
            name=data['team2_name'],
            tag=data['team2_tag'],
            logo=data['team2_logo'],
            score=data['score_team2'],
        )
        matches.append(TeamMatch(
            match_id=data['match_id'],
            match_url=data['match_url'],
            tournament_name=data['tournament_name'],
            phase=data['phase'],
            series=data['series'],
            team1=team1_obj,
            team2=team2_obj,
            match_datetime=data['match_datetime'],
        ))

    return matches