1.4.0: Updated Profile

This commit is contained in:
2026-01-27 16:51:53 +08:00
parent 28dc02c0c4
commit 0be68a86f6
7 changed files with 342 additions and 81 deletions

View File

@@ -978,6 +978,163 @@ class FeatureService:
# Restore utility usage rate from its backup column (missing -> 0), then drop the backup.
df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0)
df.drop(columns=['util_usage_rate_backup'], inplace=True)
# --- 8. New Feature Dimensions (Party, Rating Dist, ELO) ---
# Base per-match rows used by all three feature groups below.
# NOTE(review): `placeholders`, `valid_ids`, `conn` and `df` come from earlier
# in this method (outside this view); `placeholders` is presumably a '?,?,...'
# list matching `valid_ids` — confirm against the surrounding code.
q_new_feats = f"""
SELECT mp.steam_id_64, mp.match_id, mp.match_team_id, mp.team_id,
mp.rating, mp.adr, mp.is_win
FROM fact_match_players mp
WHERE mp.steam_id_64 IN ({placeholders})
"""
df_base = pd.read_sql_query(q_new_feats, conn, params=valid_ids)
if not df_base.empty:
    # --- 8.1 Party Size Stats ---
    # party_size = number of players sharing a match_team_id within a match
    # (match_team_id > 0 presumably marks premade groups — verify schema).
    match_ids = df_base['match_id'].unique()
    if len(match_ids) > 0:
        match_id_ph = ','.join(['?'] * len(match_ids))
        q_party_size = f"""
SELECT match_id, match_team_id, COUNT(*) as party_size
FROM fact_match_players
WHERE match_id IN ({match_id_ph}) AND match_team_id > 0
GROUP BY match_id, match_team_id
"""
        # Chunk the IN-list to stay under SQLite's host-parameter limit
        # (historically 999 variables per statement).
        chunk_size = 900
        party_sizes_list = []
        for i in range(0, len(match_ids), chunk_size):
            chunk = match_ids[i:i+chunk_size]
            chunk_ph = ','.join(['?'] * len(chunk))
            # NOTE(review): rewriting the query via str.replace assumes
            # `match_id_ph` occurs exactly once in the SQL text — fragile;
            # building the query per-chunk would be safer.
            q_chunk = q_party_size.replace(match_id_ph, chunk_ph)
            party_sizes_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
        if party_sizes_list:
            df_party_sizes = pd.concat(party_sizes_list)
            # Attach each player's party size to their match rows.
            df_base_party = df_base.merge(df_party_sizes, on=['match_id', 'match_team_id'], how='left')
            # Per (player, party size) means of win rate / rating / ADR.
            party_stats = df_base_party.groupby(['steam_id_64', 'party_size']).agg({
                'is_win': 'mean',
                'rating': 'mean',
                'adr': 'mean'
            }).reset_index()
            # Pivot to one row per player; columns become a (metric, size) MultiIndex.
            pivoted_party = party_stats.pivot(index='steam_id_64', columns='party_size').reset_index()
            # NOTE(review): `new_party_cols` is built but never used — the flat
            # frame below is constructed directly from the MultiIndex. Dead code.
            new_party_cols = ['steam_id_64']
            for col in pivoted_party.columns:
                if col[0] == 'steam_id_64': continue
                metric, size = col
                if size in [1, 2, 3, 4, 5]:
                    metric_name = 'win_rate' if metric == 'is_win' else metric
                    new_party_cols.append(f"party_{int(size)}_{metric_name}")
            # Flatten the MultiIndex into party_{size}_{metric} columns (sizes 1-5).
            flat_data = {'steam_id_64': pivoted_party['steam_id_64']}
            for size in [1, 2, 3, 4, 5]:
                if size in pivoted_party['is_win'].columns:
                    flat_data[f"party_{size}_win_rate"] = pivoted_party['is_win'][size]
                if size in pivoted_party['rating'].columns:
                    flat_data[f"party_{size}_rating"] = pivoted_party['rating'][size]
                if size in pivoted_party['adr'].columns:
                    flat_data[f"party_{size}_adr"] = pivoted_party['adr'][size]
            df_party_flat = pd.DataFrame(flat_data)
            df = df.merge(df_party_flat, on='steam_id_64', how='left')
    # --- 8.2 Rating Distribution ---
    # Tier each match rating: sleeping (<0.6), sacrifice [0.6,1.0),
    # normal [1.0,1.5), carry (>=1.5).
    # NOTE(review): this first pd.cut is dead code — its result is
    # immediately overwritten by the infinite-bin cut below. Safe to delete.
    df_base['rating_tier'] = pd.cut(df_base['rating'],
                                    bins=[-1, 0.6, 1.0, 1.5, 100],
                                    labels=['sleeping', 'sacrifice', 'normal', 'carry'],
                                    right=False)  # superseded by the cut below
    # Effective binning: right=False gives half-open [a, b) intervals, so the
    # boundaries land as 0.6 -> sacrifice, 1.0 -> normal, 1.5 -> carry, and
    # the +/-inf edges make the outer tiers unbounded.
    df_base['rating_tier'] = pd.cut(df_base['rating'],
                                    bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')],
                                    labels=['sleeping', 'sacrifice', 'normal', 'carry'],
                                    right=False)
    # Count matches per (player, tier) and normalize rows to rates summing to 1.
    dist_stats = df_base.groupby(['steam_id_64', 'rating_tier']).size().unstack(fill_value=0)
    dist_stats = dist_stats.div(dist_stats.sum(axis=1), axis=0)
    dist_stats.columns = [f"rating_dist_{c}_rate" for c in dist_stats.columns]
    dist_stats = dist_stats.reset_index()
    df = df.merge(dist_stats, on='steam_id_64', how='left')
    # --- 8.3 ELO Stratification ---
    # Average player rating bucketed by opponent team ELO.
    if len(match_ids) > 0:
        q_elo = f"""
SELECT match_id, group_id, group_origin_elo
FROM fact_match_teams
WHERE match_id IN ({match_id_ph})
"""
        # Same chunking (and same fragile str.replace pattern) as 8.1.
        elo_list = []
        for i in range(0, len(match_ids), chunk_size):
            chunk = match_ids[i:i+chunk_size]
            chunk_ph = ','.join(['?'] * len(chunk))
            q_chunk = q_elo.replace(match_id_ph, chunk_ph)
            elo_list.append(pd.read_sql_query(q_chunk, conn, params=list(chunk)))
        if elo_list:
            df_elo_teams = pd.concat(elo_list)
            # Join every team row of each match onto the player's row, then keep
            # only rows where group_id differs from the player's team_id —
            # presumably leaving the opposing team's ELO (assumes exactly two
            # teams per match; TODO confirm fact_match_teams cardinality).
            df_merged_elo = df_base.merge(df_elo_teams, on='match_id', how='left')
            df_merged_elo = df_merged_elo[df_merged_elo['group_id'] != df_merged_elo['team_id']]
            # Opponent-ELO buckets: <1200, 1200-1400, ..., 1800-2000, >2000.
            # right=False -> each bucket is [lower, upper).
            elo_bins = [-float('inf'), 1200, 1400, 1600, 1800, 2000, float('inf')]
            elo_labels = ['lt1200', '1200_1400', '1400_1600', '1600_1800', '1800_2000', 'gt2000']
            df_merged_elo['elo_bin'] = pd.cut(df_merged_elo['group_origin_elo'], bins=elo_bins, labels=elo_labels, right=False)
            # Mean rating per (player, ELO bucket); only rating is aggregated here.
            # NOTE(review): fill_value=0 makes never-played buckets read as a
            # 0.0 mean rating, indistinguishable from a true 0 — NaN (and a
            # left merge) would preserve "no data". Confirm downstream intent.
            elo_stats = df_merged_elo.groupby(['steam_id_64', 'elo_bin']).agg({
                'rating': 'mean'
            }).unstack(fill_value=0)
            # Flatten the ('rating', bin) MultiIndex into elo_{bin}_rating columns.
            flat_elo_data = {'steam_id_64': elo_stats.index}
            for bin_label in elo_labels:
                if bin_label in elo_stats['rating'].columns:
                    flat_elo_data[f"elo_{bin_label}_rating"] = elo_stats['rating'][bin_label].values
            df_elo_flat = pd.DataFrame(flat_elo_data)
            df = df.merge(df_elo_flat, on='steam_id_64', how='left')
# Final Mappings: expose matches_played under its public alias.
df['total_matches'] = df['matches_played']