1.4.0: Updated Profile
This commit is contained in:
@@ -978,6 +978,163 @@ class FeatureService:
|
||||
# Prefer the backfilled utilization rate; treat missing values as 0,
# then drop the temporary backup column.
df['util_usage_rate'] = df['util_usage_rate_backup'].fillna(0)
df.drop(columns=['util_usage_rate_backup'], inplace=True)
||||
# --- 8. New Feature Dimensions (Party, Rating Dist, ELO) ---
# One base query feeds all three feature groups below: per-player,
# per-match rows with team membership and performance metrics.
q_new_feats = f"""
    SELECT mp.steam_id_64, mp.match_id, mp.match_team_id, mp.team_id,
           mp.rating, mp.adr, mp.is_win
    FROM fact_match_players mp
    WHERE mp.steam_id_64 IN ({placeholders})
"""
df_base = pd.read_sql_query(q_new_feats, conn, params=valid_ids)
||||
if not df_base.empty:
    # --- 8.1 Party Size Stats ---
    # Party size = number of players sharing a match_team_id in a match.
    match_ids = df_base['match_id'].unique()
    # SQLite caps bound parameters (default 999), so every IN-list query
    # over match_ids is issued in chunks.
    chunk_size = 900

    if len(match_ids) > 0:
        party_sizes_list = []
        for i in range(0, len(match_ids), chunk_size):
            chunk = list(match_ids[i:i + chunk_size])
            chunk_ph = ','.join(['?'] * len(chunk))
            # match_team_id > 0 — presumably non-positive ids mark
            # solo/unknown groupings; TODO confirm against the loader.
            q_chunk = f"""
                SELECT match_id, match_team_id, COUNT(*) as party_size
                FROM fact_match_players
                WHERE match_id IN ({chunk_ph}) AND match_team_id > 0
                GROUP BY match_id, match_team_id
            """
            party_sizes_list.append(pd.read_sql_query(q_chunk, conn, params=chunk))

        if party_sizes_list:
            df_party_sizes = pd.concat(party_sizes_list)

            # Attach each row's party size to the base data.
            df_base_party = df_base.merge(
                df_party_sizes, on=['match_id', 'match_team_id'], how='left')

            # Mean win rate / rating / ADR per (player, party size).
            party_stats = df_base_party.groupby(['steam_id_64', 'party_size']).agg({
                'is_win': 'mean',
                'rating': 'mean',
                'adr': 'mean',
            }).reset_index()

            # Wide format: one row per player, MultiIndex columns (metric, size).
            pivoted_party = party_stats.pivot(
                index='steam_id_64', columns='party_size').reset_index()

            # Flatten the MultiIndex into party_{size}_{metric} columns.
            # (A previous revision also built an unused column-name list here;
            # removed as dead code.)
            flat_data = {'steam_id_64': pivoted_party['steam_id_64']}
            for size in [1, 2, 3, 4, 5]:
                for src_metric, out_metric in (('is_win', 'win_rate'),
                                               ('rating', 'rating'),
                                               ('adr', 'adr')):
                    if size in pivoted_party[src_metric].columns:
                        flat_data[f"party_{size}_{out_metric}"] = pivoted_party[src_metric][size]

            df_party_flat = pd.DataFrame(flat_data)
            df = df.merge(df_party_flat, on='steam_id_64', how='left')

    # --- 8.2 Rating Distribution ---
    # Tier each match rating with half-open [a, b) bins (right=False):
    #   sleeping  < 0.6
    #   sacrifice [0.6, 1.0)
    #   normal    [1.0, 1.5)
    #   carry     >= 1.5
    df_base['rating_tier'] = pd.cut(
        df_base['rating'],
        bins=[-float('inf'), 0.6, 1.0, 1.5, float('inf')],
        labels=['sleeping', 'sacrifice', 'normal', 'carry'],
        right=False)

    # Share of a player's matches falling in each tier.
    # observed=False keeps a column for every tier even when a player has
    # no matches in it (explicit; matches the current pandas default).
    dist_stats = df_base.groupby(
        ['steam_id_64', 'rating_tier'], observed=False).size().unstack(fill_value=0)
    dist_stats = dist_stats.div(dist_stats.sum(axis=1), axis=0)
    dist_stats.columns = [f"rating_dist_{c}_rate" for c in dist_stats.columns]
    dist_stats = dist_stats.reset_index()
    df = df.merge(dist_stats, on='steam_id_64', how='left')

    # --- 8.3 ELO Stratification ---
    # Average rating achieved against opponents in each ELO band.
    if len(match_ids) > 0:
        elo_list = []
        for i in range(0, len(match_ids), chunk_size):
            chunk = list(match_ids[i:i + chunk_size])
            chunk_ph = ','.join(['?'] * len(chunk))
            q_chunk = f"""
                SELECT match_id, group_id, group_origin_elo
                FROM fact_match_teams
                WHERE match_id IN ({chunk_ph})
            """
            elo_list.append(pd.read_sql_query(q_chunk, conn, params=chunk))

        if elo_list:
            df_elo_teams = pd.concat(elo_list)

            # Join both teams of each match, then keep only the rows whose
            # group_id differs from the player's own team_id — i.e. the
            # opponent's group_origin_elo.
            df_merged_elo = df_base.merge(df_elo_teams, on='match_id', how='left')
            df_merged_elo = df_merged_elo[
                df_merged_elo['group_id'] != df_merged_elo['team_id']]

            # Opponent ELO bands, half-open [a, b):
            # <1200, 1200-1400, 1400-1600, 1600-1800, 1800-2000, >=2000.
            elo_bins = [-float('inf'), 1200, 1400, 1600, 1800, 2000, float('inf')]
            elo_labels = ['lt1200', '1200_1400', '1400_1600',
                          '1600_1800', '1800_2000', 'gt2000']
            df_merged_elo['elo_bin'] = pd.cut(
                df_merged_elo['group_origin_elo'],
                bins=elo_bins, labels=elo_labels, right=False)

            # Mean rating per (player, opponent ELO band); only rating is
            # needed for now.
            elo_stats = df_merged_elo.groupby(
                ['steam_id_64', 'elo_bin'], observed=False).agg({
                    'rating': 'mean'
                }).unstack(fill_value=0)

            # Flatten the (rating, bin) MultiIndex into elo_{bin}_rating.
            flat_elo_data = {'steam_id_64': elo_stats.index}
            for bin_label in elo_labels:
                if bin_label in elo_stats['rating'].columns:
                    flat_elo_data[f"elo_{bin_label}_rating"] = \
                        elo_stats['rating'][bin_label].values

            df_elo_flat = pd.DataFrame(flat_elo_data)
            df = df.merge(df_elo_flat, on='steam_id_64', how='left')

# Final mappings: expose matches_played under the name used downstream.
# NOTE(review): original indentation was lost in extraction; this is placed
# outside the df_base guard so the alias always exists — confirm intent.
df['total_matches'] = df['matches_played']
||||
Reference in New Issue
Block a user