# Source: G05-Customer_Sentiment/src/agent.py
#
# Repository-viewer metadata at capture time: 154 lines, 6.3 KiB, Python.

import os
import joblib
import numpy as np
import pandas as pd
import requests
from typing import Literal, Annotated
from pydantic import BaseModel, Field
# Module-level caches. Each stays None until _ensure_loaded() fills it from the
# matching joblib file under the "artifacts" directory.
_model_lgb = None  # loaded from lgb_pipeline.joblib; predict_risk calls predict_proba on it
_model_lr = None  # loaded from lr_pipeline.joblib; explain_features reads its named_steps
_le = None  # loaded from label_encoder.joblib; used to look up the index of "negative"
# Input schema for a single customer record. As a pydantic BaseModel, the
# declared constraints below are enforced when an instance is constructed.
class CustomerFeatures(BaseModel):
    # Categorical attributes restricted to the listed literal values.
    gender: Literal["male", "female", "other"]
    age_group: Literal["18-25", "26-35", "36-45", "46-60", "60+"]
    region: Literal["north", "south", "east", "west", "central"]
    # Free-form string; no value restriction is declared here.
    product_category: str
    purchase_channel: Literal["online", "offline"]
    # Free-form string; no value restriction is declared here.
    platform: str
    # Must be greater than or equal to 0.
    response_time_hours: Annotated[float, Field(ge=0)]
    issue_resolved: bool
    complaint_registered: bool
    # Must contain at least 3 characters.
    review_text: Annotated[str, Field(min_length=3)]
# Response model wrapping a single risk score (built by predict_risk_model).
class RiskOutput(BaseModel):
    risk: float  # value returned by predict_risk
# Response model wrapping the explanation strings (built by explain_features_model).
class ExplanationOutput(BaseModel):
    factors: list[str]  # one formatted description per contributing feature
def _ensure_loaded():
    """Fill the module-level model caches on first use.

    Each artifact is read with ``joblib.load`` from the relative
    ``artifacts`` directory, and only while its cache variable is still
    ``None``; anything already loaded is left untouched.
    """
    global _model_lgb, _model_lr, _le

    def _read_artifact(filename):
        # The path is relative, so it resolves against the current working
        # directory of the process.
        return joblib.load(os.path.join("artifacts", filename))

    if _model_lgb is None:
        _model_lgb = _read_artifact("lgb_pipeline.joblib")
    if _model_lr is None:
        _model_lr = _read_artifact("lr_pipeline.joblib")
    if _le is None:
        _le = _read_artifact("label_encoder.joblib")
def _to_dataframe(features) -> pd.DataFrame:
    """Wrap one feature record in a single-row DataFrame.

    Args:
        features: A ``CustomerFeatures`` instance (converted with
            ``model_dump()``) or a plain ``dict`` (used as-is).

    Returns:
        A DataFrame with exactly one row built from the record.

    Raises:
        TypeError: If ``features`` is neither of the accepted types.
    """
    if isinstance(features, CustomerFeatures):
        return pd.DataFrame([features.model_dump()])
    if isinstance(features, dict):
        return pd.DataFrame([features])
    raise TypeError("features must be CustomerFeatures or dict")
def predict_risk(features: CustomerFeatures | dict) -> float:
    """Return the predicted probability of the "negative" class.

    Args:
        features: One customer record, as ``CustomerFeatures`` or ``dict``.

    Returns:
        The entry of the first ``predict_proba`` row selected by the index
        that the label encoder assigns to ``"negative"``.
    """
    _ensure_loaded()
    frame = _to_dataframe(features)
    class_probabilities = _model_lgb.predict_proba(frame)[0]
    # Assumes the probability columns follow the label encoder's ordering.
    negative_index = int(_le.transform(["negative"])[0])
    return float(class_probabilities[negative_index])
def predict_risk_model(features: CustomerFeatures | dict) -> RiskOutput:
    """Return the ``predict_risk`` score wrapped in a ``RiskOutput``."""
    score = predict_risk(features)
    return RiskOutput(risk=score)
def explain_features(features: CustomerFeatures | dict) -> list[str]:
    """List the features that contribute most to the "negative" prediction.

    The record is passed through the linear pipeline's ``preprocessor``
    step, each transformed value is multiplied by the ``classifier`` step's
    coefficient for the "negative" class, and the products are ranked by
    absolute size.

    Args:
        features: One customer record, as ``CustomerFeatures`` or ``dict``.

    Returns:
        Up to eight strings of the form
        ``"<feature> increase|decrease negative risk (weight=<x.xxx>)"``,
        largest magnitude first. Features whose contribution is exactly
        zero are omitted, so the list may be shorter than eight entries.
    """
    _ensure_loaded()
    df = _to_dataframe(features)
    pre = _model_lr.named_steps["preprocessor"]
    clf = _model_lr.named_steps["classifier"]

    # The preprocessor may return either a scipy sparse matrix or a dense
    # array depending on its configuration; only the former has toarray().
    transformed = pre.transform(df)
    if hasattr(transformed, "toarray"):
        transformed = transformed.toarray()
    vec = np.asarray(transformed, dtype=float).ravel()

    idx_neg = int(_le.transform(["negative"])[0])
    coef_matrix = np.atleast_2d(clf.coef_)
    if coef_matrix.shape[0] == 1:
        # Binary linear models expose one coefficient row, which describes
        # the class at index 1; the class at index 0 is its negation.
        coefs = coef_matrix[0] if idx_neg == 1 else -coef_matrix[0]
    else:
        coefs = coef_matrix[idx_neg]

    contrib = vec * coefs
    names = pre.get_feature_names_out()
    order = np.argsort(-np.abs(contrib))[:8]

    out = []
    for i in order:
        weight = contrib[i]
        if weight == 0:
            # A zero contribution neither raises nor lowers the risk, so
            # reporting it as a "decrease" would be misleading.
            continue
        direction = "increase" if weight > 0 else "decrease"
        out.append(f"{names[i]} {direction} negative risk (weight={weight:.3f})")
    return out
def explain_features_model(features: CustomerFeatures | dict) -> ExplanationOutput:
    """Return the ``explain_features`` strings wrapped in an ``ExplanationOutput``."""
    factor_lines = explain_features(features)
    return ExplanationOutput(factors=factor_lines)
# Chinese display labels for the preprocessor output feature names that
# may appear in explain_features() strings.
_FEATURE_DESCRIPTIONS = {
    'cat__gender_male': '男性',
    'cat__gender_female': '女性',
    'cat__gender_other': '其他性别',
    'cat__age_group_18-25': '18-25岁年龄段',
    'cat__age_group_26-35': '26-35岁年龄段',
    'cat__age_group_36-45': '36-45岁年龄段',
    'cat__age_group_46-60': '46-60岁年龄段',
    'cat__age_group_60+': '60岁以上年龄段',
    'cat__region_north': '北部地区',
    'cat__region_south': '南部地区',
    'cat__region_east': '东部地区',
    'cat__region_west': '西部地区',
    'cat__region_central': '中部地区',
    'cat__purchase_channel_online': '线上购买渠道',
    'cat__purchase_channel_offline': '线下购买渠道',
    'cat__issue_resolved_True': '问题已解决',
    'cat__issue_resolved_False': '问题未解决',
    'cat__complaint_registered_True': '已注册投诉',
    'cat__complaint_registered_False': '未注册投诉',
    'num__response_time_hours': '响应时间(小时)',
}

_DEEPSEEK_CHAT_URL = "https://api.deepseek.com/v1/chat/completions"


def _humanize_explanations(explanations: list[str]) -> list[str]:
    """Rewrite raw factor strings into Chinese phrases for display.

    Only explanations that mention a feature listed in
    ``_FEATURE_DESCRIPTIONS`` are kept. If none match, every explanation
    is kept with just the increase/decrease wording translated.
    """
    readable = []
    for exp in explanations:
        for feature, desc in _FEATURE_DESCRIPTIONS.items():
            if feature in exp:
                text = exp.replace(feature, desc)
                text = text.replace('increase negative risk', '增加了负面情绪风险')
                text = text.replace('decrease negative risk', '降低了负面情绪风险')
                text = text.replace('weight=', '权重为')
                readable.append(text)
                break
    if not readable:
        readable = [
            exp.replace('increase negative risk', '增加了负面情绪风险')
            .replace('decrease negative risk', '降低了负面情绪风险')
            for exp in explanations
        ]
    return readable


def _build_llm_prompt(factor_text: str) -> str:
    """Build the user prompt asking the LLM to summarise the factors."""
    return (
        "请将以下客户负面情绪风险因素分析结果转化为一段自然、流畅的中文解释,用于向客服人员展示:\n\n"
        f"{factor_text}\n\n"
        "要求:\n"
        "1. 用简洁的语言说明主要风险因素\n"
        "2. 突出影响最大的几个因素\n"
        "3. 保持专业但易于理解\n"
        "4. 不要使用技术术语\n"
        "5. 总长度控制在100-200字之间"
    )


def explain_features_with_llm(
    features: CustomerFeatures | dict,
    api_key: str,
    timeout: float = 30.0,
) -> str:
    """Use an LLM to generate a natural-language explanation of the risk factors.

    The factor strings from ``explain_features`` are translated into
    readable Chinese phrases and sent to the DeepSeek chat-completions
    endpoint. This is best-effort: on any failure the plain list of
    phrases is returned instead of raising.

    Args:
        features: One customer record, as ``CustomerFeatures`` or ``dict``.
        api_key: Bearer token for the DeepSeek API. When empty, no request
            is made and the fallback text is returned.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The LLM-generated explanation, or a fallback string consisting of
        a fixed heading followed by the newline-joined phrases.
    """
    import logging  # function-scope import: the module does not import logging

    explanations = explain_features(features)
    human_explanations = _humanize_explanations(explanations)
    factor_text = "\n".join(human_explanations)
    fallback = f"客户负面情绪风险分析:{factor_text}"

    if not api_key:
        # Without credentials the request cannot succeed; skip the round trip.
        return fallback

    prompt = _build_llm_prompt(factor_text)
    try:
        response = requests.post(
            _DEEPSEEK_CHAT_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": "你是一位专业的客户分析专家,擅长将复杂的数据分析结果转化为通俗易懂的解释。"},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 200,
                "temperature": 0.7,
            },
            # requests waits indefinitely unless a timeout is given.
            timeout=timeout,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
    except Exception as exc:
        # Deliberately broad: any network, HTTP or payload-shape failure
        # degrades to the fallback. Only the exception type is logged because
        # some requests errors echo header values, which would leak the key.
        logging.getLogger(__name__).warning(
            "LLM explanation request failed (%s); using fallback text",
            type(exc).__name__,
        )
        return fallback

    if not isinstance(content, str) or not content.strip():
        # A null or empty completion is not a usable explanation.
        return fallback
    return content