"""Negative-sentiment risk scoring and explanation helpers.

The module lazily loads three joblib artifacts from the ``artifacts``
directory (relative to the current working directory), scores a customer
record with one pipeline, explains it with the linear pipeline's
coefficients, and can optionally ask an LLM to phrase the explanation.
"""
import logging
import os
from typing import Annotated, Literal

import joblib
import numpy as np
import pandas as pd
import requests
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

# Lazily populated by _ensure_loaded(); None means "not loaded yet".
_model_lgb = None
_model_lr = None
_le = None

_ARTIFACT_DIR = "artifacts"
_TOP_FACTORS = 8
_DEEPSEEK_URL = "https://api.deepseek.com/v1/chat/completions"
# (connect, read) timeouts in seconds so a stalled API call cannot hang the caller.
_LLM_TIMEOUT_SECONDS = (5, 30)

# Transformed feature name -> human-readable description used in LLM prompts.
# Insertion order matters: the first key found in an explanation line wins.
_FEATURE_DESCRIPTIONS = {
    'cat__gender_male': '男性',
    'cat__gender_female': '女性',
    'cat__gender_other': '其他性别',
    'cat__age_group_18-25': '18-25岁年龄段',
    'cat__age_group_26-35': '26-35岁年龄段',
    'cat__age_group_36-45': '36-45岁年龄段',
    'cat__age_group_46-60': '46-60岁年龄段',
    'cat__age_group_60+': '60岁以上年龄段',
    'cat__region_north': '北部地区',
    'cat__region_south': '南部地区',
    'cat__region_east': '东部地区',
    'cat__region_west': '西部地区',
    'cat__region_central': '中部地区',
    'cat__purchase_channel_online': '线上购买渠道',
    'cat__purchase_channel_offline': '线下购买渠道',
    'cat__issue_resolved_True': '问题已解决',
    'cat__issue_resolved_False': '问题未解决',
    'cat__complaint_registered_True': '已注册投诉',
    'cat__complaint_registered_False': '未注册投诉',
    'num__response_time_hours': '响应时间(小时)',
}


# Validated input record describing one customer interaction.
class CustomerFeatures(BaseModel):
    gender: Literal["male", "female", "other"]
    age_group: Literal["18-25", "26-35", "36-45", "46-60", "60+"]
    region: Literal["north", "south", "east", "west", "central"]
    product_category: str
    purchase_channel: Literal["online", "offline"]
    platform: str
    # Must be non-negative.
    response_time_hours: Annotated[float, Field(ge=0)]
    issue_resolved: bool
    complaint_registered: bool
    # At least three characters long.
    review_text: Annotated[str, Field(min_length=3)]


# Wrapper for the probability returned by predict_risk().
class RiskOutput(BaseModel):
    risk: float


# Wrapper for the factor strings returned by explain_features().
class ExplanationOutput(BaseModel):
    factors: list[str]


def _ensure_loaded():
    """Load any artifact that has not been loaded yet into the module globals."""
    global _model_lgb, _model_lr, _le
    if _model_lgb is None:
        _model_lgb = joblib.load(os.path.join(_ARTIFACT_DIR, "lgb_pipeline.joblib"))
    if _model_lr is None:
        _model_lr = joblib.load(os.path.join(_ARTIFACT_DIR, "lr_pipeline.joblib"))
    if _le is None:
        _le = joblib.load(os.path.join(_ARTIFACT_DIR, "label_encoder.joblib"))


def _to_dataframe(features) -> pd.DataFrame:
    """Return a one-row DataFrame built from a CustomerFeatures or a dict.

    Dicts are passed through as-is (they are not validated).

    Raises:
        TypeError: if *features* is neither a CustomerFeatures nor a dict.
    """
    if isinstance(features, CustomerFeatures):
        payload = features.model_dump()
    elif isinstance(features, dict):
        payload = features
    else:
        raise TypeError("features must be CustomerFeatures or dict")
    return pd.DataFrame([payload])


def _negative_class_index() -> int:
    """Return the label encoder's integer code for the "negative" label."""
    return int(_le.transform(["negative"])[0])


def predict_risk(features: CustomerFeatures | dict) -> float:
    """Return the predicted probability of the "negative" class for one record."""
    _ensure_loaded()
    df = _to_dataframe(features)
    probs = _model_lgb.predict_proba(df)[0]
    return float(probs[_negative_class_index()])


def predict_risk_model(features: CustomerFeatures | dict) -> RiskOutput:
    """Return predict_risk() wrapped in a RiskOutput."""
    return RiskOutput(risk=predict_risk(features))


def explain_features(features: CustomerFeatures | dict) -> list[str]:
    """Describe the transformed features that contribute most to negative risk.

    Each contribution is the transformed feature value multiplied by the
    linear classifier's coefficient for the "negative" class. The eight
    largest contributions by absolute value are returned, largest first,
    each formatted as
    ``"<name> <increase|decrease> negative risk (weight=<value>)"``.
    """
    _ensure_loaded()
    df = _to_dataframe(features)
    preprocessor = _model_lr.named_steps["preprocessor"]
    classifier = _model_lr.named_steps["classifier"]

    transformed = preprocessor.transform(df)
    # The preprocessor may return either a sparse matrix or a dense array;
    # only sparse matrices have .toarray().
    if hasattr(transformed, "toarray"):
        transformed = transformed.toarray()
    vec = np.asarray(transformed).ravel()

    idx_neg = _negative_class_index()
    coef = np.asarray(classifier.coef_)
    if coef.ndim == 2 and coef.shape[0] == 1:
        # scikit-learn linear classifiers keep a single coefficient row for
        # binary problems, and that row describes classes_[1]. Indexing it
        # with the class code would either fail (code 1) or report inverted
        # directions (code 0), so derive the row for the negative class.
        coefs = coef[0] if idx_neg == 1 else -coef[0]
    else:
        coefs = coef[idx_neg]

    contrib = vec * coefs
    names = preprocessor.get_feature_names_out()
    order = np.argsort(-np.abs(contrib))[:_TOP_FACTORS]

    out = []
    for i in order:
        direction = "increase" if contrib[i] > 0 else "decrease"
        out.append(f"{names[i]} {direction} negative risk (weight={contrib[i]:.3f})")
    return out


def explain_features_model(features: CustomerFeatures | dict) -> ExplanationOutput:
    """Return explain_features() wrapped in an ExplanationOutput."""
    return ExplanationOutput(factors=explain_features(features))


def _humanize_explanations(explanations: list[str]) -> list[str]:
    """Rewrite explanation lines with the descriptions in _FEATURE_DESCRIPTIONS.

    NOTE: lines that mention no mapped feature are omitted, unless no line
    at all could be mapped; in that case every line is kept and only the
    increase/decrease phrases are translated.
    """
    readable = []
    for exp in explanations:
        for feature, desc in _FEATURE_DESCRIPTIONS.items():
            if feature in exp:
                text = exp.replace(feature, desc)
                text = text.replace('increase negative risk', '增加了负面情绪风险')
                text = text.replace('decrease negative risk', '降低了负面情绪风险')
                text = text.replace('weight=', '权重为')
                readable.append(text)
                break

    if not readable:
        readable = [
            exp.replace('increase negative risk', '增加了负面情绪风险')
               .replace('decrease negative risk', '降低了负面情绪风险')
            for exp in explanations
        ]
    return readable


def explain_features_with_llm(features: CustomerFeatures | dict, api_key: str) -> str:
    """Use an LLM to phrase the risk factors as a natural-language explanation.

    The factors from explain_features() are made human-readable and sent to
    the DeepSeek chat-completions endpoint.

    Args:
        features: the customer record to explain.
        api_key: bearer token for the API. When empty, no request is made.

    Returns:
        The model's reply, or a plain newline-joined summary of the factors
        when the API key is missing, the request fails or times out, or the
        response does not have the expected shape.
    """
    # explain_features() loads the model artifacts itself.
    joined = "\n".join(_humanize_explanations(explain_features(features)))
    fallback = f"客户负面情绪风险分析:{joined}"

    if not api_key:
        # The request would be rejected anyway; skip the round trip.
        logger.warning("No API key supplied; returning plain risk summary")
        return fallback

    prompt = f"请将以下客户负面情绪风险因素分析结果转化为一段自然、流畅的中文解释,用于向客服人员展示:\n\n{joined}\n\n要求:\n1. 用简洁的语言说明主要风险因素\n2. 突出影响最大的几个因素\n3. 保持专业但易于理解\n4. 不要使用技术术语\n5. 总长度控制在100-200字之间"

    try:
        response = requests.post(
            _DEEPSEEK_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": "你是一位专业的客户分析专家,擅长将复杂的数据分析结果转化为通俗易懂的解释。"},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 200,
                "temperature": 0.7,
            },
            timeout=_LLM_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
        # Best-effort: transport errors, HTTP errors, invalid JSON and
        # unexpected payload shapes all degrade to the plain summary.
        logger.warning("LLM explanation failed; returning plain risk summary: %s", exc)
        return fallback