"""Tweet sentiment analysis agent.

Implements a three-stage pipeline -- classify, explain, generate a disposal
plan -- and returns the outcome as structured pydantic models.
"""

import re
import zlib
from functools import lru_cache
from pathlib import Path
from typing import Optional

import numpy as np
import polars as pl

from pydantic import BaseModel, Field

from src.tweet_data import load_cleaned_tweets
from src.train_tweet_ultimate import load_model as load_ultimate_model


# Confidence at or above which a negative/positive prediction is acted on
# directly instead of being investigated/monitored.
_HIGH_CONFIDENCE = 0.8

# A keyword matches as a whole word, optionally followed by a common
# inflection suffix, so "thank" still matches "thanks" but "help" no longer
# matches "helpful" and "how" no longer matches "show".
_INFLECTION_SUFFIX = r"(?:s|es|d|ed|ing)?"


def _keyword_pattern(word: str) -> "re.Pattern[str]":
    """Compile the word-boundary pattern used to detect *word* in a tweet."""
    return re.compile(rf"\b{re.escape(word)}{_INFLECTION_SUFFIX}\b")


def _compile_vocabulary(words: tuple) -> tuple:
    """Pair every keyword with its compiled pattern, preserving order."""
    return tuple((word, _keyword_pattern(word)) for word in words)


def _find_keywords(text_lower: str, vocabulary: tuple) -> list[str]:
    """Return the keywords of *vocabulary* present in the lower-cased text."""
    return [word for word, pattern in vocabulary if pattern.search(text_lower)]


_NEGATIVE_VOCAB = _compile_vocabulary((
    "bad", "terrible", "awful", "worst", "hate", "angry", "disappointed",
    "frustrated", "cancelled", "delayed", "lost", "rude",
))
_POSITIVE_VOCAB = _compile_vocabulary((
    "good", "great", "excellent", "best", "love", "happy", "satisfied",
    "amazing", "wonderful", "thank", "helpful",
))
_NEUTRAL_VOCAB = _compile_vocabulary((
    "question", "how", "what", "when", "where", "why", "please", "help",
    "info", "information",
))
_HELP_PATTERN = _keyword_pattern("help")

# (key-factor prefix, vocabulary, reasoning sentence) -- evaluated in order.
_KEYWORD_RULES = (
    ("包含负面词汇", _NEGATIVE_VOCAB, "文本中包含多个负面情感词汇,表达不满情绪"),
    ("包含正面词汇", _POSITIVE_VOCAB, "文本中包含正面情感词汇,表达满意或感谢"),
    ("包含中性词汇", _NEUTRAL_VOCAB, "文本主要包含询问或请求,情绪相对中性"),
)

# (marker character, key factor, reasoning sentence) -- evaluated in order.
_PUNCTUATION_RULES = (
    ("!", "包含感叹号", "感叹号的使用表明情绪较为强烈"),
    ("?", "包含问号", "问号的使用表明存在疑问或询问"),
    ("@", "包含@提及", "直接@航空公司表明希望获得关注或回复"),
)

_NEGATIVE_RESPONSES = (
    "感谢您的反馈。我们非常重视您提到的问题,将立即进行调查并尽快给您答复。",
    "对于您的不愉快体验,我们深表歉意。请私信我们详细情况,我们将全力为您解决。",
    "收到您的反馈,我们对此感到抱歉。相关部门已介入,将尽快处理并给您满意的答复。",
)
_POSITIVE_RESPONSES = (
    "感谢您的认可和支持!我们会继续努力为您提供更好的服务。",
    "很高兴听到您的正面反馈!您的满意是我们前进的动力。",
    "感谢您的分享!我们会将您的反馈传达给团队,激励我们做得更好。",
)
_NEUTRAL_RESPONSES = (
    "感谢您的询问。请问您需要了解哪方面的信息?我们将竭诚为您解答。",
    "收到您的问题。请提供更多细节,以便我们更好地为您提供帮助。",
)


def _pick_response(text: str, responses: tuple) -> str:
    """Choose a reply template deterministically from the tweet text.

    The built-in ``hash()`` is salted per process for ``str``, so it would
    select a different template for the same tweet on every run; CRC32 of the
    UTF-8 bytes is stable across processes and machines.
    """
    digest = zlib.crc32(text.encode("utf-8", errors="replace"))
    return responses[digest % len(responses)]


class SentimentClassification(BaseModel):
    """Result of the sentiment classification step."""
    sentiment: str = Field(description="情感类别: negative/neutral/positive")
    confidence: float = Field(description="置信度 (0-1)")


class SentimentExplanation(BaseModel):
    """Human-readable explanation of a sentiment decision."""
    key_factors: list[str] = Field(description="影响情感判断的关键因素")
    reasoning: str = Field(description="情感判断的推理过程")


class DisposalPlan(BaseModel):
    """Recommended handling of a tweet."""
    priority: str = Field(description="处理优先级: high/medium/low")
    action_type: str = Field(description="行动类型: response/investigate/monitor/ignore")
    suggested_response: Optional[str] = Field(description="建议回复内容(如适用)", default=None)
    follow_up_actions: list[str] = Field(description="后续行动建议")


class TweetAnalysisResult(BaseModel):
    """Structured output of the full tweet analysis pipeline."""
    tweet_text: str = Field(description="原始推文文本")
    airline: str = Field(description="航空公司")
    classification: SentimentClassification = Field(description="情感分类结果")
    explanation: SentimentExplanation = Field(description="情感解释")
    disposal_plan: DisposalPlan = Field(description="处置方案")


class TweetSentimentAgent:
    """Tweet sentiment analysis agent.

    Runs the "classify -> explain -> generate disposal plan" pipeline.
    """

    def __init__(self, model_path: Optional[Path] = None):
        """Load the model and expose its encoders/vectorizer.

        Args:
            model_path: Optional model location.
                NOTE(review): this argument is accepted but not forwarded to
                the loader, whose signature is not visible from this module;
                confirm and wire it through if the loader supports a path.
        """
        self.model = load_ultimate_model()
        self.label_encoder = self.model.label_encoder
        self.tfidf_vectorizer = self.model.tfidf_vectorizer
        self.airline_encoder = self.model.airline_encoder

    def classify(self, text: str, airline: str) -> SentimentClassification:
        """Classify the sentiment of a single tweet.

        Args:
            text: Tweet text.
            airline: Airline the tweet refers to.

        Returns:
            The predicted label and the probability assigned to that label.
        """
        # Build the single-row inputs once and reuse them for both calls.
        texts = np.array([text])
        airlines = np.array([airline])

        sentiment = self.model.predict(texts, airlines)[0]
        proba = self.model.predict_proba(texts, airlines)[0]

        # Confidence is the probability in the column of the predicted label.
        sentiment_idx = self.label_encoder.transform([sentiment])[0]

        return SentimentClassification(
            sentiment=str(sentiment),
            confidence=float(proba[sentiment_idx]),
        )

    def explain(self, text: str, airline: str, classification: SentimentClassification) -> SentimentExplanation:
        """Build a rule-based explanation for a sentiment decision.

        The explanation is derived from the tweet text only (keyword and
        punctuation cues); ``classification`` is accepted for interface
        symmetry and is not consulted.

        Args:
            text: Tweet text.
            airline: Airline the tweet refers to.
            classification: Classification result for the tweet.

        Returns:
            Key factors found in the text and a joined reasoning sentence.
        """
        key_factors: list[str] = []
        reasoning_parts: list[str] = []
        text_lower = text.lower()

        # Keyword cues: report at most three hits per category.
        for prefix, vocabulary, reason in _KEYWORD_RULES:
            found = _find_keywords(text_lower, vocabulary)
            if found:
                key_factors.append(f"{prefix}: {', '.join(found[:3])}")
                reasoning_parts.append(reason)

        # Punctuation / mention cues.
        for marker, factor, reason in _PUNCTUATION_RULES:
            if marker in text:
                key_factors.append(factor)
                reasoning_parts.append(reason)

        key_factors.append(f"涉及航空公司: {airline}")

        # Fall back to a generic sentence when no cue fired.
        if not reasoning_parts:
            reasoning_parts.append("根据文本整体语义和情感特征进行判断")

        return SentimentExplanation(
            key_factors=key_factors,
            reasoning="。".join(reasoning_parts) + "。",
        )

    def generate_disposal_plan(
        self,
        text: str,
        airline: str,
        classification: SentimentClassification,
        explanation: SentimentExplanation,
    ) -> DisposalPlan:
        """Derive a handling plan from the sentiment and its confidence.

        Args:
            text: Tweet text.
            airline: Airline the tweet refers to.
            classification: Classification result for the tweet.
            explanation: Explanation for the tweet (not consulted).

        Returns:
            Priority, action type, optional suggested reply and follow-ups.
        """
        sentiment = classification.sentiment
        confident = classification.confidence >= _HIGH_CONFIDENCE
        suggested_response: Optional[str] = None

        if sentiment == "negative":
            if confident:
                priority, action_type = "high", "response"
                suggested_response = self._generate_negative_response(text, airline)
                follow_up_actions = [
                    "记录客户投诉详情",
                    "转交相关部门处理",
                    "跟进处理进度",
                    "在24小时内给予反馈",
                ]
            else:
                priority, action_type = "medium", "investigate"
                follow_up_actions = [
                    "进一步核实情况",
                    "根据核实结果决定是否需要回复",
                ]
        elif sentiment == "positive":
            if confident:
                priority, action_type = "low", "response"
                suggested_response = self._generate_positive_response(text, airline)
                follow_up_actions = [
                    "感谢客户反馈",
                    "分享正面评价至内部团队",
                    "考虑在官方渠道展示",
                ]
            else:
                priority, action_type = "low", "monitor"
                follow_up_actions = [
                    "持续关注该用户后续动态",
                ]
        else:  # neutral (or any other label)
            # A question mark or a request for help means a reply is expected.
            if "?" in text or _HELP_PATTERN.search(text.lower()):
                priority, action_type = "medium", "response"
                suggested_response = self._generate_neutral_response(text, airline)
                follow_up_actions = [
                    "提供准确信息",
                    "确保客户问题得到解答",
                ]
            else:
                priority, action_type = "low", "monitor"
                follow_up_actions = [
                    "持续关注",
                ]

        return DisposalPlan(
            priority=priority,
            action_type=action_type,
            suggested_response=suggested_response,
            follow_up_actions=follow_up_actions,
        )

    def _generate_negative_response(self, text: str, airline: str) -> str:
        """Return a reply template for a negative tweet."""
        return _pick_response(text, _NEGATIVE_RESPONSES)

    def _generate_positive_response(self, text: str, airline: str) -> str:
        """Return a reply template for a positive tweet."""
        return _pick_response(text, _POSITIVE_RESPONSES)

    def _generate_neutral_response(self, text: str, airline: str) -> str:
        """Return a reply template for a neutral tweet."""
        return _pick_response(text, _NEUTRAL_RESPONSES)

    def analyze(self, text: str, airline: str) -> TweetAnalysisResult:
        """Run the full pipeline: classify, explain, generate disposal plan.

        Args:
            text: Tweet text.
            airline: Airline the tweet refers to.

        Returns:
            The complete structured analysis result.
        """
        classification = self.classify(text, airline)
        explanation = self.explain(text, airline, classification)
        disposal_plan = self.generate_disposal_plan(text, airline, classification, explanation)

        return TweetAnalysisResult(
            tweet_text=text,
            airline=airline,
            classification=classification,
            explanation=explanation,
            disposal_plan=disposal_plan,
        )


@lru_cache(maxsize=1)
def _default_agent() -> TweetSentimentAgent:
    """Return a shared agent so the model is loaded once per process."""
    return TweetSentimentAgent()


def analyze_tweet(text: str, airline: str) -> TweetAnalysisResult:
    """Analyze a single tweet.

    Args:
        text: Tweet text.
        airline: Airline the tweet refers to.

    Returns:
        The analysis result.
    """
    return _default_agent().analyze(text, airline)


def analyze_tweets_batch(texts: list[str], airlines: list[str]) -> list[TweetAnalysisResult]:
    """Analyze several tweets with one shared agent.

    Args:
        texts: Tweet texts.
        airlines: Airlines, aligned element-wise with ``texts``.

    Returns:
        One analysis result per input tweet, in input order.

    Raises:
        ValueError: If ``texts`` and ``airlines`` differ in length (pairing
            them with ``zip`` would otherwise silently drop the surplus).
    """
    if len(texts) != len(airlines):
        raise ValueError(
            f"texts and airlines must have the same length "
            f"(got {len(texts)} and {len(airlines)})"
        )

    agent = _default_agent()
    return [agent.analyze(text, airline) for text, airline in zip(texts, airlines)]


def main() -> None:
    """Print the analysis of three example tweets as JSON."""
    examples = (
        (
            ">>> 示例 1: 负面情感",
            "@United This is the worst airline ever! My flight was delayed for 5 hours and no one helped!",
            "United",
        ),
        (
            "\n>>> 示例 2: 正面情感",
            "@Southwest Thank you for the amazing flight! The crew was so helpful and friendly.",
            "Southwest",
        ),
        (
            "\n>>> 示例 3: 中性情感",
            "@American What is the baggage policy for international flights?",
            "American",
        ),
    )
    for title, text, airline in examples:
        print(title)
        result = analyze_tweet(text=text, airline=airline)
        print(result.model_dump_json(indent=2))


if __name__ == "__main__":
    main()