From eda22ba4f08a8176e164854bc3a0209a438eb90a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=88=99=E6=96=87?= <st2311020133@gitea.local>
Date: Thu, 15 Jan 2026 16:51:03 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20src?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/tweet_agent.py | 345 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 345 insertions(+)
 create mode 100644 src/tweet_agent.py

diff --git a/src/tweet_agent.py b/src/tweet_agent.py
new file mode 100644
index 0000000..14dff62
--- /dev/null
+++ b/src/tweet_agent.py
@@ -0,0 +1,345 @@
+"""推文情感分析 Agent 模块
+
+实现「分类 → 解释 → 生成处置方案」流程，输出结构化结果。
+"""
+
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import polars as pl
+
+from pydantic import BaseModel, Field
+
+from src.tweet_data import load_cleaned_tweets
+from src.train_tweet_ultimate import load_model as load_ultimate_model
+
+
+class SentimentClassification(BaseModel):
+    """Sentiment classification result."""
+    sentiment: str = Field(description="情感类别: negative/neutral/positive")  # sentiment label: negative / neutral / positive
+    confidence: float = Field(description="置信度 (0-1)")  # confidence, described as 0-1; no range constraint is declared here
+
+
+class SentimentExplanation(BaseModel):
+    """Explanation of a sentiment judgement."""
+    key_factors: list[str] = Field(description="影响情感判断的关键因素")  # key factors that influenced the judgement
+    reasoning: str = Field(description="情感判断的推理过程")  # reasoning behind the judgement
+
+
+class DisposalPlan(BaseModel):
+    """Handling plan for a tweet."""
+    priority: str = Field(description="处理优先级: high/medium/low")  # handling priority: high / medium / low
+    action_type: str = Field(description="行动类型: response/investigate/monitor/ignore")  # action: response / investigate / monitor / ignore
+    suggested_response: Optional[str] = Field(description="建议回复内容（如适用）", default=None)  # suggested reply text, if applicable
+    follow_up_actions: list[str] = Field(description="后续行动建议")  # recommended follow-up actions; no default is declared
+
+
+class TweetAnalysisResult(BaseModel):
+    """Tweet analysis result (structured output)."""
+    tweet_text: str = Field(description="原始推文文本")  # original tweet text
+    airline: str = Field(description="航空公司")  # airline
+    classification: SentimentClassification = Field(description="情感分类结果")  # sentiment classification result
+    explanation: SentimentExplanation = Field(description="情感解释")  # sentiment explanation
+    disposal_plan: DisposalPlan = Field(description="处置方案")  # handling plan
+
+
+class TweetSentimentAgent:
+    """推文情感分析 Agent
+    
+    实现「分类 → 解释 → 生成处置方案」流程。
+    """
+    
+    def __init__(self, model_path: Optional[Path] = None):
+        """Initialise the agent: load the model and keep references to its encoders.
+        
+        Args:
+            model_path: Optional model path. NOTE(review): never forwarded to the loader below — confirm intent.
+        """
+        self.model = load_ultimate_model()  # called without arguments; ``model_path`` is not used
+        self.label_encoder = self.model.label_encoder
+        self.tfidf_vectorizer = self.model.tfidf_vectorizer
+        self.airline_encoder = self.model.airline_encoder
+        
+    def classify(self, text: str, airline: str) -> SentimentClassification:
+        """Classify a single tweet and report the confidence of the predicted label.
+
+        Args:
+            text: Tweet text.
+            airline: Airline the tweet refers to.
+
+        Returns:
+            The predicted sentiment together with the probability the model
+            assigned to that same class.
+        """
+        # The model is called with one-element arrays, once for the label and
+        # once for the per-class probabilities.
+        predicted_label = self.model.predict(np.array([text]), np.array([airline]))[0]
+        class_probabilities = self.model.predict_proba(np.array([text]), np.array([airline]))[0]
+
+        # The label encoder maps the label to the position used to index the
+        # probability vector.
+        label_position = self.label_encoder.transform([predicted_label])[0]
+
+        return SentimentClassification(
+            confidence=float(class_probabilities[label_position]),
+            sentiment=predicted_label,
+        )
+    
+    def explain(self, text: str, airline: str, classification: SentimentClassification) -> SentimentExplanation:
+        """Build a rule-based explanation from keyword and punctuation cues.
+
+        Lexicon words are matched on word boundaries (with a few common
+        inflection suffixes) instead of as raw substrings, which used to
+        report false hits such as "hate" in "whatever" or "how" in "show".
+
+        Args:
+            text: Tweet text.
+            airline: Airline name; always recorded as a key factor.
+            classification: Classification result (accepted but not consulted).
+
+        Returns:
+            The key factors and a reasoning string built from the rules that fired.
+        """
+        import re  # local import so the module-level import block stays untouched
+
+        key_factors = []
+        reasoning_parts = []
+        text_lower = text.lower()
+
+        def matched(words: list[str]) -> list[str]:
+            """Return the lexicon words present in the tweet as whole words."""
+            suffix = r"(?:s|es|d|ed|ing|ly)?"  # keeps hits such as "thanks" or "loved"
+            return [w for w in words if re.search(rf"\b{re.escape(w)}{suffix}\b", text_lower)]
+
+        # Sentiment keyword lexicons.
+        negative_words = ["bad", "terrible", "awful", "worst", "hate", "angry", "disappointed", "frustrated", "cancelled", "delayed", "lost", "rude"]
+        positive_words = ["good", "great", "excellent", "best", "love", "happy", "satisfied", "amazing", "wonderful", "thank", "helpful"]
+        neutral_words = ["question", "how", "what", "when", "where", "why", "please", "help", "info", "information"]
+
+        found_negative = matched(negative_words)
+        found_positive = matched(positive_words)
+        found_neutral = matched(neutral_words)
+
+        if found_negative:
+            key_factors.append(f"包含负面词汇: {', '.join(found_negative[:3])}")
+            reasoning_parts.append("文本中包含多个负面情感词汇，表达不满情绪")
+        if found_positive:
+            key_factors.append(f"包含正面词汇: {', '.join(found_positive[:3])}")
+            reasoning_parts.append("文本中包含正面情感词汇，表达满意或感谢")
+        if found_neutral:
+            key_factors.append(f"包含中性词汇: {', '.join(found_neutral[:3])}")
+            reasoning_parts.append("文本主要包含询问或请求，情绪相对中性")
+
+        # Punctuation and mention cues (checked on the original, un-lowered text).
+        if "!" in text:
+            key_factors.append("包含感叹号")
+            reasoning_parts.append("感叹号的使用表明情绪较为强烈")
+        if "?" in text:
+            key_factors.append("包含问号")
+            reasoning_parts.append("问号的使用表明存在疑问或询问")
+        if "@" in text:
+            key_factors.append("包含@提及")
+            reasoning_parts.append("直接@航空公司表明希望获得关注或回复")
+
+        key_factors.append(f"涉及航空公司: {airline}")
+
+        # Fall back to a generic sentence when no rule fired.
+        if not reasoning_parts:
+            reasoning_parts.append("根据文本整体语义和情感特征进行判断")
+
+        return SentimentExplanation(key_factors=key_factors, reasoning="。".join(reasoning_parts) + "。")
+    
+    def generate_disposal_plan(
+        self,
+        text: str,
+        airline: str,
+        classification: SentimentClassification,
+        explanation: SentimentExplanation,
+    ) -> DisposalPlan:
+        """Derive a handling plan from the predicted sentiment and its confidence.
+
+        Args:
+            text: Tweet text.
+            airline: Airline the tweet refers to.
+            classification: Sentiment classification result.
+            explanation: Sentiment explanation (accepted but not consulted).
+
+        Returns:
+            The handling plan: priority, action type, optional reply, follow-ups.
+        """
+        label = classification.sentiment
+        is_confident = classification.confidence >= 0.8
+
+        # Confident complaints get a reply at high priority; the rest are investigated.
+        if label == "negative" and is_confident:
+            return DisposalPlan(
+                priority="high",
+                action_type="response",
+                suggested_response=self._generate_negative_response(text, airline),
+                follow_up_actions=[
+                    "记录客户投诉详情",
+                    "转交相关部门处理",
+                    "跟进处理进度",
+                    "在24小时内给予反馈",
+                ],
+            )
+        if label == "negative":
+            return DisposalPlan(
+                priority="medium",
+                action_type="investigate",
+                suggested_response=None,
+                follow_up_actions=["进一步核实情况", "根据核实结果决定是否需要回复"],
+            )
+
+        # Praise is always low priority; only confident cases get a reply.
+        if label == "positive" and is_confident:
+            return DisposalPlan(
+                priority="low",
+                action_type="response",
+                suggested_response=self._generate_positive_response(text, airline),
+                follow_up_actions=[
+                    "感谢客户反馈",
+                    "分享正面评价至内部团队",
+                    "考虑在官方渠道展示",
+                ],
+            )
+        if label == "positive":
+            return DisposalPlan(
+                priority="low",
+                action_type="monitor",
+                suggested_response=None,
+                follow_up_actions=["持续关注该用户后续动态"],
+            )
+
+        # Any other label is handled as neutral; questions and help requests get a reply.
+        wants_answer = "?" in text or "help" in text.lower()
+        if not wants_answer:
+            return DisposalPlan(
+                priority="low",
+                action_type="monitor",
+                suggested_response=None,
+                follow_up_actions=["持续关注"],
+            )
+
+        return DisposalPlan(
+            priority="medium",
+            action_type="response",
+            suggested_response=self._generate_neutral_response(text, airline),
+            follow_up_actions=[
+                "提供准确信息",
+                "确保客户问题得到解答",
+            ],
+        )
+    
+    def _generate_negative_response(self, text: str, airline: str) -> str:
+        """Return a canned apology for a negative tweet; the pick is stable per ``text`` (``airline`` is unused)."""
+        responses = [
+            "感谢您的反馈。我们非常重视您提到的问题，将立即进行调查并尽快给您答复。",
+            "对于您的不愉快体验，我们深表歉意。请私信我们详细情况，我们将全力为您解决。",
+            "收到您的反馈，我们对此感到抱歉。相关部门已介入，将尽快处理并给您满意的答复。",
+        ]
+        return responses[sum(map(ord, text)) % len(responses)]  # hash() is salted per process; a code-point sum is reproducible
+    
+    def _generate_positive_response(self, text: str, airline: str) -> str:
+        """Return a canned thank-you for a positive tweet; the pick is stable per ``text`` (``airline`` is unused)."""
+        responses = [
+            "感谢您的认可和支持！我们会继续努力为您提供更好的服务。",
+            "很高兴听到您的正面反馈！您的满意是我们前进的动力。",
+            "感谢您的分享！我们会将您的反馈传达给团队，激励我们做得更好。",
+        ]
+        return responses[sum(map(ord, text)) % len(responses)]  # hash() is salted per process; a code-point sum is reproducible
+    
+    def _generate_neutral_response(self, text: str, airline: str) -> str:
+        """Return a canned reply for a neutral enquiry; the pick is stable per ``text`` (``airline`` is unused)."""
+        responses = [
+            "感谢您的询问。请问您需要了解哪方面的信息？我们将竭诚为您解答。",
+            "收到您的问题。请提供更多细节，以便我们更好地为您提供帮助。",
+        ]
+        return responses[sum(map(ord, text)) % len(responses)]  # hash() is salted per process; a code-point sum is reproducible
+    
+    def analyze(self, text: str, airline: str) -> TweetAnalysisResult:
+        """Run the full pipeline: classify, then explain, then plan the handling.
+
+        Args:
+            text: Tweet text.
+            airline: Airline the tweet refers to.
+
+        Returns:
+            The structured result bundling the inputs with all three stage outputs.
+        """
+        # Each stage receives the outputs of the stages before it.
+        label_result = self.classify(text, airline)
+        rationale = self.explain(text, airline, label_result)
+        plan = self.generate_disposal_plan(
+            text,
+            airline,
+            label_result,
+            rationale,
+        )
+
+        # Bundle everything into the structured result.
+        stage_outputs = {
+            "classification": label_result,
+            "explanation": rationale,
+            "disposal_plan": plan,
+        }
+        return TweetAnalysisResult(tweet_text=text, airline=airline, **stage_outputs)
+
+
+def analyze_tweet(text: str, airline: str) -> TweetAnalysisResult:
+    """Analyse one tweet with a freshly constructed agent.
+
+    Args:
+        text: Tweet text.
+        airline: Airline the tweet refers to.
+
+    Returns:
+        The analysis result for this tweet.
+    """
+    # A new TweetSentimentAgent is built on every call.
+    return TweetSentimentAgent().analyze(text, airline)
+
+
+def analyze_tweets_batch(texts: list[str], airlines: list[str]) -> list[TweetAnalysisResult]:
+    """Analyse several tweets with one shared agent.
+
+    Args:
+        texts: Tweet texts.
+        airlines: Airline for each tweet, aligned with ``texts`` by position.
+
+    Returns:
+        One analysis result per tweet, in input order.
+
+    Raises:
+        ValueError: If ``texts`` and ``airlines`` differ in length.
+    """
+    # zip() would silently drop the unmatched tail, so reject misaligned input.
+    if len(texts) != len(airlines):
+        raise ValueError(f"texts and airlines must have the same length, got {len(texts)} and {len(airlines)}")
+    agent = TweetSentimentAgent()
+    return [agent.analyze(text, airline) for text, airline in zip(texts, airlines)]
+
+
+if __name__ == "__main__":
+    # Demo: one sample tweet per printed heading; each result is dumped as JSON.
+    samples = [
+        (
+            ">>> 示例 1: 负面情感",
+            "@United This is the worst airline ever! My flight was delayed for 5 hours and no one helped!",
+            "United",
+        ),
+        (
+            "\n>>> 示例 2: 正面情感",
+            "@Southwest Thank you for the amazing flight! The crew was so helpful and friendly.",
+            "Southwest",
+        ),
+        (
+            "\n>>> 示例 3: 中性情感",
+            "@American What is the baggage policy for international flights?",
+            "American",
+        ),
+    ]
+    for banner, sample_text, sample_airline in samples:
+        print(banner)
+        print(analyze_tweet(text=sample_text, airline=sample_airline).model_dump_json(indent=2))