"""竞争情报与差异化定位系统""" import pandas as pd import numpy as np from typing import List, Dict, Tuple, Optional from pydantic import BaseModel, Field import plotly.express as px import plotly.graph_objects as go from plotly.subplots import make_subplots from sklearn.preprocessing import StandardScaler from scipy import stats class AirlineComparison(BaseModel): """航空公司对比分析""" 航空公司: str = Field(description="航空公司名称") 问题类型: str = Field(description="问题类型") 投诉数量: int = Field(description="投诉数量") 平均情感强度: float = Field(description="平均情感强度") 满意度得分: float = Field(description="满意度得分(0-100)") 相对表现: str = Field(description="相对表现(优于/劣于/持平)") class CompetitiveAdvantage(BaseModel): """竞争优势分析""" 优势领域: str = Field(description="优势领域") 相对优势度: float = Field(description="相对优势度") 关键指标: List[str] = Field(description="关键指标") 改进建议: str = Field(description="改进建议") class OpportunitySpace(BaseModel): """机会空间发现""" 机会领域: str = Field(description="机会领域") 未满足需求: str = Field(description="未满足需求") 潜在市场规模: float = Field(description="潜在市场规模(0-1)") 竞争对手弱点: List[str] = Field(description="竞争对手弱点") 差异化建议: str = Field(description="差异化建议") class CompetitiveIntelligence: """竞争情报与差异化定位系统""" def __init__(self, data_path: str = "data/Tweets.csv"): self.data_path = data_path self.airlines = ["united", "american", "delta", "southwest", "us airways"] self.problem_types = [ "Bad Flight", "Can't Tell", "Late Flight", "Customer Service Issue", "Flight Booking Problems", "Lost Luggage", "Flight Attendant Complaints", "Cancelled Flight", "Damaged Luggage", "longlines" ] def load_data(self) -> pd.DataFrame: """加载数据""" df = pd.read_csv(self.data_path) return df def analyze_airline_comparison(self, target_airline: str, competitor_airlines: List[str]) -> List[AirlineComparison]: """分析航空公司对比""" df = self.load_data() comparisons = [] for problem in self.problem_types: # 分析目标航空公司在当前问题上的表现 target_data = df[(df['airline'] == target_airline) & (df['negativereason'] == problem)] if len(target_data) > 0: target_complaints = len(target_data) target_sentiment = target_data['airline_sentiment_confidence'].mean() target_score = self._calculate_satisfaction_score(target_sentiment, target_complaints) # 与每个竞争对手对比 for competitor in competitor_airlines: if competitor != target_airline: competitor_data = df[(df['airline'] == competitor) & (df['negativereason'] == problem)] if len(competitor_data) > 0: comp_complaints = len(competitor_data) comp_sentiment = competitor_data['airline_sentiment_confidence'].mean() comp_score = self._calculate_satisfaction_score(comp_sentiment, comp_complaints) # 确定相对表现 relative_performance = self._determine_relative_performance(target_score, comp_score) comparisons.append(AirlineComparison( 航空公司=f"{target_airline} vs {competitor}", 问题类型=problem, 投诉数量=target_complaints, 平均情感强度=target_sentiment, 满意度得分=target_score, 相对表现=relative_performance )) return comparisons def identify_competitive_advantages(self, target_airline: str, competitor_airlines: List[str]) -> List[CompetitiveAdvantage]: """识别竞争优势""" df = self.load_data() advantages = [] for problem in self.problem_types: # 计算目标航空公司在当前问题上的表现 target_data = df[(df['airline'] == target_airline) & (df['negativereason'] == problem)] if len(target_data) > 0: target_score = self._calculate_satisfaction_score( target_data['airline_sentiment_confidence'].mean(), len(target_data) ) # 计算竞争对手的平均表现 competitor_scores = [] for competitor in competitor_airlines: if competitor != target_airline: comp_data = df[(df['airline'] == competitor) & (df['negativereason'] == problem)] if len(comp_data) > 0: comp_score = self._calculate_satisfaction_score( comp_data['airline_sentiment_confidence'].mean(), len(comp_data) ) competitor_scores.append(comp_score) if competitor_scores: avg_competitor_score = np.mean(competitor_scores) advantage_degree = target_score - avg_competitor_score # 如果优势明显,记录为竞争优势 if advantage_degree > 5: # 优势阈值 advantages.append(CompetitiveAdvantage( 优势领域=problem, 相对优势度=advantage_degree, 关键指标=[f"满意度得分: {target_score:.1f}", f"行业平均: {avg_competitor_score:.1f}"], 改进建议=self._generate_improvement_suggestion(problem, advantage_degree) )) return advantages def discover_opportunity_spaces(self, target_airline: str, competitor_airlines: List[str]) -> List[OpportunitySpace]: """发现机会空间""" df = self.load_data() opportunities = [] # 分析竞争对手的弱点 for competitor in competitor_airlines: if competitor != target_airline: # 找出竞争对手表现最差的问题领域 competitor_problems = [] for problem in self.problem_types: comp_data = df[(df['airline'] == competitor) & (df['negativereason'] == problem)] if len(comp_data) > 0: score = self._calculate_satisfaction_score( comp_data['airline_sentiment_confidence'].mean(), len(comp_data) ) competitor_problems.append((problem, score)) # 找出竞争对手最弱的问题领域(得分最低) if competitor_problems: worst_problem = min(competitor_problems, key=lambda x: x[1]) # 检查目标航空公司在相同问题上的表现 target_data = df[(df['airline'] == target_airline) & (df['negativereason'] == worst_problem[0])] if len(target_data) > 0: target_score = self._calculate_satisfaction_score( target_data['airline_sentiment_confidence'].mean(), len(target_data) ) # 如果目标航空公司表现更好,则存在机会空间 if target_score > worst_problem[1]: market_size = self._estimate_market_size(worst_problem[0], df) opportunities.append(OpportunitySpace( 机会领域=worst_problem[0], 未满足需求=f"{competitor}在{worst_problem[0]}问题上表现不佳", 潜在市场规模=market_size, 竞争对手弱点=[f"{competitor}满意度得分: {worst_problem[1]:.1f}"], 差异化建议=self._generate_differentiation_suggestion(worst_problem[0], competitor) )) return opportunities def monitor_competitor_improvements(self, competitor_airlines: List[str]) -> Dict[str, List[Dict]]: """监控竞争对手改进""" df = self.load_data() improvements = {} for competitor in competitor_airlines: competitor_improvements = [] # 分析竞争对手在不同问题上的表现趋势 for problem in self.problem_types: problem_data = df[(df['airline'] == competitor) & (df['negativereason'] == problem)] if len(problem_data) > 10: # 确保有足够的数据 # 简单的时间趋势分析(按推文ID排序,假设ID反映时间顺序) problem_data_sorted = problem_data.sort_values('tweet_id') # 将数据分为前后两半 split_point = len(problem_data_sorted) // 2 early_period = problem_data_sorted.iloc[:split_point] late_period = problem_data_sorted.iloc[split_point:] if len(early_period) > 0 and len(late_period) > 0: early_score = self._calculate_satisfaction_score( early_period['airline_sentiment_confidence'].mean(), len(early_period) ) late_score = self._calculate_satisfaction_score( late_period['airline_sentiment_confidence'].mean(), len(late_period) ) improvement = late_score - early_score if improvement > 2: # 显著改进 competitor_improvements.append({ '问题类型': problem, '改进幅度': improvement, '前期表现': early_score, '后期表现': late_score, '改进措施': self._infer_improvement_measures(problem, improvement) }) improvements[competitor] = competitor_improvements return improvements def _calculate_satisfaction_score(self, sentiment_confidence: float, complaint_count: int) -> float: """计算满意度得分""" if pd.isna(sentiment_confidence): sentiment_confidence = 0.5 # 基于情感置信度和投诉数量计算综合得分 base_score = sentiment_confidence * 100 # 转换为0-100分 # 考虑投诉数量的影响(投诉越多,得分越低) complaint_penalty = min(complaint_count * 0.1, 20) # 最多扣20分 final_score = max(0, base_score - complaint_penalty) return final_score def _determine_relative_performance(self, target_score: float, competitor_score: float) -> str: """确定相对表现""" difference = target_score - competitor_score if difference > 5: return "优于" elif difference < -5: return "劣于" else: return "持平" def _generate_improvement_suggestion(self, problem: str, advantage_degree: float) -> str: """生成改进建议""" suggestions = { "Bad Flight": "继续保持航班质量监控,加强机组人员培训", "Late Flight": "优化航班调度,提高准点率", "Customer Service Issue": "加强客服培训,提升服务响应速度", "Lost Luggage": "改进行李追踪系统,加强行李处理流程" } base_suggestion = suggestions.get(problem, "持续优化相关服务流程") if advantage_degree > 10: return f"{base_suggestion},考虑将这一优势作为品牌差异化点进行宣传" else: return f"{base_suggestion},保持现有优势" def _estimate_market_size(self, problem: str, df: pd.DataFrame) -> float: """估计市场规模""" # 基于问题在所有航空公司中的出现频率估计市场规模 total_complaints = len(df[df['negativereason'] == problem]) total_all_complaints = len(df[df['negativereason'].notna()]) if total_all_complaints > 0: return total_complaints / total_all_complaints else: return 0.1 # 默认值 def _generate_differentiation_suggestion(self, problem: str, competitor: str) -> str: """生成差异化建议""" suggestions = { "Bad Flight": f"针对{competitor}在航班体验上的弱点,推出'舒适飞行保证'计划", "Late Flight": f"利用{competitor}准点率问题,强调自身的准点承诺", "Customer Service Issue": f"针对{competitor}的服务问题,推出'24小时客服响应'服务", "Lost Luggage": f"针对{competitor}行李问题,提供'行李实时追踪'功能" } return suggestions.get(problem, f"针对{competitor}的弱点,推出差异化服务方案") def _infer_improvement_measures(self, problem: str, improvement: float) -> str: """推断改进措施""" measures = { "Bad Flight": "可能改进了航班服务流程或机组培训", "Late Flight": "可能优化了航班调度或地面服务", "Customer Service Issue": "可能加强了客服培训或投诉处理流程", "Lost Luggage": "可能升级了行李处理系统或追踪技术" } base_measure = measures.get(problem, "实施了相关服务改进措施") if improvement > 5: return f"显著{base_measure}" else: return f"轻微{base_measure}" def generate_competitive_insights_report(self, target_airline: str, competitor_airlines: List[str]) -> Dict: """生成竞争洞察报告""" comparisons = self.analyze_airline_comparison(target_airline, competitor_airlines) advantages = self.identify_competitive_advantages(target_airline, competitor_airlines) opportunities = self.discover_opportunity_spaces(target_airline, competitor_airlines) improvements = self.monitor_competitor_improvements(competitor_airlines) return { 'comparisons': comparisons, 'advantages': advantages, 'opportunities': opportunities, 'improvements': improvements } # 创建可视化函数 def create_competitive_analysis_charts(insights_report: Dict) -> Dict: """创建竞争分析图表""" charts = {} # 航空公司对比图表 if insights_report['comparisons']: comparisons_df = pd.DataFrame([c.dict() for c in insights_report['comparisons']]) fig_comparison = px.bar( comparisons_df, x='问题类型', y='满意度得分', color='航空公司', title='航空公司满意度对比', barmode='group' ) charts['comparison_chart'] = fig_comparison # 竞争优势图表 if insights_report['advantages']: advantages_df = pd.DataFrame([a.dict() for a in insights_report['advantages']]) fig_advantages = px.bar( advantages_df, x='优势领域', y='相对优势度', title='竞争优势分析', color='相对优势度', color_continuous_scale='Viridis' ) charts['advantages_chart'] = fig_advantages # 机会空间图表 if insights_report['opportunities']: opportunities_df = pd.DataFrame([o.dict() for o in insights_report['opportunities']]) fig_opportunities = px.scatter( opportunities_df, x='机会领域', y='潜在市场规模', size='潜在市场规模', title='机会空间发现', hover_data=['未满足需求'] ) charts['opportunities_chart'] = fig_opportunities return charts