import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import jieba
from wordcloud import WordCloud
from pyecharts.charts import Map
from pyecharts import options as opts

# ================== Global configuration ==================
# HTTP headers passed to requests.get() by get_html_text.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Referer": "https://www.shanghairanking.cn/",
    "Accept-Language": "zh-CN,zh;q=0.9",
}
# Page that fetch_univ_dataframe downloads and parses for the ranking table.
TARGET_URL = 'https://www.shanghairanking.cn/rankings/bcur/2026'

# Chart titles and labels in this file are Chinese, so select the SimHei font
# for matplotlib's sans-serif family; unicode_minus is disabled so the minus
# sign is drawn as an ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


# Connect to the site and fetch the raw page
def get_html_text(url):
    """Download ``url`` and return the response body as text.

    The request is sent with the module-level ``HEADERS`` and a 30 second
    timeout. The body is decoded with the encoding detected from its content
    (``apparent_encoding``) rather than the one declared by the server.

    Args:
        url: Address of the page to download.

    Returns:
        The page text, or ``None`` when the request fails or the server
        answers with an error status. The failure is printed, not raised.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=30)
        # Turn 4xx/5xx answers into an exception handled below.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        page_text = response.text
    except requests.RequestException as exc:
        print(f"爬取失败: {exc}")
        return None
    return page_text

# Scrape the ranking table into a pandas DataFrame
def fetch_univ_dataframe():
    """Scrape the ranking table at ``TARGET_URL`` into a DataFrame.

    Looks for ``<table class="rk-table">`` in the downloaded page and reads
    the first five cells of every body row as rank, university name,
    province, category and total score. Rows with fewer than five cells are
    skipped.

    Returns:
        A DataFrame with the columns ``排名``, ``大学名称``, ``省市``, ``类型``
        and ``总分``, sorted by rank with a fresh 0-based index. Rank and
        score are converted to numbers; values that cannot be parsed become
        NaN. An empty, column-less DataFrame is returned when the page could
        not be downloaded or contains no such table.
    """
    html = get_html_text(TARGET_URL)
    if html is None:
        return pd.DataFrame()

    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', class_='rk-table')
    tbody = table.find('tbody') if table else None
    if not tbody:
        return pd.DataFrame()

    def cell_text(node):
        # strip=True trims leading and trailing whitespace of the text.
        return node.get_text(strip=True)

    records = []
    for tr in tbody.find_all('tr'):
        cells = tr.find_all('td')
        if len(cells) < 5:
            continue
        # Prefer the text of the link inside the name cell when there is one.
        link = cells[1].find('a')
        name = cell_text(link) if link else cell_text(cells[1])
        records.append([
            cell_text(cells[0]),
            name,
            cell_text(cells[2]),
            cell_text(cells[3]),
            cell_text(cells[4]),
        ])

    df = pd.DataFrame(records, columns=["排名", "大学名称", "省市", "类型", "总分"])
    for numeric_col in ("总分", "排名"):
        df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')
    return df.sort_values("排名").reset_index(drop=True)


# Save the data to a CSV file
def save_to_csv(df, filename="大学排行.csv", top_n=None):
    """Write ``df`` (or only its first ``top_n`` rows) to a CSV file.

    The file is written without the index column and encoded as
    ``utf-8-sig``. Nothing is written when ``df`` is empty.

    Args:
        df: Data to export.
        filename: Destination path of the CSV file.
        top_n: Number of leading rows to export; ``None`` exports every row.
    """
    if df.empty:
        return
    subset = df if top_n is None else df.head(top_n)
    subset.to_csv(filename, index=False, encoding='utf-8-sig')
    print(f"数据已保存至 {filename}")


def print_univ_info(df, top_n=30):
    """Print one formatted line per university for the leading rows of ``df``.

    Args:
        df: Ranking data with the columns ``排名``, ``大学名称``, ``省市``,
            ``类型`` and ``总分``. Nothing is printed when it is empty.
        top_n: Maximum number of rows to print; capped at ``len(df)``.
    """
    if df.empty:
        return
    n = min(top_n, len(df))
    print(f"\n前{n}名大学信息：")
    for _, row in df.head(n).iterrows():
        rank = row['排名']
        # A rank that is NaN cannot go through int() (it raises ValueError),
        # so such rows show a "-" placeholder instead of aborting the listing.
        rank_text = '-' if pd.isna(rank) else str(int(rank))
        print(
            f"排名：{rank_text:<4} 大学名称：{row['大学名称']:<20} 省市：{row['省市']:<10} 类型：{row['类型']:<8} 总分：{row['总分']:.1f}")

# ================== 3. Chart functions (retained set) ==================
def draw_horizontal_bar(df, top_n=20):
    """Show a horizontal bar chart of the total score of the first ``top_n`` rows.

    The selected rows are re-sorted by score in ascending order before
    plotting.

    Args:
        df: Ranking data with ``大学名称`` and ``总分`` columns.
        top_n: Number of leading rows to plot.
    """
    ordered = df.head(top_n).sort_values('总分', ascending=True)
    names = ordered['大学名称']
    scores = ordered['总分']

    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)
    ax.barh(names, scores, color='lightgreen', edgecolor='black')
    ax.set_xlabel('总分')
    ax.set_title(f'大学排行榜前{top_n}名总分（横向条形图）')
    fig.tight_layout()
    plt.show()


def draw_pie_chart(df, top_n=30):
    """Show a pie chart of how the first ``top_n`` rows spread over provinces.

    Provinces holding less than 5% of the selected rows are merged into a
    single ``其他`` slice.

    Args:
        df: Ranking data with a ``省市`` column.
        top_n: Number of leading rows to count.
    """
    counts = df.head(top_n)["省市"].value_counts()
    cutoff = 0.05 * counts.sum()
    is_major = counts >= cutoff

    main = counts[is_major]
    minor_total = counts[~is_major].sum()
    if minor_total > 0:
        # Collapse every small province into one trailing slice.
        main["其他"] = minor_total

    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
    ax.pie(main, labels=main.index, autopct='%1.1f%%', pctdistance=0.85)
    ax.axis('equal')
    ax.set_title(f'大学排名前{top_n}名的省份分布')
    plt.show()


def draw_scatter_plot(df, top_n=30):
    """Show a scatter plot of rank against total score with name labels.

    Args:
        df: Ranking data with ``排名``, ``总分`` and ``大学名称`` columns.
        top_n: Number of leading rows to plot.
    """
    subset = df.head(top_n)
    ranks = subset['排名']
    scores = subset['总分']

    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)
    ax.scatter(ranks, scores, c='skyblue', edgecolors='black', s=80)
    # Label each point with the university name, nudged 5 points up and right.
    for name, rank, score in zip(subset['大学名称'], ranks, scores):
        ax.annotate(name, (rank, score), xytext=(5, 5),
                    textcoords='offset points', fontsize=8)
    ax.set_xlabel('排名')
    ax.set_ylabel('总分')
    ax.set_title('大学排名与总分的关系（散点图）')
    ax.grid(True, linestyle='--', alpha=0.5)
    fig.tight_layout()
    plt.show()


def draw_bar_chart(df, top_n=20):
    """Show a vertical bar chart of the total score of the first ``top_n`` rows.

    Args:
        df: Ranking data with ``大学名称`` and ``总分`` columns.
        top_n: Number of leading rows to plot.
    """
    subset = df.head(top_n)
    names = subset['大学名称']
    scores = subset['总分']

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)
    ax.bar(names, scores, color='lightcoral', edgecolor='black')
    ax.set_title(f'大学排行榜前{top_n}名总分柱状图')
    ax.set_xlabel('大学名称')
    ax.set_ylabel('总分')
    # Tilt the name labels so neighbouring names do not overlap.
    plt.xticks(rotation=45, ha='right')
    fig.tight_layout()
    plt.show()


def draw_donut_chart(df, top_n=30):
    """Show a donut chart of the province distribution of the first ``top_n`` rows.

    Args:
        df: Ranking data with a ``省市`` column.
        top_n: Number of leading rows to count.
    """
    counts = df.head(top_n)["省市"].value_counts()

    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)
    # A wedge width below the radius leaves the hole that makes it a donut.
    pie_parts = ax.pie(
        counts,
        labels=counts.index,
        autopct='%1.1f%%',
        pctdistance=0.85,
        wedgeprops=dict(width=0.3),
    )
    percent_labels = pie_parts[2]
    plt.setp(percent_labels, size=8)
    ax.set_title(f'大学排行前{top_n}名的省份分布（环形图）')
    ax.axis('equal')
    plt.show()


def draw_stacked_bar(df):
    """Show a stacked bar chart of university categories per province.

    Only the 10 provinces with the most universities are drawn; each bar is
    split by the ``类型`` column.

    Args:
        df: Ranking data with ``省市`` and ``类型`` columns.
    """
    # Rows: province, columns: category, cells: number of universities.
    per_province = df.groupby(['省市', '类型']).size().unstack(fill_value=0)
    busiest = per_province.sum(axis=1).nlargest(10).index
    per_province = per_province.loc[busiest]

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)
    per_province.plot(kind='bar', stacked=True, ax=ax, colormap='Set3')
    ax.set_title('各省份大学类型分布（堆叠柱状图）')
    ax.set_xlabel('省份')
    ax.set_ylabel('大学数量')
    # Park the legend outside the plotting area, to the right.
    ax.legend(title='类型', bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()


def draw_multi_line(df, top_n=30):
    """Show two lines over rank: the total score and the rank itself.

    Args:
        df: Ranking data with ``排名`` and ``总分`` columns.
        top_n: Number of leading rows to plot.
    """
    subset = df.head(top_n)
    ranks = subset['排名']

    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)
    ax.plot(ranks, subset['总分'], marker='o', linewidth=2, label='总分')
    # The second series plots rank against itself as a reference line.
    ax.plot(ranks, ranks, marker='s', linestyle='--', linewidth=2, label='排名')
    ax.set_title('大学排名与总分对比（多折线图）')
    ax.set_xlabel('排名')
    ax.set_ylabel('值')
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.5)
    fig.tight_layout()
    plt.show()


def draw_nested_pie(df, top_n=30):
    """Show a two-ring pie: provinces on the outer ring, categories inside.

    Args:
        df: Ranking data with ``省市`` and ``类型`` columns.
        top_n: Number of leading rows to count.
    """
    subset = df.head(top_n)
    # (counts, ring radius, distance of the percentage text), outer ring first.
    rings = (
        (subset['省市'].value_counts(), 1.2, 0.85),
        (subset['类型'].value_counts(), 0.9, 0.75),
    )

    fig, ax = plt.subplots(figsize=(10, 8), dpi=300)
    for counts, radius, pct_distance in rings:
        ax.pie(counts, labels=counts.index, radius=radius,
               wedgeprops=dict(width=0.3, edgecolor='w'),
               autopct='%1.1f%%', pctdistance=pct_distance)
    ax.set_title('大学类型与省份分布（嵌套饼图）')
    plt.show()


def draw_time_series(df, top_n=10):
    """Show simulated yearly score curves for the first ``top_n`` universities.

    The yearly values are NOT historical data: for every university the
    current total score is repeated for 2018-2026 and perturbed with Gaussian
    noise (mean 0, standard deviation 1). A dashed straight line fitted to
    each noisy series is drawn next to it.

    The global NumPy random generator is re-seeded with 42 so the picture is
    the same on every call.

    Args:
        df: Ranking data with ``总分`` and ``大学名称`` columns.
        top_n: Number of leading rows to plot; also shown in the chart title.
    """
    years = range(2018, 2027)
    year_index = np.arange(len(years))
    np.random.seed(42)

    fig, ax = plt.subplots(figsize=(14, 6), dpi=300)
    for _, univ in df.head(top_n).iterrows():
        scores = univ['总分'] + np.random.normal(0, 1, len(years))
        # Degree-1 least-squares fit, i.e. the linear trend of this series.
        trend = np.poly1d(np.polyfit(year_index, scores, 1))
        ax.plot(years, trend(year_index), '--', alpha=0.5)
        ax.plot(years, scores, marker='o', label=univ['大学名称'], linewidth=2)

    ax.set_xlabel('年份')
    ax.set_ylabel('总分')
    # The title follows top_n instead of always claiming "Top 10".
    ax.set_title(f'Top {top_n} 大学历年总分变化趋势')
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.grid(True, linestyle='--', alpha=0.5)
    fig.tight_layout()
    plt.show()


def draw_geo_chart(df):
    """Build a pyecharts map of China shaded by universities per province.

    Province names are passed through ``province_mapping``, which shortens
    full administrative names (for example ``北京市`` to ``北京``); names not
    listed there are used unchanged.

    NOTE(review): the mapping assumes the "china" map identifies regions by
    their short names - confirm against the installed pyecharts version.

    Args:
        df: Ranking data with a ``省市`` column.

    Returns:
        The configured ``Map`` chart. It is not rendered here; the caller
        decides where to render it.
    """
    prov_counts = df['省市'].value_counts().to_dict()

    # Full administrative name -> short name.
    province_mapping = {
        '北京市': '北京', '天津市': '天津', '上海市': '上海', '重庆市': '重庆',
        '河北省': '河北', '山西省': '山西', '内蒙古自治区': '内蒙古',
        '辽宁省': '辽宁', '吉林省': '吉林', '黑龙江省': '黑龙江',
        '江苏省': '江苏', '浙江省': '浙江', '安徽省': '安徽',
        '福建省': '福建', '江西省': '江西', '山东省': '山东',
        '河南省': '河南', '湖北省': '湖北', '湖南省': '湖南',
        '广东省': '广东', '广西壮族自治区': '广西', '海南省': '海南',
        '四川省': '四川', '贵州省': '贵州', '云南省': '云南',
        '西藏自治区': '西藏', '陕西省': '陕西', '甘肃省': '甘肃',
        '青海省': '青海', '宁夏回族自治区': '宁夏', '新疆维吾尔自治区': '新疆',
        '台湾省': '台湾', '香港特别行政区': '香港', '澳门特别行政区': '澳门'
    }

    # int() keeps the counts plain Python integers for the chart options.
    data_pair = [
        [province_mapping.get(prov, prov), int(count)]
        for prov, count in prov_counts.items()
    ]

    # max() of an empty sequence raises ValueError; fall back to 1 so a frame
    # without rows still yields a chart with a valid 0..1 colour scale.
    max_count = max((count for _, count in data_pair), default=1)

    map_chart = (
        Map(init_opts=opts.InitOpts(width="1000px", height="600px"))
        .add("大学数量", data_pair, "china", is_map_symbol_show=False)
        .set_global_opts(
            title_opts=opts.TitleOpts(title="全国大学分布图"),
            visualmap_opts=opts.VisualMapOpts(max_=max_count, min_=0),
        )
    )
    return map_chart


def draw_wordcloud(df, font_path='C:/Windows/Fonts/simhei.ttf'):
    """Show a word cloud built from the university names.

    The names are joined, segmented into words with ``jieba`` and handed to
    ``WordCloud``. Nothing is drawn when ``df`` holds no names.

    Args:
        df: Ranking data with a ``大学名称`` column.
        font_path: Font file passed to ``WordCloud``. The default is the
            Windows location of SimHei; pass a different font file on other
            systems (it should contain Chinese glyphs).
    """
    if df.empty:
        return
    names = df['大学名称'].dropna().astype(str).tolist()
    if not names:
        # No text to segment, so there is nothing to build a cloud from.
        return

    segmented = ' '.join(jieba.cut(' '.join(names)))
    wc = WordCloud(
        font_path=font_path,
        width=800, height=400, background_color='white',
        max_words=200, collocations=False
    ).generate(segmented)

    plt.figure(figsize=(12, 6), dpi=300)
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.title('大学名称词云')
    plt.show()


def draw_animated_bar(df, top_n=20):
    """Animate a simulated year-by-year top-``top_n`` bar chart and save it as a GIF.

    The yearly data is simulated: for each year from 2018 to 2026 every total
    score is scaled by a random factor between 0.95 and 1.05, then the
    ``top_n`` highest scores of that year are kept. The global NumPy random
    generator is re-seeded with 42 first. The animation is written to
    ``university_ranking_animation.gif`` and then shown.

    Args:
        df: Ranking data with ``大学名称`` and ``总分`` columns.
        top_n: Number of bars per frame.
    """
    years = list(range(2018, 2027))
    np.random.seed(42)

    # One pre-computed frame of data per year, sorted ascending for barh.
    yearly_tables = {}
    for year in years:
        factor = 1 + np.random.uniform(-0.05, 0.05, len(df))
        simulated = df.copy()
        simulated['总分'] = simulated['总分'] * factor
        top_rows = simulated.nlargest(top_n, '总分')
        yearly_tables[year] = top_rows.sort_values('总分', ascending=True)

    fig, ax = plt.subplots(figsize=(12, 8), dpi=100)

    def render_frame(frame):
        # Redraw the whole axes for the year that belongs to this frame.
        ax.clear()
        year = years[frame]
        table = yearly_tables[year]
        scores = table['总分']
        bars = ax.barh(table['大学名称'], scores, color='lightcoral')
        ax.set_xlabel('总分')
        ax.set_title(f'大学排名动态变化 - {year}年')
        # Write each score just right of the end of its bar.
        for bar, score in zip(bars, scores):
            y_mid = bar.get_y() + bar.get_height() / 2
            ax.text(score + 0.5, y_mid, f'{score:.1f}', va='center')

    ani = FuncAnimation(fig, render_frame, frames=len(years), interval=1000, repeat=True)
    ani.save('university_ranking_animation.gif', writer='pillow', fps=1)
    plt.show()
    print("动态图已保存为 'university_ranking_animation.gif'")


# ================== Main workflow ==================
def main():
    """Scrape the ranking, export it to CSV and produce every chart.

    Stops after printing a message when no data could be fetched. Otherwise
    it saves all rows to ``大学排行_全部.csv``, prints the first 30 rows,
    shows the matplotlib charts one after another, renders the province map
    to ``大学分布图.html`` and finally draws the word cloud and the animated
    bar chart.
    """
    print("开始爬取软科大学排名数据...")
    df = fetch_univ_dataframe()
    if df.empty:
        print("无法获取数据，程序退出。")
        return
    print(f"成功获取 {len(df)} 条大学数据。")
    save_to_csv(df, filename="大学排行_全部.csv")
    print_univ_info(df, top_n=30)

    # Static charts, in display order, with the keyword arguments for each.
    static_charts = (
        (draw_horizontal_bar, {'top_n': 20}),
        (draw_pie_chart, {'top_n': 30}),
        (draw_scatter_plot, {'top_n': 30}),
        (draw_bar_chart, {'top_n': 20}),
        (draw_donut_chart, {'top_n': 30}),
        (draw_stacked_bar, {}),
        (draw_multi_line, {'top_n': 30}),
        (draw_nested_pie, {'top_n': 30}),
        (draw_time_series, {'top_n': 10}),
    )
    for draw, options in static_charts:
        draw(df, **options)

    print("生成地理空间图...")
    draw_geo_chart(df).render("大学分布图.html")
    print("地理空间图已保存为 大学分布图.html")

    draw_wordcloud(df)
    draw_animated_bar(df, top_n=20)
    print("所有图表生成完毕！")


# Run the whole pipeline only when this file is executed as a script, so that
# importing it does not trigger any scraping or plotting.
if __name__ == '__main__':
    main()
