192 lines
5.8 KiB
Python
192 lines
5.8 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
数据提取与转换器 - 功能测试脚本
|
|||
|
|
用于验证应用的各项功能是否正常工作
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import tempfile
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# 添加项目路径到Python路径
|
|||
|
|
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|||
|
|
|
|||
|
|
# Import the project's tool modules. If any of them cannot be imported the
# remaining checks are meaningless, so report the error and stop right away.
try:
    from utils.pdf_extractor import extract_text_from_pdf
    from utils.ocr_processor import extract_text_from_image
    from utils.format_converter import excel_to_csv, csv_to_excel, json_to_excel
    from utils.web_scraper import scrape_webpage
    from utils.database_exporter import export_sqlite_to_excel
except ImportError as e:
    print(f"❌ 模块导入失败: {e}")
    sys.exit(1)
else:
    # Reached only when every import above succeeded.
    print("✅ 所有工具模块导入成功")
|
|||
|
|
|
|||
|
|
def test_format_conversion():
    """Smoke-test CSV -> Excel conversion through ``csv_to_excel``.

    Writes three sample rows to a temporary CSV file, asks ``csv_to_excel``
    to convert it, and reports success when the target ``.xlsx`` file exists
    afterwards. The outcome is printed; nothing is returned. Errors raised
    while preparing or converting the data are caught and printed.

    Both temporary files are removed in a ``finally`` clause, so a failing
    conversion does not leave them behind in the temp directory.
    """
    print("\n📊 测试格式转换功能...")

    # Sample rows written to the CSV fixture.
    test_data = [
        {"姓名": "张三", "年龄": 20, "城市": "北京"},
        {"姓名": "李四", "年龄": 21, "城市": "上海"},
        {"姓名": "王五", "年龄": 19, "城市": "广州"},
    ]

    # Tracked outside the try block so cleanup can run on every exit path.
    csv_path = None
    excel_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so the
        # converter can reopen it by name.
        with tempfile.NamedTemporaryFile(
            suffix='.csv', delete=False, mode='w', encoding='utf-8'
        ) as f:
            csv_path = f.name
            f.write("姓名,年龄,城市\n")
            f.writelines(
                f"{item['姓名']},{item['年龄']},{item['城市']}\n"
                for item in test_data
            )

        # Swap only the extension; str.replace would also rewrite any
        # ".csv" that happens to occur earlier in the path.
        excel_path = os.path.splitext(csv_path)[0] + '.xlsx'
        csv_to_excel(csv_path, excel_path)

        if os.path.exists(excel_path):
            print("✅ CSV转Excel功能正常")
        else:
            print("❌ CSV转Excel功能失败")

    except Exception as e:
        print(f"❌ 格式转换测试失败: {e}")
    finally:
        # Remove whatever was created, whether or not the conversion worked.
        for path in (csv_path, excel_path):
            if path is not None and os.path.exists(path):
                os.unlink(path)
|
|||
|
|
|
|||
|
|
def test_web_scraping():
    """Smoke-test ``scrape_webpage`` by fetching the Baidu home page.

    Prints a success line plus the length of the returned content when
    something non-empty comes back, and a failure line otherwise. Any
    exception raised during the fetch is caught and printed. Returns nothing.
    """
    print("\n🌐 测试网页抓取功能...")

    target_url = "https://www.baidu.com"
    try:
        page = scrape_webpage(target_url)
        has_content = bool(page) and len(page) > 0
        if not has_content:
            print("❌ 网页抓取功能失败")
        else:
            print("✅ 网页抓取功能正常")
            print(f" 抓取内容长度: {len(page)} 字符")
    except Exception as err:
        print(f"❌ 网页抓取测试失败: {err}")
|
|||
|
|
|
|||
|
|
def test_ocr_functionality():
    """Smoke-test ``extract_text_from_image`` on a generated image.

    Renders a short mixed English/Chinese string onto a white 400x200 image
    with Pillow, saves it to a uniquely named temporary PNG, and passes that
    file to ``extract_text_from_image``. A non-empty result is reported as
    success; an empty one as a warning. Errors (including Pillow not being
    installed) are caught and printed. Returns nothing.

    The temporary image is removed in a ``finally`` clause so it is cleaned
    up even when OCR raises.
    """
    print("\n🖼️ 测试OCR功能...")

    # Tracked outside the try block so cleanup can run on every exit path.
    img_path = None
    try:
        # Imported lazily so a missing Pillow is reported as a failure of
        # this one check rather than of the whole script.
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (400, 200), color='white')
        d = ImageDraw.Draw(img)

        # Try the common Arial file names, then fall back to Pillow's
        # built-in font. truetype() raises OSError when the font file cannot
        # be read and ImportError when FreeType support is unavailable;
        # anything else (e.g. KeyboardInterrupt) is deliberately not caught.
        font = None
        for font_name in ("arial.ttf", "Arial.ttf"):
            try:
                font = ImageFont.truetype(font_name, 24)
                break
            except (OSError, ImportError):
                continue
        if font is None:
            font = ImageFont.load_default()

        d.text((50, 80), "测试文字: Hello World 你好世界", fill="black", font=font)

        # Unique file name: a fixed name in the shared temp directory could
        # collide with a concurrent run or with a leftover file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            img_path = tmp.name
        img.save(img_path)

        text = extract_text_from_image(img_path)

        if text:
            print("✅ OCR功能正常")
            print(f" 识别结果: {text}")
        else:
            print("⚠️ OCR识别无结果(可能是字体问题)")

    except Exception as e:
        print(f"❌ OCR测试失败: {e}")
    finally:
        if img_path is not None and os.path.exists(img_path):
            os.unlink(img_path)
|
|||
|
|
|
|||
|
|
def test_database_functionality():
    """Smoke-test ``export_sqlite_to_excel`` on a throwaway SQLite database.

    Creates a ``students`` table with three rows, asks
    ``export_sqlite_to_excel`` to export the database, and reports success
    when the target ``.xlsx`` file exists afterwards. Errors are caught and
    printed. Returns nothing.

    The database and the exported workbook live in a private temporary
    directory that is removed on every exit path. A fixed path in the shared
    temp directory would let a database left by a failed run break the next
    one: the table would already hold rows with ids 1-3 and the inserts
    would violate the primary key.
    """
    print("\n🗄️ 测试数据库功能...")

    try:
        import sqlite3
        from contextlib import closing

        # Rows are (id, name, age, major), matching the table definition.
        test_data = [
            (1, "张三", 20, "计算机科学"),
            (2, "李四", 21, "数据科学"),
            (3, "王五", 19, "人工智能"),
        ]

        with tempfile.TemporaryDirectory() as work_dir:
            db_path = os.path.join(work_dir, "test.db")
            excel_path = os.path.join(work_dir, "test.xlsx")

            # closing() guarantees the connection is released even if one of
            # the statements fails, so the directory can always be removed.
            with closing(sqlite3.connect(db_path)) as conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS students (
                        id INTEGER PRIMARY KEY,
                        name TEXT NOT NULL,
                        age INTEGER,
                        major TEXT
                    )
                """)
                conn.executemany(
                    "INSERT INTO students VALUES (?, ?, ?, ?)", test_data
                )
                conn.commit()

            export_sqlite_to_excel(db_path, excel_path)

            if os.path.exists(excel_path):
                print("✅ 数据库导出功能正常")
            else:
                print("❌ 数据库导出功能失败")

    except Exception as e:
        print(f"❌ 数据库功能测试失败: {e}")
|
|||
|
|
|
|||
|
|
def main():
    """Run all functional checks in order and print the surrounding banners.

    Prints a header, calls the four ``test_*`` checks one after another,
    prints a completion footer, and finally prints the application URLs and
    a few manual-testing suggestions. Returns nothing.
    """
    rule = "=" * 50

    print(rule)
    print("数据提取与转换器 - 功能测试")
    print(rule)

    # Run each functional check in the original order.
    checks = (
        test_format_conversion,
        test_web_scraping,
        test_ocr_functionality,
        test_database_functionality,
    )
    for check in checks:
        check()

    print(f"\n{rule}")
    print("测试完成!")
    print(rule)

    # Application access information followed by manual-testing suggestions.
    closing_lines = (
        "\n🌐 应用访问信息:",
        "本地访问: http://localhost:8502",
        "网络访问: http://192.168.10.21:8502",
        "\n💡 测试建议:",
        "1. 访问应用界面测试文件上传功能",
        "2. 使用test_cases目录下的测试文件",
        "3. 测试网页抓取功能(输入百度等网站URL)",
    )
    for line in closing_lines:
        print(line)
|
|||
|
|
|
|||
|
|
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|