- 添加MDF文件导出功能 - 集成阿里云OCR大模型识别 - 添加百度智能云AI照片评分 - 集成DeepSeek大模型创意文案生成 - 完善文档和配置管理 - 使用uv进行现代化依赖管理 - 添加完整的.gitignore配置
241 lines
8.7 KiB
Python
241 lines
8.7 KiB
Python
from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for
|
|
import os
|
|
import uuid
|
|
from werkzeug.utils import secure_filename
|
|
from config import Config
|
|
|
|
# 导入工具模块
|
|
from utils.pdf_extractor import extract_text_from_pdf, pdf_to_excel
|
|
from utils.ocr_processor import extract_text_from_image, image_to_excel, image_to_text_file
|
|
from utils.format_converter import (
|
|
excel_to_csv, csv_to_excel, json_to_excel,
|
|
excel_to_json, csv_to_json, json_to_csv
|
|
)
|
|
from utils.web_scraper import scrape_webpage, web_to_excel
|
|
from utils.database_exporter import export_sqlite_to_excel, database_to_csv, database_to_json
|
|
|
|
# Application object, configured from the project's Config class.
app = Flask(__name__)
app.config.from_object(Config)

# Create the upload directory up front; an already existing one is fine.
os.makedirs(name=app.config['UPLOAD_FOLDER'], exist_ok=True)
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared in lower case.
    A name without any dot is never allowed.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in app.config['ALLOWED_EXTENSIONS']
|
|
|
|
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Store an uploaded file in UPLOAD_FOLDER and describe it as JSON.

    Expects a multipart form field named ``file``.

    Returns:
        On success, a JSON object with ``success``, ``filename`` (the
        sanitized name), ``filepath`` (where the file was saved) and
        ``file_type`` (lower-case extension).
        On failure, ``{'error': ...}`` with HTTP status 400.
    """
    file = request.files.get('file')
    if file is None or file.filename == '':
        return jsonify({'error': '没有选择文件'}), 400

    if not allowed_file(file.filename):
        return jsonify({'error': '不支持的文件类型'}), 400

    # Take the extension from the name the client sent. secure_filename()
    # drops non-ASCII characters, so a name such as "报告.pdf" collapses to
    # "pdf"; reading the extension from that result raised IndexError and the
    # stored file lost the extension the processing routes rely on.
    file_type = file.filename.rsplit('.', 1)[1].lower()

    filename = secure_filename(file.filename)
    if '.' not in filename or filename.rsplit('.', 1)[1].lower() != file_type:
        # Sanitizing removed the stem or the dot: fall back to a neutral stem
        # while keeping the already validated extension.
        filename = f"upload.{file_type}"

    # The UUID prefix keeps uploads with the same name from colliding.
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{uuid.uuid4()}_{filename}")
    file.save(filepath)

    return jsonify({
        'success': True,
        'filename': filename,
        'filepath': filepath,
        'file_type': file_type
    })
|
|
|
|
@app.route('/process/pdf', methods=['POST'])
def process_pdf():
    """Extract text from an uploaded PDF or convert it to an Excel file.

    Reads a JSON body with:
        filepath: path of a previously uploaded file.
        action: ``extract`` (default) or ``to_excel``.

    Returns:
        ``extract``: ``{'success': True, 'text': ...}``.
        ``to_excel``: ``{'success': True, 'download_url': ...}``.
        ``{'error': ...}`` with status 400 for a missing file or unknown
        action, and status 500 when processing raises.
    """
    try:
        # silent=True turns a missing or malformed JSON body into None, so it
        # is answered with the 400 below instead of a 500 from the handler.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        filepath = data.get('filepath')
        action = data.get('action', 'extract')  # extract, to_excel

        # The path comes from the client: accept it only when it resolves to
        # a regular file inside the upload folder.
        upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
        resolved = os.path.realpath(filepath) if isinstance(filepath, str) and filepath else ''
        if not resolved.startswith(os.path.join(upload_root, '')) or not os.path.isfile(resolved):
            return jsonify({'error': '文件不存在'}), 400

        if action == 'extract':
            text = extract_text_from_pdf(filepath)
            return jsonify({'success': True, 'text': text})

        if action == 'to_excel':
            # splitext only touches the final extension, whatever its case.
            # str.replace('.pdf', ...) left "X.PDF" unchanged, making the
            # output path identical to the source file.
            output_path = os.path.splitext(filepath)[0] + '_converted.xlsx'
            pdf_to_excel(filepath, output_path)
            return jsonify({
                'success': True,
                'download_url': f'/download/{os.path.basename(output_path)}'
            })

        return jsonify({'error': '不支持的操作'}), 400

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/process/image', methods=['POST'])
def process_image():
    """Run text extraction on an uploaded image, optionally exporting the result.

    Reads a JSON body with:
        filepath: path of a previously uploaded file.
        action: ``extract`` (default), ``to_excel`` or ``to_text``.

    Returns:
        ``extract``: ``{'success': True, 'text': ...}``.
        ``to_excel`` / ``to_text``: ``{'success': True, 'download_url': ...}``.
        ``{'error': ...}`` with status 400 for a missing file or unknown
        action, and status 500 when processing raises.
    """
    try:
        # silent=True turns a missing or malformed JSON body into None, so it
        # is answered with the 400 below instead of a 500 from the handler.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        filepath = data.get('filepath')
        action = data.get('action', 'extract')  # extract, to_excel, to_text

        # The path comes from the client: accept it only when it resolves to
        # a regular file inside the upload folder.
        upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
        resolved = os.path.realpath(filepath) if isinstance(filepath, str) and filepath else ''
        if not resolved.startswith(os.path.join(upload_root, '')) or not os.path.isfile(resolved):
            return jsonify({'error': '文件不存在'}), 400

        if action == 'extract':
            text = extract_text_from_image(filepath)
            return jsonify({'success': True, 'text': text})

        # splitext looks at the final path component only, so a dot in a
        # directory name can no longer truncate the output path.
        stem = os.path.splitext(filepath)[0]

        if action == 'to_excel':
            output_path = stem + '_converted.xlsx'
            image_to_excel(filepath, output_path)
        elif action == 'to_text':
            output_path = stem + '_converted.txt'
            image_to_text_file(filepath, output_path)
        else:
            return jsonify({'error': '不支持的操作'}), 400

        return jsonify({
            'success': True,
            'download_url': f'/download/{os.path.basename(output_path)}'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/process/format', methods=['POST'])
def process_format():
    """Convert an uploaded xlsx/csv/json file into another of those formats.

    Reads a JSON body with:
        filepath: path of a previously uploaded file.
        target_format: ``excel``, ``csv`` or ``json``.

    Returns:
        ``{'success': True, 'download_url': ...}`` on success.
        ``{'error': ...}`` with status 400 for a missing file or an
        unsupported source/target pair, and status 500 when the
        conversion raises.
    """
    # (source extension, target format) -> (converter, output extension)
    converters = {
        ('xlsx', 'csv'): (excel_to_csv, '.csv'),
        ('csv', 'excel'): (csv_to_excel, '.xlsx'),
        ('json', 'excel'): (json_to_excel, '.xlsx'),
        ('xlsx', 'json'): (excel_to_json, '.json'),
        ('csv', 'json'): (csv_to_json, '.json'),
        ('json', 'csv'): (json_to_csv, '.csv'),
    }

    try:
        # silent=True turns a missing or malformed JSON body into None, so it
        # is answered with the 400 below instead of a 500 from the handler.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        filepath = data.get('filepath')
        target_format = data.get('target_format')  # excel, csv, json

        # The path comes from the client: accept it only when it resolves to
        # a regular file inside the upload folder.
        upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
        resolved = os.path.realpath(filepath) if isinstance(filepath, str) and filepath else ''
        if not resolved.startswith(os.path.join(upload_root, '')) or not os.path.isfile(resolved):
            return jsonify({'error': '文件不存在'}), 400

        # splitext copes with names that have no extension (previously an
        # IndexError) and separates exactly the final extension.
        stem, ext = os.path.splitext(filepath)
        file_ext = ext[1:].lower()

        # Guard against non-string values, which may be unhashable.
        conversion = None
        if isinstance(target_format, str):
            conversion = converters.get((file_ext, target_format))
        if conversion is None:
            return jsonify({'error': '不支持的格式转换'}), 400

        convert, output_ext = conversion
        # Build the output name from the stem. The old case-sensitive
        # str.replace left "X.XLSX" unchanged, so the converter was asked to
        # write over its own input.
        output_path = stem + output_ext
        convert(filepath, output_path)

        return jsonify({
            'success': True,
            'download_url': f'/download/{os.path.basename(output_path)}'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/process/web', methods=['POST'])
def process_web():
    """Scrape a web page and export its content to an Excel file.

    Reads a JSON body with:
        url: address of the page to scrape (required).
        selector: optional selector forwarded to the scraper helpers.

    Returns:
        ``{'success': True, 'content': ..., 'download_url': ...}`` on
        success; ``content`` is the scraped value when it is a string and a
        fixed placeholder otherwise.
        ``{'error': ...}`` with status 400 when no URL is given and status
        500 when scraping or exporting raises.
    """
    try:
        # silent=True turns a missing or malformed JSON body into None, so it
        # is answered with the 400 below instead of a 500 from the handler.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        url = data.get('url')
        selector = data.get('selector', '')

        if not url:
            return jsonify({'error': '请输入URL'}), 400

        # NOTE(review): the URL is client-supplied and fetched by the server
        # without any scheme or host restriction (SSRF risk). Confirm what
        # scrape_webpage accepts before adding validation here.
        content = scrape_webpage(url, selector or None)

        # Export to Excel under a short random name inside the upload folder.
        # NOTE(review): web_to_excel receives the URL, not `content`, so it
        # presumably fetches the page a second time - verify in utils.
        output_filename = f"web_content_{uuid.uuid4().hex[:8]}.xlsx"
        output_path = os.path.join(app.config['UPLOAD_FOLDER'], output_filename)
        web_to_excel(url, output_path, selector)

        return jsonify({
            'success': True,
            'content': content if isinstance(content, str) else '内容已提取',
            'download_url': f'/download/{output_filename}'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/process/database', methods=['POST'])
def process_database():
    """Export an uploaded SQLite database to Excel, CSV or JSON.

    Reads a JSON body with:
        filepath: path of a previously uploaded ``.db`` / ``.sqlite`` file.
        target_format: ``excel`` (default), ``csv`` or ``json``.
        table_name: optional table name forwarded to the exporter
            (empty string when omitted).

    Returns:
        ``{'success': True, 'download_url': ...}`` on success.
        ``{'error': ...}`` with status 400 for a missing file, an
        unsupported database extension or an unsupported target format, and
        status 500 when the export raises.
    """
    # target format -> (exporter, output suffix)
    exporters = {
        'excel': (export_sqlite_to_excel, '_exported.xlsx'),
        'csv': (database_to_csv, '_exported.csv'),
        'json': (database_to_json, '_exported.json'),
    }

    try:
        # silent=True turns a missing or malformed JSON body into None, so it
        # is answered with the 400 below instead of a 500 from the handler.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        filepath = data.get('filepath')
        target_format = data.get('target_format', 'excel')  # excel, csv, json
        table_name = data.get('table_name', '')  # optional: restrict to one table

        # The path comes from the client: accept it only when it resolves to
        # a regular file inside the upload folder.
        upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
        resolved = os.path.realpath(filepath) if isinstance(filepath, str) and filepath else ''
        if not resolved.startswith(os.path.join(upload_root, '')) or not os.path.isfile(resolved):
            return jsonify({'error': '文件不存在'}), 400

        stem, ext = os.path.splitext(filepath)
        if ext[1:].lower() not in ('db', 'sqlite'):
            return jsonify({'error': '不支持的数据库格式'}), 400

        # Guard against non-string values, which may be unhashable.
        exporter = exporters.get(target_format) if isinstance(target_format, str) else None
        if exporter is None:
            return jsonify({'error': '不支持的导出格式'}), 400

        export, suffix = exporter
        # Build the output name from the stem. Replacing the lower-cased
        # extension left "DATA.DB" unchanged, so the export targeted the
        # database file itself.
        output_path = stem + suffix
        export(filepath, output_path, table_name)

        return jsonify({
            'success': True,
            'download_url': f'/download/{os.path.basename(output_path)}'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
@app.route('/download/<filename>')
def download_file(filename):
    """Send a file from UPLOAD_FOLDER to the client as an attachment.

    Args:
        filename: name of a file inside the upload folder.

    Returns:
        The file as a download, or ``{'error': ...}`` with status 404 when
        the name does not resolve to a regular file inside the upload folder.
    """
    upload_root = os.path.realpath(app.config['UPLOAD_FOLDER'])
    filepath = os.path.realpath(os.path.join(upload_root, filename))

    # Reject anything that resolves outside the upload folder (for example
    # ".." or a symlink) and anything that is not a regular file; the old
    # os.path.exists check also accepted directories.
    inside_uploads = filepath.startswith(os.path.join(upload_root, ''))
    if inside_uploads and os.path.isfile(filepath):
        # An absolute path keeps the result independent of how send_file
        # resolves relative paths.
        return send_file(filepath, as_attachment=True)

    return jsonify({'error': '文件不存在'}), 404
|
|
|
|
if __name__ == '__main__':
    # debug=True enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution. Combined with host='0.0.0.0' it was reachable
    # from the network, so debug mode is now opt-in via FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)