"""Smoke test for document reading.

Walks the ``uploads`` folder and, for every regular file with a ``.txt``,
``.pdf`` or ``.docx`` extension (matched case-insensitively), prints the
detected type, a size indicator (page or paragraph count where applicable),
the extracted text length, and the first 100 characters of the text.
Files with any other extension are listed by name only.
"""
import os

from docx import Document

uploads_folder = 'uploads'


def _read_txt(filepath):
    """Return the full contents of a UTF-8 encoded text file."""
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()


def _read_pdf(filepath):
    """Return ``(text, page_count)`` for the PDF at *filepath*.

    The text is every page's extracted text, each followed by a newline.
    """
    # Imported lazily so pypdf is only needed when a PDF is actually present.
    import pypdf

    with open(filepath, 'rb') as f:
        reader = pypdf.PdfReader(f)
        # Extraction must happen while the underlying file is still open.
        text = ''.join(page.extract_text() + '\n' for page in reader.pages)
        return text, len(reader.pages)


def _read_docx(filepath):
    """Return ``(text, paragraph_count)`` for the DOCX at *filepath*.

    The text is every paragraph's text, each followed by a newline.
    """
    doc = Document(filepath)
    text = ''.join(paragraph.text + '\n' for paragraph in doc.paragraphs)
    return text, len(doc.paragraphs)


print("=== 测试文档读取功能 ===\n")

# os.listdir returns entries in arbitrary order; sort for reproducible output.
for file in sorted(os.listdir(uploads_folder)):
    filepath = os.path.join(uploads_folder, file)

    # Skip sub-directories and other non-file entries: they cannot be opened
    # as documents, even if their name happens to end in a known extension.
    if not os.path.isfile(filepath):
        continue

    print(f"文件: {file}")

    # Compare extensions case-insensitively so e.g. "REPORT.PDF" is handled.
    lowered = file.lower()

    if lowered.endswith('.txt'):
        content = _read_txt(filepath)
        print("类型: TXT")
        print(f"内容长度: {len(content)} 字符")
        print(f"前100字符: {content[:100]}...\n")

    elif lowered.endswith('.pdf'):
        text, page_count = _read_pdf(filepath)
        print("类型: PDF")
        print(f"页数: {page_count}")
        print(f"内容长度: {len(text)} 字符")
        print(f"前100字符: {text[:100]}...\n")

    elif lowered.endswith('.docx'):
        text, paragraph_count = _read_docx(filepath)
        print("类型: DOCX")
        print(f"段落数: {paragraph_count}")
        print(f"内容长度: {len(text)} 字符")
        print(f"前100字符: {text[:100]}...\n")