#!/usr/bin/env python3
"""Handle Telegram voice messages - download and transcribe.

Usage:
    telegram_voice_handler.py BOT_TOKEN FILE_ID [--model MODEL]

Prints a JSON object with keys "text", "language" and
"language_probability" to stdout; progress/diagnostics go to stderr.
"""
import argparse
import json
import os
import shutil
import sys
import tempfile
import urllib.request

# Model sizes accepted by --model (WHISPER_MODEL env var overrides).
MODEL_CHOICES = ("tiny", "base", "small", "medium", "large")


def download_voice_file(bot_token, file_id, output_path):
    """Download a Telegram voice file to *output_path*.

    Resolves *file_id* to a server-side file path via the Bot API
    ``getFile`` method, then streams the file to disk.

    Returns *output_path* on success. This is a CLI helper: on any
    failure it prints to stderr and exits the process with status 1.
    """
    # Step 1: resolve file_id -> file_path on Telegram's file server.
    file_info_url = (
        f"https://api.telegram.org/bot{bot_token}/getFile?file_id={file_id}"
    )
    try:
        with urllib.request.urlopen(file_info_url) as response:
            data = json.loads(response.read().decode())
        if not data.get("ok"):
            print(f"Error getting file info: {data}", file=sys.stderr)
            sys.exit(1)
        file_path = data["result"]["file_path"]
    except SystemExit:
        # Re-raise our own exit above; the handler below must not mask it.
        raise
    except Exception as e:
        print(f"Error fetching file info: {e}", file=sys.stderr)
        sys.exit(1)

    # Step 2: download the actual bytes, streaming in chunks.
    # urlretrieve is a legacy interface that may be deprecated;
    # urlopen + shutil.copyfileobj is the supported equivalent.
    download_url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}"
    try:
        with urllib.request.urlopen(download_url) as response, \
                open(output_path, "wb") as out:
            shutil.copyfileobj(response, out)
        return output_path
    except Exception as e:
        print(f"Error downloading file: {e}", file=sys.stderr)
        sys.exit(1)


def transcribe_with_whisper(audio_path, model_size="base"):
    """Transcribe *audio_path* with a local Faster-Whisper model.

    Returns a dict with keys "text" (joined segment text), "language"
    (detected language code) and "language_probability" (detection
    confidence as reported by the model).
    """
    # Deferred import: faster_whisper is heavy and only needed here.
    from faster_whisper import WhisperModel

    # int8 on CPU is the low-memory configuration; model weights are
    # cached locally after the first use.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # transcribe() returns a lazy iterator of segments plus metadata.
    segments, info = model.transcribe(audio_path, beam_size=5)
    full_text = " ".join(segment.text.strip() for segment in segments)

    return {
        "text": full_text,
        "language": info.language,
        "language_probability": info.language_probability,
    }


def _parse_args():
    """Parse and return the command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Download and transcribe Telegram voice message"
    )
    parser.add_argument("bot_token", help="Telegram bot token")
    parser.add_argument("file_id", help="Telegram voice file_id")
    parser.add_argument(
        "--model",
        default="base",
        choices=list(MODEL_CHOICES),
        help="Whisper model size (default: base)",
    )
    return parser.parse_args()


def main():
    """Entry point: download the voice file, transcribe it, emit JSON."""
    args = _parse_args()

    # Environment variable takes precedence over the CLI flag.
    model = os.environ.get("WHISPER_MODEL", args.model)

    # delete=False so the path outlives the context manager; we clean
    # it up ourselves in the finally block below.
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
        temp_path = tmp.name

    try:
        print("Downloading voice file...", file=sys.stderr)
        download_voice_file(args.bot_token, args.file_id, temp_path)

        print(f"Transcribing with {model} model...", file=sys.stderr)
        result = transcribe_with_whisper(temp_path, model)

        # Machine-readable result on stdout only.
        print(json.dumps(result))
    finally:
        # Always remove the downloaded audio, even on failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)


if __name__ == "__main__":
    main()