97 lines
3.1 KiB
Python
Executable File
97 lines
3.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
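Handle Telegram voice messages: download and transcribe.

Usage: telegram_voice_handler.py <bot_token> <file_id> [--model MODEL]

The transcription result is printed to stdout as a JSON object with the
keys "text", "language" and "language_probability"; progress and error
messages go to stderr.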
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import json
|
|
import urllib.request
|
|
import tempfile
|
|
|
|
def download_voice_file(bot_token, file_id, output_path, timeout=30):
    """Download a Telegram voice file to a local path.

    Resolves ``file_id`` to a server-side file path through the Bot API
    ``getFile`` method, then downloads that file into ``output_path``.

    Args:
        bot_token: Telegram bot token used in both request URLs.
        file_id: Telegram ``file_id`` of the voice message.
        output_path: Local path the downloaded audio is written to.
        timeout: Socket timeout in seconds applied to each HTTP request,
            so a stalled connection cannot block the script indefinitely.

    Returns:
        ``output_path`` once the file has been written.

    Raises:
        SystemExit: With status 1 on any failure (API error response,
            network/HTTP error, malformed reply, write error). A message
            describing the failure is printed to stderr first.
    """
    # Imported locally so the function does not rely on urllib.request
    # happening to expose the urllib.parse submodule.
    from urllib.parse import urlencode

    # Step 1: Get file path from Telegram.
    # urlencode() percent-escapes file_id so characters such as '&', '#'
    # or spaces cannot corrupt the query string.
    query = urlencode({"file_id": file_id})
    file_info_url = f"https://api.telegram.org/bot{bot_token}/getFile?{query}"

    try:
        with urllib.request.urlopen(file_info_url, timeout=timeout) as response:
            data = json.loads(response.read().decode())
        if not data.get("ok"):
            print(f"Error getting file info: {data}", file=sys.stderr)
            # SystemExit is not an Exception subclass, so it passes
            # straight through the handler below.
            sys.exit(1)

        file_path = data["result"]["file_path"]
    except Exception as e:
        print(f"Error fetching file info: {e}", file=sys.stderr)
        sys.exit(1)

    # Step 2: Download the actual file
    download_url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}"

    try:
        # urlopen() is used instead of the legacy urlretrieve() because it
        # accepts a timeout. An unsized read() raises on a truncated body,
        # and Bot API downloads are size-capped (20 MB per the Bot API
        # docs), so buffering the payload in memory is acceptable.
        with urllib.request.urlopen(download_url, timeout=timeout) as response:
            audio_bytes = response.read()
        with open(output_path, "wb") as out_file:
            out_file.write(audio_bytes)
        return output_path
    except Exception as e:
        print(f"Error downloading file: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
def transcribe_with_whisper(audio_path, model_size="base"):
    """Transcribe an audio file with a local Faster-Whisper model.

    Args:
        audio_path: Path of the audio file passed to the model.
        model_size: Model identifier handed to ``WhisperModel``.

    Returns:
        A dict with three keys: ``"text"`` (the stripped text of every
        segment, joined with single spaces), ``"language"`` and
        ``"language_probability"`` (both taken from the info object
        returned by ``WhisperModel.transcribe``).
    """
    # Imported at call time rather than at module load.
    from faster_whisper import WhisperModel

    # CPU inference with the int8 compute type.
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = whisper.transcribe(audio_path, beam_size=5)

    # Walk the returned segments once, keeping each one's trimmed text.
    pieces = [segment.text.strip() for segment in segments]
    transcript = " ".join(pieces)

    return {
        "text": transcript,
        "language": info.language,
        "language_probability": info.language_probability,
    }
|
|
|
|
if __name__ == "__main__":
    # Command line: two required positionals and an optional model size.
    cli = argparse.ArgumentParser(
        description="Download and transcribe Telegram voice message"
    )
    cli.add_argument("bot_token", help="Telegram bot token")
    cli.add_argument("file_id", help="Telegram voice file_id")
    cli.add_argument(
        "--model",
        default="base",
        choices=["tiny", "base", "small", "medium", "large"],
        help="Whisper model size (default: base)",
    )
    options = cli.parse_args()

    # A WHISPER_MODEL environment variable, when set, wins over --model.
    model_name = os.environ.get("WHISPER_MODEL", options.model)

    # Reserve a uniquely named .ogg file for the download; only its path is
    # needed here, so the descriptor is closed right away.
    descriptor, audio_file = tempfile.mkstemp(suffix=".ogg")
    os.close(descriptor)

    try:
        # Progress messages go to stderr so stdout carries only the JSON.
        print(f"Downloading voice file...", file=sys.stderr)
        download_voice_file(options.bot_token, options.file_id, audio_file)

        print(f"Transcribing with {model_name} model...", file=sys.stderr)
        transcription = transcribe_with_whisper(audio_file, model_name)

        # Emit the transcription dict as a single JSON document.
        print(json.dumps(transcription))
    finally:
        # Remove the temporary audio file whether or not the steps above
        # succeeded (this also runs when a helper calls sys.exit).
        if os.path.exists(audio_file):
            os.remove(audio_file)
|