Initial commit: workspace setup with skills, memory, config

This commit is contained in:
root
2026-02-10 14:37:49 -06:00
commit d1357c5463
77 changed files with 10822 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""
Handle Telegram voice messages - download and transcribe
Usage: telegram_voice_handler.py <bot_token> <file_id> [--model MODEL]
"""
import argparse
import os
import sys
import json
import urllib.request
import tempfile
def download_voice_file(bot_token, file_id, output_path):
    """Download a Telegram voice message to *output_path*.

    Resolves the file_id to a server-side file path via the Bot API
    ``getFile`` method, then streams the file contents to disk. Exits
    the process with status 1 on any failure (this is a CLI helper,
    not a library function).

    Args:
        bot_token: Telegram bot API token.
        file_id: Telegram file identifier of the voice message.
        output_path: Local filesystem path to write the download to.

    Returns:
        output_path, for convenient chaining.
    """
    from urllib.parse import quote

    # Step 1: resolve file_id -> file_path on Telegram's servers.
    # file_id is URL-quoted defensively so an unexpected character
    # cannot corrupt the query string.
    file_info_url = (
        f"https://api.telegram.org/bot{bot_token}/getFile"
        f"?file_id={quote(file_id)}"
    )
    try:
        # A timeout prevents the script from hanging forever on a dead network.
        with urllib.request.urlopen(file_info_url, timeout=30) as response:
            data = json.loads(response.read().decode())
        if not data.get("ok"):
            print(f"Error getting file info: {data}", file=sys.stderr)
            sys.exit(1)
        file_path = data["result"]["file_path"]
    except Exception as e:
        print(f"Error fetching file info: {e}", file=sys.stderr)
        sys.exit(1)

    # Step 2: download the actual payload. Chunked copy keeps memory
    # bounded and, unlike urlretrieve, urlopen honors a socket timeout.
    download_url = f"https://api.telegram.org/file/bot{bot_token}/{file_path}"
    try:
        with urllib.request.urlopen(download_url, timeout=60) as response, \
                open(output_path, "wb") as out:
            while True:
                chunk = response.read(65536)
                if not chunk:
                    break
                out.write(chunk)
        return output_path
    except Exception as e:
        print(f"Error downloading file: {e}", file=sys.stderr)
        sys.exit(1)
def transcribe_with_whisper(audio_path, model_size="base"):
    """Run local Faster-Whisper on *audio_path* and return the transcript.

    Returns a dict with the joined transcript text, the detected
    language code, and the detection probability.
    """
    # Imported lazily so the download half of this script works even
    # when faster_whisper is not installed.
    from faster_whisper import WhisperModel

    # Model weights are cached locally after the first load.
    whisper = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = whisper.transcribe(audio_path, beam_size=5)

    # segments is lazy; joining here drives the actual transcription.
    transcript = " ".join(seg.text.strip() for seg in segments)
    return {
        "text": transcript,
        "language": info.language,
        "language_probability": info.language_probability,
    }
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download and transcribe Telegram voice message")
    parser.add_argument("bot_token", help="Telegram bot token")
    parser.add_argument("file_id", help="Telegram voice file_id")
    parser.add_argument("--model", default="base",
                        choices=["tiny", "base", "small", "medium", "large"],
                        help="Whisper model size (default: base)")
    args = parser.parse_args()

    # The WHISPER_MODEL environment variable takes precedence over --model.
    model = os.environ.get("WHISPER_MODEL", args.model)

    # Create the temp file and close it immediately so the download can
    # reopen it by name; delete=False keeps it alive past the `with`.
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
        temp_path = tmp.name

    try:
        # Progress messages go to stderr so stdout stays machine-readable.
        print("Downloading voice file...", file=sys.stderr)
        download_voice_file(args.bot_token, args.file_id, temp_path)

        print(f"Transcribing with {model} model...", file=sys.stderr)
        result = transcribe_with_whisper(temp_path, model)

        # The JSON result is the script's only stdout output.
        print(json.dumps(result))
    finally:
        # Always remove the temp file, even if transcription failed.
        if os.path.exists(temp_path):
            os.remove(temp_path)

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""
Transcribe audio files using local Faster-Whisper (CPU-only)
Usage: transcribe.py <audio_file> [--model MODEL] [--output-format text|json|srt]
"""
import argparse
import os
import sys
import json
from faster_whisper import WhisperModel
def transcribe(audio_path, model_size="base", output_format="text"):
    """Transcribe an audio file with a local Faster-Whisper model.

    Prints the formatted result to stdout (progress goes to stderr) and
    returns the plain transcript.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_size: Whisper model size ("tiny" through "large").
        output_format: "text" (plain transcript), "json" (segments plus
            language metadata), or "srt" (subtitle format).

    Returns:
        The full transcript as a single space-joined string.

    Exits the process with status 1 if *audio_path* does not exist.
    """
    if not os.path.exists(audio_path):
        print(f"Error: File not found: {audio_path}", file=sys.stderr)
        sys.exit(1)

    # Model weights are cached in ~/.cache/huggingface/hub after first use.
    print(f"Loading Whisper model: {model_size}", file=sys.stderr)
    model = WhisperModel(model_size, device="cpu", compute_type="int8")

    print(f"Transcribing: {audio_path}", file=sys.stderr)
    # segments is a lazy generator; iterating below performs the work.
    segments, info = model.transcribe(audio_path, beam_size=5)

    results = []
    full_text = []
    for segment in segments:
        text = segment.text.strip()
        results.append({
            "start": segment.start,
            "end": segment.end,
            "text": text
        })
        full_text.append(text)

    # Join once; the transcript is reused across the output branches
    # and the return value.
    transcript = " ".join(full_text)

    if output_format == "json":
        output = {
            "language": info.language,
            "language_probability": info.language_probability,
            "segments": results,
            "text": transcript
        }
        print(json.dumps(output, indent=2))
    elif output_format == "srt":
        # SRT cue: index line, "start --> end" line, text, blank separator.
        for i, segment in enumerate(results, 1):
            start = format_timestamp(segment["start"])
            end = format_timestamp(segment["end"])
            print(f"{i}")
            print(f"{start} --> {end}")
            print(f"{segment['text']}\n")
    else:  # text
        print(transcript)

    return transcript
def format_timestamp(seconds):
    """Convert a duration in seconds to an SRT timestamp (HH:MM:SS,mmm)."""
    whole = int(seconds)
    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    millis = int((seconds % 1) * 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
if __name__ == "__main__":
    # Command-line entry point for the transcribe() helper above.
    cli = argparse.ArgumentParser(description="Transcribe audio using Faster-Whisper")
    cli.add_argument("audio_file", help="Path to audio file")
    cli.add_argument("--model", default="base",
                     choices=["tiny", "base", "small", "medium", "large"],
                     help="Whisper model size (default: base)")
    cli.add_argument("--output-format", default="text",
                     choices=["text", "json", "srt"],
                     help="Output format (default: text)")
    opts = cli.parse_args()

    # WHISPER_MODEL in the environment overrides the --model flag.
    chosen_model = os.environ.get("WHISPER_MODEL", opts.model)
    transcribe(opts.audio_file, chosen_model, opts.output_format)