# myircbot/plugins/ollama.py
# Last modified: 2025-05-29 22:58:53 +02:00
# (97 lines, 2.9 KiB, Python — paste metadata preserved as comments so the file parses)
import os
import time
import requests
# --- Configuration ---
# API key is read from the environment; never hard-code it.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
# Model served through the HuggingFace router (override via HUGGINGFACE_MODEL).
MODEL_ID = os.getenv("HUGGINGFACE_MODEL", "deepseek/deepseek-v3-0324")
# OpenAI-compatible chat-completions endpoint on the HuggingFace router (novita provider).
HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
# Fail fast at import time when the key is missing, so the plugin never loads half-configured.
if not HUGGINGFACE_API_KEY:
    raise ValueError("Error: La variable de entorno HUGGINGFACE_API_KEY no está definida.")
# --- Throttling ---
LAST_CALL_TIME = 0.0          # timestamp (time.time()) of the last accepted query; module-global state
MIN_INTERVAL_SECONDS = 5.0    # minimum seconds allowed between consecutive queries
# --- Response chunking ---
MAX_CHUNK_LENGTH = 200        # maximum characters per response fragment
DELAY_BETWEEN_CHUNKS = 1.0    # pause (seconds) associated with each fragment
def query_huggingface(prompt):
    """Send a chat-style prompt to the HuggingFace Router and return the reply.

    Parameters:
        prompt (str): The user's message, sent as the "user" turn after a
            fixed system prompt that forces neutral Latin American Spanish.

    Returns:
        str: The model's reply (stripped), or a human-readable error string
        when the request times out, fails at the HTTP level, or the response
        payload does not have the expected OpenAI-style structure. This
        function never raises for network/payload problems.
    """
    headers = {
        "Authorization": f"Bearer {HUGGINGFACE_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_ID,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant that always responds in neutral Latin American Spanish. Ignore usernames or nicknames like 'teraflops'. Focus only on the user's input.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        "temperature": 0.7,
        "max_tokens": 300,
    }
    try:
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"].strip()
    except requests.exceptions.Timeout:
        return "⏱️ El servidor de HuggingFace tardó demasiado en responder."
    except requests.exceptions.RequestException as e:
        return f"❌ Error al conectar con HuggingFace Router: {e}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Fix: a 200 response with an unexpected/malformed JSON body previously
        # escaped as an uncaught KeyError/IndexError/decode error and crashed
        # the caller. ValueError also covers JSON decoding failures raised by
        # response.json() (requests' JSONDecodeError subclasses ValueError).
        return f"❌ Respuesta inesperada de HuggingFace Router: {e}"
def chunk_text(text, max_length):
    """Split *text* into consecutive pieces of at most *max_length* characters.

    The final piece may be shorter; an empty string yields an empty list.
    """
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        pieces.append(text[start:start + max_length])
        start += max_length
    return pieces
def run(*args):
    """Query the model and return its reply split into chunks, with throttling.

    Parameters:
        *args: Words of the user's prompt; joined with single spaces. When
            called with no arguments, the plugin help text is returned.

    Returns:
        str: The model reply as newline-joined fragments of at most
        MAX_CHUNK_LENGTH characters, or a throttling / error message.
    """
    global LAST_CALL_TIME
    if not args:
        return help()
    # Simple module-global throttle: reject calls made too soon after the last one.
    now = time.time()
    elapsed = now - LAST_CALL_TIME
    if elapsed < MIN_INTERVAL_SECONDS:
        wait_time = round(MIN_INTERVAL_SECONDS - elapsed, 1)
        return f"¡Espera {wait_time} seg antes de hacer otra consulta!"
    LAST_CALL_TIME = now
    prompt = " ".join(args)
    try:
        full_text = query_huggingface(prompt)
    except Exception as e:
        # query_huggingface already maps network errors to strings; this is a
        # last-resort guard so the bot never crashes on an unexpected failure.
        return f"Error al consultar HuggingFace: {e}"
    # Fix: the original slept DELAY_BETWEEN_CHUNKS after EVERY fragment before
    # returning, but the whole reply is returned as one joined string, so the
    # sleeps only delayed the response by ~1 s per chunk with no pacing effect.
    # NOTE(review): if the bot framework is meant to pace IRC sends, the delay
    # belongs in the code that emits each line — confirm against the caller.
    return "\n".join(chunk_text(full_text, MAX_CHUNK_LENGTH))
def help():
    """Return the plugin's usage text.

    Note: intentionally shadows the ``help`` builtin within this module —
    presumably the bot's plugin loader looks up a function by this name;
    confirm before renaming.
    """
    # Fix: the summary line said ".ollama" while both examples used "!ollama";
    # use the "!" prefix consistently (matching the examples).
    return f"""!ollama <pregunta> - Consulta el modelo '{MODEL_ID}' en HuggingFace usando el router.
Ejemplos:
!ollama ¿Qué es la entropía en física?
!ollama Resume el argumento de Don Quijote en 3 líneas.
"""