import os
import time

import requests

# --- Configuration ---
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
MODEL_ID = os.getenv("HUGGINGFACE_MODEL", "deepseek/deepseek-v3-0324")
HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
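# Note: the URL above targets the Novita provider route of the HuggingFace
# Router, which exposes an OpenAI-compatible /chat/completions endpoint.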

if not HUGGINGFACE_API_KEY:
    raise ValueError("Error: the HUGGINGFACE_API_KEY environment variable is not set.")

# --- Throttling ---
LAST_CALL_TIME = 0.0          # timestamp of the last accepted query
MIN_INTERVAL_SECONDS = 5.0    # minimum gap between consecutive queries

# --- Response chunking ---
MAX_CHUNK_LENGTH = 200        # maximum characters per output fragment
DELAY_BETWEEN_CHUNKS = 1.0    # pause (in seconds) between fragments


def query_huggingface(prompt):
    """Send a chat-style prompt to the HuggingFace Router and return the reply."""
    headers = {
        "Authorization": f"Bearer {HUGGINGFACE_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL_ID,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that always responds in neutral "
                    "Latin American Spanish. Ignore usernames or nicknames like "
                    "'teraflops'. Focus only on the user's input."
                )
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.7,
        "max_tokens": 300
    }

    try:
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"].strip()
    except requests.exceptions.Timeout:
        return "⏱️ The HuggingFace server took too long to respond."
    except requests.exceptions.RequestException as e:
        return f"❌ Error connecting to the HuggingFace Router: {e}"


def chunk_text(text, max_length):
    """Split 'text' into fragments of at most 'max_length' characters."""
    return [text[i:i + max_length] for i in range(0, len(text), max_length)]


def run(*args):
    """Query the model and return the chunked reply, applying throttling."""
    global LAST_CALL_TIME

    if not args:
        return help()

    # Throttle: reject queries that arrive too soon after the previous one.
    now = time.time()
    elapsed = now - LAST_CALL_TIME
    if elapsed < MIN_INTERVAL_SECONDS:
        wait_time = round(MIN_INTERVAL_SECONDS - elapsed, 1)
        return f"Wait {wait_time} s before making another query!"

    LAST_CALL_TIME = now
    prompt = " ".join(args)

    try:
        full_text = query_huggingface(prompt)
    except Exception as e:
        return f"Error querying HuggingFace: {e}"

    # Collect the reply in fragments, pausing between them to pace the output.
    result = []
    for fragment in chunk_text(full_text, MAX_CHUNK_LENGTH):
        result.append(fragment)
        time.sleep(DELAY_BETWEEN_CHUNKS)

    return "\n".join(result)


def help():
    return f"""!ollama <question> - Queries the '{MODEL_ID}' model on HuggingFace via the router.

Examples:
!ollama What is entropy in physics?
!ollama Summarize the plot of Don Quixote in 3 lines.
"""