"""Gradio text interface that forwards prompts to a Llama-2-70b endpoint.

The script builds a LangChain ``HuggingFaceEndpoint`` client at import time
and exposes it through a single-textbox Gradio UI. The Hugging Face API token
is read from the ``HF_TOKEN`` environment variable.
"""

import logging
import os

import gradio as gr
from dotenv import load_dotenv
from langchain.llms import HuggingFaceEndpoint

logger = logging.getLogger(__name__)

# Load environment variables (HF_TOKEN is read below).
load_dotenv()

# Initialize the model
repo_id = "meta-llama/Llama-2-70b"
llm = HuggingFaceEndpoint(
    # A Hub model id belongs in `repo_id`; `endpoint_url` is meant for a
    # full inference-endpoint URL.
    repo_id=repo_id,
    temperature=0.7,
    huggingfacehub_api_token=os.getenv("HF_TOKEN"),
    timeout=1800,  # 30 minutes timeout
)


def generate_response(prompt):
    """Send *prompt* to the model and return its text response.

    Args:
        prompt: The text entered by the user in the Gradio textbox.

    Returns:
        The value returned by ``llm.invoke(prompt)``, or a string of the form
        ``"An error occurred: <message>"`` if the call raised an exception.
    """
    try:
        return llm.invoke(prompt)
    except Exception as e:
        # This is the UI boundary: show the failure to the user instead of
        # crashing the app, but keep the traceback in the logs.
        logger.exception("Model invocation failed")
        return f"An error occurred: {str(e)}"


# Create Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Llama-2-70b Model Interface",
    description="Enter a prompt and get a response from the Llama-2-70b model.",
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()